; This is a more restricted solution to #82242 (vs the more general #82290 +
; #84360) whereby if we're concat'ing PCMPEQ/GT nodes to 256-bits on an AVX1
; target then determine if the integer values are in bounds to allow them to
; be converted to FP for a (legal) float comparison.
;
; By performing this inside combineConcatVectorOps and working on PCMPEQ/GT
; nodes and not ICMP, we delay the fold until after more lowering has
; occurred, which avoids many of the issues where we were getting 'stuck'
; with CMPPS or unnecessary 256-bit nodes, and can more easily determine if
; either of the new concats() will be free.
;
; Additionally this requires BOTH comparison operands to be in range; while
; technically not required this does help avoid the remaining regressions.
; It doesn't require that one of the operands is constant. We may be able to
; add additional functionality (more CondCode patterns, v4i64/v4f64 handling,
; 'bitcastable' integers etc.) in future patches.

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64

; PR82242
; Both compare operands are masked into a small non-negative range, so the
; v8i32 equality compare can be performed as a single 256-bit float compare
; (vcvtdq2ps + vcmpeqps) instead of being split into two 128-bit vpcmpeqd.
define <8 x i32> @cmp_eq_bitcast(<8 x i32> %x) {
; X86-LABEL: cmp_eq_bitcast:
; X86:       # %bb.0:
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X86-NEXT:    vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: cmp_eq_bitcast:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X64-NEXT:    vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %and = and <8 x i32> %x, <i32 7, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %cmp = icmp eq <8 x i32> %and, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %sext = sext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %sext
}

; 'ne' is lowered as eq + not (vpcmpeqd + vpxor) per 128-bit half; the fold
; to a float compare does not currently fire for this pattern.
define <8 x i32> @cmp_ne_sitofp(<8 x i32> %x) {
; CHECK-LABEL: cmp_ne_sitofp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %cmp = icmp ne <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %sext = sext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %sext
}

; Negative test: only %x is masked into range - %y is unconstrained, so the
; compare must stay as split 128-bit vpcmpgtd nodes.
define <8 x i32> @cmp_slt_fail_no_const(<8 x i32> %x, <8 x i32> %y) {
; X86-LABEL: cmp_slt_fail_no_const:
; X86:       # %bb.0:
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
; X86-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; X86-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: cmp_slt_fail_no_const:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
; X64-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; X64-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-NEXT:    retq
  %and = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %cmp = icmp slt <8 x i32> %and, %y
  %sext = sext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %sext
}

; Equality against a small negative splat (-3); currently still lowered as
; split 128-bit vpcmpeqd nodes.
define <8 x i32> @cmp_eq_sitofp(<8 x i32> %x) {
; CHECK-LABEL: cmp_eq_sitofp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm2 = [4294967293,4294967293,4294967293,4294967293]
; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %cmp = icmp eq <8 x i32> %x, <i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3>
  %sext = sext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %sext
}

; Negative test: neither operand has known bounds, so the compare must stay
; as split 128-bit vpcmpgtd nodes.
define <8 x i32> @cmp_sgt_fail_no_bounds(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: cmp_sgt_fail_no_bounds:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm3
; CHECK-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %cmp = icmp slt <8 x i32> %x, %y
  %sext = sext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %sext
}

; Both operands are masked with 0x7F800000 (non-negative, within float
; exponent bits); currently still lowered as split 128-bit vpcmpgtd nodes.
define <8 x i32> @cmp_sgt_bitcast(<8 x i32> %xx, <8 x i32> %yy) {
; CHECK-LABEL: cmp_sgt_bitcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm2 = [2139095040,2139095040,2139095040,2139095040,2139095040,2139095040,2139095040,2139095040]
; CHECK-NEXT:    vandps %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vandps %ymm2, %ymm1, %ymm1
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
; CHECK-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = and <8 x i32> %xx, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %y = and <8 x i32> %yy, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>

  %cmp = icmp sgt <8 x i32> %x, %y
  %sext = sext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %sext
}

; Negative test: element 0's mask (2139095041) exceeds the in-bounds limit,
; so the compare must stay as split 128-bit vpcmpgtd nodes.
define <8 x i32> @cmp_sle_fail_out_of_bounds(<8 x i32> %xx) {
; X86-LABEL: cmp_sle_fail_out_of_bounds:
; X86:       # %bb.0:
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2139095041,2139095041,2139095041,2139095041]
; X86-NEXT:    vpcmpgtd %xmm1, %xmm2, %xmm1
; X86-NEXT:    vpcmpgtd %xmm0, %xmm2, %xmm0
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: cmp_sle_fail_out_of_bounds:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2139095041,2139095041,2139095041,2139095041]
; X64-NEXT:    vpcmpgtd %xmm1, %xmm2, %xmm1
; X64-NEXT:    vpcmpgtd %xmm0, %xmm2, %xmm0
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = and <8 x i32> %xx, <i32 2139095041, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %cmp = icmp sle <8 x i32> %x, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %sext = sext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %sext
}

; Negative test: splat constant 16777216 (2^24) is outside the exactly-
; representable integer range of f32, so no float-compare fold.
define <8 x i32> @cmp_eq_fail_out_of_bounds(<8 x i32> %x) {
; CHECK-LABEL: cmp_eq_fail_out_of_bounds:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm2 = [16777216,16777216,16777216,16777216]
; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %cmp = icmp eq <8 x i32> %x, <i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216>
  %sext = sext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %sext
}

; Negative test: splat constant -16777216 (-2^24) is outside the exactly-
; representable integer range of f32, so no float-compare fold.
define <8 x i32> @cmp_eq_fail_out_of_bounds2(<8 x i32> %x) {
; CHECK-LABEL: cmp_eq_fail_out_of_bounds2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm2 = [4278190080,4278190080,4278190080,4278190080]
; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %cmp = icmp eq <8 x i32> %x, <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
  %sext = sext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %sext
}

; TODO: non-splat constant with mixed-sign elements - all values are still
; in-bounds for f32, so this could be folded in a future patch.
define <8 x i32> @cmp_eq_todo(<8 x i32> %x) {
; X86-LABEL: cmp_eq_todo:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: cmp_eq_todo:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %cmp = icmp eq <8 x i32> %x, <i32 -16777215, i32 16777215, i32 16777215, i32 -16777215, i32 16777215, i32 -16777215, i32 16777215, i32 -16777215>
  %sext = sext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %sext
}

; Negative test: %x has no known bounds, so an unsigned compare cannot be
; safely mapped to a signed/float compare; lowered via vpminud + vpcmpeqd.
define <8 x i32> @cmp_ult_fail_maybe_negative(<8 x i32> %x) {
; CHECK-LABEL: cmp_ult_fail_maybe_negative:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2,2,2,2]
; CHECK-NEXT:    vpminud %xmm2, %xmm1, %xmm3
; CHECK-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpminud %xmm2, %xmm0, %xmm2
; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %cmp = icmp ult <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %sext = sext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %sext
}

; %x is masked to known non-negative values, so the unsigned 'ule' can be
; lowered as signed 'lt' vs 4; currently still split 128-bit vpcmpgtd nodes.
define <8 x i32> @cmp_ule_bitcast(<8 x i32> %xx) {
; X86-LABEL: cmp_ule_bitcast:
; X86:       # %bb.0:
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vbroadcastss {{.*#+}} xmm2 = [4,4,4,4]
; X86-NEXT:    vpcmpgtd %xmm1, %xmm2, %xmm1
; X86-NEXT:    vpcmpgtd %xmm0, %xmm2, %xmm0
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: cmp_ule_bitcast:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-NEXT:    vbroadcastss {{.*#+}} xmm2 = [4,4,4,4]
; X64-NEXT:    vpcmpgtd %xmm1, %xmm2, %xmm1
; X64-NEXT:    vpcmpgtd %xmm0, %xmm2, %xmm0
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = and <8 x i32> %xx, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %cmp = icmp ule <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %sext = sext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %sext
}

; %x is masked with 0x7FFFFFFF (known non-negative), so unsigned 'ugt' can
; be lowered as signed 'sgt'; currently still split 128-bit vpcmpgtd nodes.
define <8 x i32> @cmp_ugt_sitofp(<8 x i32> %xx) {
; X86-LABEL: cmp_ugt_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
; X86-NEXT:    vpcmpgtd %xmm2, %xmm1, %xmm1
; X86-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: cmp_ugt_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-NEXT:    vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
; X64-NEXT:    vpcmpgtd %xmm2, %xmm1, %xmm1
; X64-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %x = and <8 x i32> %xx, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %cmp = icmp ugt <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %sext = sext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %sext
}