Files
clang-p2996/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
Alex Richardson e39f6c1844 [opt] Infer DataLayout from triple if not specified
There are many tests that specify a target triple/CPU flags but no
DataLayout which can lead to IR being generated that has unusual
behaviour. This commit attempts to use the default DataLayout based
on the relevant flags if there is no explicit override on the command
line or in the IR file.

One thing that is not currently possible is to differentiate a missing
datalayout from an explicit `target datalayout = ""` in the IR file, since the
current APIs don't allow detecting this case. If it is considered useful to
support this case (instead of passing "-data-layout=" on the command
line), I can change IR parsers to track whether they have seen such a
directive and change the callback type.

Differential Revision: https://reviews.llvm.org/D141060
2023-10-26 12:07:37 -07:00

291 lines
11 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
; External functions taking a vector argument. Tests in this file call them to
; give an insertelement result an additional use besides the compare.
declare void @use(<4 x i32>)
declare void @usef(<4 x float>)
; Eliminating an insert is profitable.
; Both scalars are inserted at lane 0 of an undef <16 x i8> and compared with
; 'icmp eq'. The expected output compares the scalars directly and inserts the
; i1 result at lane 0 of an undef <16 x i1>.
define <16 x i1> @ins0_ins0_i8(i8 %x, i8 %y) {
; CHECK-LABEL: @ins0_ins0_i8(
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i1> undef, i1 [[R_SCALAR]], i64 0
; CHECK-NEXT: ret <16 x i1> [[R]]
;
%i0 = insertelement <16 x i8> undef, i8 %x, i32 0
%i1 = insertelement <16 x i8> undef, i8 %y, i32 0
%r = icmp eq <16 x i8> %i0, %i1
ret <16 x i1> %r
}
; Eliminating an insert is still profitable. Mismatch types on index is ok.
; The two inserts use the same lane (5) but spell the index with different
; integer types (i8 vs. i32). The expected output inserts the scalar 'icmp sgt'
; result at lane 5 using an i64 index.
define <8 x i1> @ins5_ins5_i16(i16 %x, i16 %y) {
; CHECK-LABEL: @ins5_ins5_i16(
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i1> undef, i1 [[R_SCALAR]], i64 5
; CHECK-NEXT: ret <8 x i1> [[R]]
;
%i0 = insertelement <8 x i16> undef, i16 %x, i8 5
%i1 = insertelement <8 x i16> undef, i16 %y, i32 5
%r = icmp sgt <8 x i16> %i0, %i1
ret <8 x i1> %r
}
; The new vector constant is calculated by constant folding.
; The base vectors are the constants zeroinitializer and <1, -1>. The expected
; result base <true, false> is the lane-wise 'sle' of those constants
; (0 sle 1, 0 sle -1); lane 1 is then overwritten by the scalar compare.
define <2 x i1> @ins1_ins1_i64(i64 %x, i64 %y) {
; CHECK-LABEL: @ins1_ins1_i64(
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp sle i64 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> <i1 true, i1 false>, i1 [[R_SCALAR]], i64 1
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%i0 = insertelement <2 x i64> zeroinitializer, i64 %x, i64 1
%i1 = insertelement <2 x i64> <i64 1, i64 -1>, i64 %y, i32 1
%r = icmp sle <2 x i64> %i0, %i1
ret <2 x i1> %r
}
; The inserts are free, but it's still better to scalarize.
; Floating-point variant: the 'nnan ninf' fast-math flags on the vector fcmp
; are expected to be carried over to the scalar fcmp. The expected result base
; for the undef-vs-undef 'uge' compare is an all-true vector.
define <2 x i1> @ins0_ins0_f64(double %x, double %y) {
; CHECK-LABEL: @ins0_ins0_f64(
; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp nnan ninf uge double [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> <i1 true, i1 true>, i1 [[R_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%i0 = insertelement <2 x double> undef, double %x, i32 0
%i1 = insertelement <2 x double> undef, double %y, i32 0
%r = fcmp nnan ninf uge <2 x double> %i0, %i1
ret <2 x i1> %r
}
; Negative test - mismatched indexes (but could fold this).
; %x is inserted at lane 1 and %y at lane 0, so the inserted scalars are not in
; the same lane. The expected output is the input IR unchanged.
define <16 x i1> @ins1_ins0_i8(i8 %x, i8 %y) {
; CHECK-LABEL: @ins1_ins0_i8(
; CHECK-NEXT: [[I0:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 1
; CHECK-NEXT: [[I1:%.*]] = insertelement <16 x i8> undef, i8 [[Y:%.*]], i32 0
; CHECK-NEXT: [[R:%.*]] = icmp sle <16 x i8> [[I0]], [[I1]]
; CHECK-NEXT: ret <16 x i1> [[R]]
;
%i0 = insertelement <16 x i8> undef, i8 %x, i32 1
%i1 = insertelement <16 x i8> undef, i8 %y, i32 0
%r = icmp sle <16 x i8> %i0, %i1
ret <16 x i1> %r
}
; Base vector does not have to be undef.
; One insert uses a zeroinitializer base and the other an undef base. The
; expected output still scalarizes the 'icmp ne'; the expected result base is
; undef.
define <4 x i1> @ins0_ins0_i32(i32 %x, i32 %y) {
; CHECK-LABEL: @ins0_ins0_i32(
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> undef, i1 [[R_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i1> [[R]]
;
%i0 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
%i1 = insertelement <4 x i32> undef, i32 %y, i32 0
%r = icmp ne <4 x i32> %i0, %i1
ret <4 x i1> %r
}
; Extra use is accounted for in cost calculation.
; The first insert (%i0) is also passed to @use, so it has to stay. The
; expected output keeps that insert and the call, drops the second insert, and
; still replaces the vector 'icmp ugt' with a scalar compare plus insert.
define <4 x i1> @ins0_ins0_i32_use(i32 %x, i32 %y) {
; CHECK-LABEL: @ins0_ins0_i32_use(
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; CHECK-NEXT: call void @use(<4 x i32> [[I0]])
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ugt i32 [[X]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> undef, i1 [[R_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i1> [[R]]
;
%i0 = insertelement <4 x i32> undef, i32 %x, i32 0
call void @use(<4 x i32> %i0)
%i1 = insertelement <4 x i32> undef, i32 %y, i32 0
%r = icmp ugt <4 x i32> %i0, %i1
ret <4 x i1> %r
}
; Extra use is accounted for in cost calculation.
; Here the second insert (%i1) has the extra use via @usef. The expected output
; keeps %i1 and the call, drops %i0, and scalarizes the 'fcmp ogt'. The expected
; result base for the undef-vs-undef 'ogt' compare is zeroinitializer.
define <4 x i1> @ins1_ins1_f32_use(float %x, float %y) {
; CHECK-LABEL: @ins1_ins1_f32_use(
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 1
; CHECK-NEXT: call void @usef(<4 x float> [[I1]])
; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp ogt float [[X:%.*]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 1
; CHECK-NEXT: ret <4 x i1> [[R]]
;
%i0 = insertelement <4 x float> undef, float %x, i32 1
%i1 = insertelement <4 x float> undef, float %y, i32 1
call void @usef(<4 x float> %i1)
%r = fcmp ogt <4 x float> %i0, %i1
ret <4 x i1> %r
}
; If the scalar cmp is not cheaper than the vector cmp, extra uses can prevent the transform.
; Both inserts are passed to @usef, so neither insert could be removed by
; scalarizing. The expected output is the input IR unchanged.
define <4 x i1> @ins2_ins2_f32_uses(float %x, float %y) {
; CHECK-LABEL: @ins2_ins2_f32_uses(
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 2
; CHECK-NEXT: call void @usef(<4 x float> [[I0]])
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 2
; CHECK-NEXT: call void @usef(<4 x float> [[I1]])
; CHECK-NEXT: [[R:%.*]] = fcmp oeq <4 x float> [[I0]], [[I1]]
; CHECK-NEXT: ret <4 x i1> [[R]]
;
%i0 = insertelement <4 x float> undef, float %x, i32 2
call void @usef(<4 x float> %i0)
%i1 = insertelement <4 x float> undef, float %y, i32 2
call void @usef(<4 x float> %i1)
%r = fcmp oeq <4 x float> %i0, %i1
ret <4 x i1> %r
}
; Compare of an inserted scalar against a constant vector on the right-hand
; side. The constant has 42 in the inserted lane and undef elsewhere. The
; expected output compares %x with 42 and inserts into an undef <2 x i1>.
define <2 x i1> @constant_op1_i64(i64 %x) {
; CHECK-LABEL: @constant_op1_i64(
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ne i64 [[X:%.*]], 42
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> undef, i1 [[R_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%r = icmp ne <2 x i64> %ins, <i64 42, i64 undef>
ret <2 x i1> %r
}
; Same shape as @constant_op1_i64, but the constant's other lane is a defined
; value (-42) instead of undef. The expected output still scalarizes the
; 'icmp sge' against 42; the expected result base is an all-true vector.
define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {
; CHECK-LABEL: @constant_op1_i64_not_undef_lane(
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp sge i64 [[X:%.*]], 42
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> <i1 true, i1 true>, i1 [[R_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%r = icmp sge <2 x i64> %ins, <i64 42, i64 -42>
ret <2 x i1> %r
}
; negative test - load prevents the transform
; The inserted scalar comes from a load rather than a function argument. The
; expected output keeps the vector 'icmp eq'; the only visible difference from
; the input is that the load is printed with an explicit 'align 8'.
define <2 x i1> @constant_op1_i64_load(ptr %p) {
; CHECK-LABEL: @constant_op1_i64_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0
; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%ld = load i64, ptr %p
%ins = insertelement <2 x i64> undef, i64 %ld, i32 0
%r = icmp eq <2 x i64> %ins, <i64 42, i64 -42>
ret <2 x i1> %r
}
; Constant vector on the left-hand side of the compare, with -42 in the
; inserted lane (1) and undef elsewhere. The expected scalar compare keeps the
; operand order (constant first). The expected result base is zeroinitializer.
define <4 x i1> @constant_op0_i32(i32 %x) {
; CHECK-LABEL: @constant_op0_i32(
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ult i32 -42, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 1
; CHECK-NEXT: ret <4 x i1> [[R]]
;
%ins = insertelement <4 x i32> undef, i32 %x, i32 1
%r = icmp ult <4 x i32> <i32 undef, i32 -42, i32 undef, i32 undef>, %ins
ret <4 x i1> %r
}
; Left-hand constant vector with all lanes defined. The scalar compare is
; expected to use the element from the inserted lane (lane 1, value 42) as its
; first operand; the expected result base is an all-true vector.
define <4 x i1> @constant_op0_i32_not_undef_lane(i32 %x) {
; CHECK-LABEL: @constant_op0_i32_not_undef_lane(
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ule i32 42, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i1 [[R_SCALAR]], i64 1
; CHECK-NEXT: ret <4 x i1> [[R]]
;
%ins = insertelement <4 x i32> undef, i32 %x, i32 1
%r = icmp ule <4 x i32> <i32 1, i32 42, i32 42, i32 -42>, %ins
ret <4 x i1> %r
}
; Floating-point compare with the constant vector on the left-hand side. The
; 'fast' flag on the vector fcmp is expected to be carried over to the scalar
; fcmp; the expected result base is zeroinitializer.
define <2 x i1> @constant_op0_f64(double %x) {
; CHECK-LABEL: @constant_op0_f64(
; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp fast olt double 4.200000e+01, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
%r = fcmp fast olt <2 x double> <double 42.0, double undef>, %ins
ret <2 x i1> %r
}
; Left-hand floating-point constant with both lanes defined; the scalar is
; inserted at lane 1, so the scalar compare is expected to use that lane's
; constant (-42.0) and keep the 'nnan' flag. The expected result base is an
; all-true vector.
define <2 x i1> @constant_op0_f64_not_undef_lane(double %x) {
; CHECK-LABEL: @constant_op0_f64_not_undef_lane(
; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp nnan ueq double -4.200000e+01, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> <i1 true, i1 true>, i1 [[R_SCALAR]], i64 1
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%ins = insertelement <2 x double> undef, double %x, i32 1
%r = fcmp nnan ueq <2 x double> <double 42.0, double -42.0>, %ins
ret <2 x i1> %r
}
; Floating-point compare with the constant vector on the right-hand side and
; the scalar inserted at lane 1. The expected output compares %x with 42.0
; using 'one' and inserts the result at lane 1 of a zeroinitializer base.
define <2 x i1> @constant_op1_f64(double %x) {
; CHECK-LABEL: @constant_op1_f64(
; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp one double [[X:%.*]], 4.200000e+01
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 1
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%ins = insertelement <2 x double> undef, double %x, i32 1
%r = fcmp one <2 x double> %ins, <double undef, double 42.0>
ret <2 x i1> %r
}
; Right-hand floating-point constant with all four lanes defined. The scalar
; compare is expected to use the lane-0 constant (42.0); the expected result
; base is an all-true vector.
define <4 x i1> @constant_op1_f32_not_undef_lane(float %x) {
; CHECK-LABEL: @constant_op1_f32_not_undef_lane(
; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i1 [[R_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i1> [[R]]
;
%ins = insertelement <4 x float> undef, float %x, i32 0
%r = fcmp uge <4 x float> %ins, <float 42.0, float -42.0, float 0.0, float 1.0>
ret <4 x i1> %r
}
; negative test - select prevents the transform
; The vector compare of two inserted scalars feeds a vector select as its
; condition. The expected output is the input IR unchanged.
define <4 x float> @vec_select_use1(<4 x float> %x, <4 x float> %y, i32 %a, i32 %b) {
; CHECK-LABEL: @vec_select_use1(
; CHECK-NEXT: [[VECA:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i8 0
; CHECK-NEXT: [[VECB:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i8 0
; CHECK-NEXT: [[COND:%.*]] = icmp eq <4 x i32> [[VECA]], [[VECB]]
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]
; CHECK-NEXT: ret <4 x float> [[R]]
;
%veca = insertelement <4 x i32> undef, i32 %a, i8 0
%vecb = insertelement <4 x i32> undef, i32 %b, i8 0
%cond = icmp eq <4 x i32> %veca, %vecb
%r = select <4 x i1> %cond, <4 x float> %x, <4 x float> %y
ret <4 x float> %r
}
; negative test - select prevents the transform
; Same as @vec_select_use1 but with a floating-point compare of one inserted
; scalar against a constant zeroinitializer vector. The expected output is the
; input IR unchanged.
define <4 x float> @vec_select_use2(<4 x float> %x, <4 x float> %y, float %a) {
; CHECK-LABEL: @vec_select_use2(
; CHECK-NEXT: [[VECA:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i8 0
; CHECK-NEXT: [[COND:%.*]] = fcmp oeq <4 x float> [[VECA]], zeroinitializer
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]
; CHECK-NEXT: ret <4 x float> [[R]]
;
%veca = insertelement <4 x float> undef, float %a, i8 0
%cond = fcmp oeq <4 x float> %veca, zeroinitializer
%r = select <4 x i1> %cond, <4 x float> %x, <4 x float> %y
ret <4 x float> %r
}
; Pointer element type: a ptr inserted at lane 0 of an undef <4 x ptr> is
; compared 'ne' against a zeroinitializer vector. The expected output compares
; the scalar pointer with null and inserts the result into an undef <4 x i1>.
define <4 x i1> @vector_of_pointers(ptr %t1) {
; CHECK-LABEL: @vector_of_pointers(
; CHECK-NEXT: [[T6_SCALAR:%.*]] = icmp ne ptr [[T1:%.*]], null
; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x i1> undef, i1 [[T6_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i1> [[T6]]
;
%t5 = insertelement <4 x ptr> undef, ptr %t1, i32 0
%t6 = icmp ne <4 x ptr> %t5, zeroinitializer
ret <4 x i1> %t6
}