Previously, the SLP vectorizer supported clustered vectorization for loads only. This patch adds support for "clustered" vectorization of other instructions. If the buildvector node contains "clusters" that can be vectorized separately and then inserted into the resulting buildvector, it is better to do so, since this may reduce the cost of the vector graph and produce better vector code. The patch analyzes whether this kind of extra vectorization is profitable: it checks the scalar instructions and their operands and vectorizes them only if doing so results in a better graph. Reviewers: RKSimon Reviewed By: RKSimon Pull Request: https://github.com/llvm/llvm-project/pull/108430
511 lines
22 KiB
LLVM
511 lines
22 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -mtriple=x86_64-- -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=slp-vectorizer -mtriple=x86_64-- -mattr=avx512vl -S | FileCheck %s --check-prefixes=CHECK,AVX

; External function taking an i1. The "extra_use" tests below call it so that a
; compare or select result has a user outside the select chain.
declare void @use1(i1)

; Logical-and chain over four `icmp slt %x[i], 0` results, where each "and" is
; written as `select i1 %a, i1 %b, i1 false`.
; Expected output: one <4 x i32> compare against zero, a freeze of the
; <4 x i1> result, and a call to llvm.vector.reduce.and.v4i1.
define i1 @logical_and_icmp(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: ret i1 [[TMP3]]
;
  ; Scalarize all four lanes of %x.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  ; Same predicate and same constant in every lane.
  %c0 = icmp slt i32 %x0, 0
  %c1 = icmp slt i32 %x1, 0
  %c2 = icmp slt i32 %x2, 0
  %c3 = icmp slt i32 %x3, 0
  ; Linear chain of select-form "and" operations.
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  ret i1 %s3
}

; Logical-or chain over four `icmp slt %x[i], %y[i]` results, where each "or"
; is written as `select i1 %a, i1 true, i1 %b`.
; Expected output: one <4 x i32> compare of %x against %y, a freeze of the
; <4 x i1> result, and a call to llvm.vector.reduce.or.v4i1.
define i1 @logical_or_icmp(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @logical_or_icmp(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: ret i1 [[TMP3]]
;
  ; Scalarize all four lanes of both operands.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %y0 = extractelement <4 x i32> %y, i32 0
  %y1 = extractelement <4 x i32> %y, i32 1
  %y2 = extractelement <4 x i32> %y, i32 2
  %y3 = extractelement <4 x i32> %y, i32 3
  ; Lane-wise compares with matching lane indices on both sides.
  %c0 = icmp slt i32 %x0, %y0
  %c1 = icmp slt i32 %x1, %y1
  %c2 = icmp slt i32 %x2, %y2
  %c3 = icmp slt i32 %x3, %y3
  ; Linear chain of select-form "or" operations.
  %s1 = select i1 %c0, i1 true, i1 %c1
  %s2 = select i1 %s1, i1 true, i1 %c2
  %s3 = select i1 %s2, i1 true, i1 %c3
  ret i1 %s3
}

; Floating-point variant of the logical-and chain: four `fcmp olt %x[i], 0.0`
; results combined with `select i1 %a, i1 %b, i1 false`.
; Expected output: one <4 x float> compare against zero, a freeze, and a call
; to llvm.vector.reduce.and.v4i1.
define i1 @logical_and_fcmp(<4 x float> %x) {
; CHECK-LABEL: @logical_and_fcmp(
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: ret i1 [[TMP3]]
;
  ; Scalarize all four lanes of %x.
  %x0 = extractelement <4 x float> %x, i32 0
  %x1 = extractelement <4 x float> %x, i32 1
  %x2 = extractelement <4 x float> %x, i32 2
  %x3 = extractelement <4 x float> %x, i32 3
  ; Same ordered predicate and same constant in every lane.
  %c0 = fcmp olt float %x0, 0.0
  %c1 = fcmp olt float %x1, 0.0
  %c2 = fcmp olt float %x2, 0.0
  %c3 = fcmp olt float %x3, 0.0
  ; Linear chain of select-form "and" operations.
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  ret i1 %s3
}

; Floating-point variant of the logical-or chain: four `fcmp olt %x[i], 0.0`
; results combined with `select i1 %a, i1 true, i1 %b`.
; Expected output: one <4 x float> compare against zero, a freeze, and a call
; to llvm.vector.reduce.or.v4i1.
define i1 @logical_or_fcmp(<4 x float> %x) {
; CHECK-LABEL: @logical_or_fcmp(
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: ret i1 [[TMP3]]
;
  ; Scalarize all four lanes of %x.
  %x0 = extractelement <4 x float> %x, i32 0
  %x1 = extractelement <4 x float> %x, i32 1
  %x2 = extractelement <4 x float> %x, i32 2
  %x3 = extractelement <4 x float> %x, i32 3
  ; Same ordered predicate and same constant in every lane.
  %c0 = fcmp olt float %x0, 0.0
  %c1 = fcmp olt float %x1, 0.0
  %c2 = fcmp olt float %x2, 0.0
  %c3 = fcmp olt float %x3, 0.0
  ; Linear chain of select-form "or" operations.
  %s1 = select i1 %c0, i1 true, i1 %c1
  %s2 = select i1 %s1, i1 true, i1 %c2
  %s3 = select i1 %s2, i1 true, i1 %c3
  ret i1 %s3
}

; Logical-and chain whose four compares use different predicates
; (ult, slt, sgt, slt), all against constant 0.
; The two opt invocations are expected to differ here:
;  - Without -mattr (the SSE-prefixed assertions) the chain is vectorized. The
;    operands are shuffled so that the sgt lane becomes `0 slt %x[2]`, a vector
;    slt and a vector ult are computed on the same operands, and a
;    shufflevector takes lane 3 from the ult result before the freeze and
;    llvm.vector.reduce.and.v4i1.
;  - With -mattr=avx512vl (the AVX-prefixed assertions) the scalar code is
;    expected to be left unchanged.
define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
; SSE-LABEL: @logical_and_icmp_diff_preds(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 1, i32 3, i32 6, i32 0>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
; SSE-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP4:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
; SSE-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
; SSE-NEXT: ret i1 [[TMP7]]
;
; AVX-LABEL: @logical_and_icmp_diff_preds(
; AVX-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
; AVX-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
; AVX-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
; AVX-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
; AVX-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0
; AVX-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 0
; AVX-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0
; AVX-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 0
; AVX-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
; AVX-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
; AVX-NEXT: ret i1 [[S3]]
;
  ; Scalarize all four lanes of %x.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  ; Three distinct predicates across the four lanes.
  %c0 = icmp ult i32 %x0, 0
  %c1 = icmp slt i32 %x1, 0
  %c2 = icmp sgt i32 %x2, 0
  %c3 = icmp slt i32 %x3, 0
  ; Linear chain of select-form "and" operations.
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  ret i1 %s3
}

; Logical-and chain where every lane uses `icmp sgt` but against a different
; constant (0, 1, 2, 3).
; Expected output: one <4 x i32> compare against the constant vector
; <0, 1, 2, 3>, a freeze, and a call to llvm.vector.reduce.and.v4i1.
define i1 @logical_and_icmp_diff_const(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_diff_const(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: ret i1 [[TMP3]]
;
  ; Scalarize all four lanes of %x.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  ; Same predicate, per-lane constant equal to the lane index.
  %c0 = icmp sgt i32 %x0, 0
  %c1 = icmp sgt i32 %x1, 1
  %c2 = icmp sgt i32 %x2, 2
  %c3 = icmp sgt i32 %x3, 3
  ; Linear chain of select-form "and" operations.
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  ret i1 %s3
}

; Chain that mixes select-form "and" (%s1, %s3) with select-form "or" (%s2)
; over four `icmp sgt %x[i], 0` results.
; Expected output: the compares are combined into one <4 x i32> compare, but
; no reduction intrinsic is formed. The three selects stay scalar and read
; their operands through extractelement from the compare vector.
define i1 @mixed_logical_icmp(<4 x i32> %x) {
; CHECK-LABEL: @mixed_logical_icmp(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 true, i1 [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP5]], i1 false
; CHECK-NEXT: ret i1 [[S3]]
;
  ; Scalarize all four lanes of %x.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  ; Same predicate and same constant in every lane.
  %c0 = icmp sgt i32 %x0, 0
  %c1 = icmp sgt i32 %x1, 0
  %c2 = icmp sgt i32 %x2, 0
  %c3 = icmp sgt i32 %x3, 0
  ; and, then or, then and: the chain is not a single reduction kind.
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 true, i1 %c2
  %s3 = select i1 %s2, i1 %c3, i1 false
  ret i1 %s3
}

; Logical-and chain that uses only lanes 0..2 of the <4 x i32> input.
; Expected output: lanes 0 and 1 are compared together as a <2 x i32> vector,
; lane 2 is extracted and compared as a scalar, and both selects stay scalar
; (no reduction intrinsic is formed).
define i1 @logical_and_icmp_subvec(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_subvec(
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
; CHECK-NEXT: ret i1 [[S2]]
;
  ; Only three of the four lanes are read; lane 3 is unused.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %c0 = icmp slt i32 %x0, 0
  %c1 = icmp slt i32 %x1, 0
  %c2 = icmp slt i32 %x2, 0
  ; Two-link chain of select-form "and" operations.
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  ret i1 %s2
}

; TODO: This is better than all-scalar and still safe,
;       but we want this to be 2 reductions with glue
;       logic...or a wide reduction?
; NOTE(review): the expected output below is already a single 8-wide
; reduction; confirm whether the TODO above is still current.
;
; Range test on every lane: the chain ands four `icmp slt %x[i], 42` results
; followed by four `icmp sgt %x[i], 17` results.
; Expected output: %x is widened to <8 x i32> by repeating its four lanes, an
; 8-wide sgt and an 8-wide slt are computed against <17 x4, 42 x4>, a
; shufflevector keeps the sgt results in lanes 0..3 and the slt results in
; lanes 4..7, and the blend is frozen and fed to llvm.vector.reduce.and.v8i1.
define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
; CHECK-NEXT: ret i1 [[TMP6]]
;
  ; Scalarize all four lanes of %x.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  ; Upper-bound compares.
  %c0 = icmp slt i32 %x0, 42
  %c1 = icmp slt i32 %x1, 42
  %c2 = icmp slt i32 %x2, 42
  %c3 = icmp slt i32 %x3, 42
  ; Lower-bound compares.
  %d0 = icmp sgt i32 %x0, 17
  %d1 = icmp sgt i32 %x1, 17
  %d2 = icmp sgt i32 %x2, 17
  %d3 = icmp sgt i32 %x3, 17
  ; One chain of select-form "and" operations over all eight compares.
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  %s4 = select i1 %s3, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; Same eight-compare range chain as @logical_and_icmp_clamp, except that the
; compare %c2 is additionally passed to @use1.
; Expected output: the 8-wide reduction is still formed, and the argument for
; the @use1 call is taken with extractelement from lane 6 of the blended
; compare vector (the slt-42 result for lane 2 of %x).
define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_cmp(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6
; CHECK-NEXT: call void @use1(i1 [[TMP5]])
; CHECK-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]])
; CHECK-NEXT: ret i1 [[TMP7]]
;
  ; Scalarize all four lanes of %x.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  ; Upper-bound compares; %c2 gets a second user via the call.
  %c0 = icmp slt i32 %x0, 42
  %c1 = icmp slt i32 %x1, 42
  %c2 = icmp slt i32 %x2, 42
  call void @use1(i1 %c2)
  %c3 = icmp slt i32 %x3, 42
  ; Lower-bound compares.
  %d0 = icmp sgt i32 %x0, 17
  %d1 = icmp sgt i32 %x1, 17
  %d2 = icmp sgt i32 %x2, 17
  %d3 = icmp sgt i32 %x3, 17
  ; One chain of select-form "and" operations over all eight compares.
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  %s4 = select i1 %s3, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; Same eight-compare range chain as @logical_and_icmp_clamp, except that the
; intermediate select %s2 is additionally passed to @use1.
; Expected output: the slt-42 and sgt-17 compares become two separate
; <4 x i32> compares. %s1 and %s2 stay scalar selects fed by extractelement so
; that %s2 is available for the call. Only the sgt vector is reduced with
; llvm.vector.reduce.and.v4i1; its result is then combined with lane 3 of the
; slt vector and with %s2 through two scalar selects.
define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_select(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[TMP5]], i1 false
; CHECK-NEXT: call void @use1(i1 [[S2]])
; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP8]], i1 false
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[S2]], i1 [[OP_RDX]], i1 false
; CHECK-NEXT: ret i1 [[OP_RDX1]]
;
  ; Scalarize all four lanes of %x.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  ; Upper-bound compares.
  %c0 = icmp slt i32 %x0, 42
  %c1 = icmp slt i32 %x1, 42
  %c2 = icmp slt i32 %x2, 42
  %c3 = icmp slt i32 %x3, 42
  ; Lower-bound compares.
  %d0 = icmp sgt i32 %x0, 17
  %d1 = icmp sgt i32 %x1, 17
  %d2 = icmp sgt i32 %x2, 17
  %d3 = icmp sgt i32 %x3, 17
  ; Chain of select-form "and" operations; %s2 gets a second user via the call.
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  call void @use1(i1 %s2)
  %s3 = select i1 %s2, i1 %c3, i1 false
  %s4 = select i1 %s3, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; Eight-compare logical-and chain over <8 x i32> inputs of which only lanes
; 0..3 are read: four `icmp slt %x[i], 42` followed by four
; `icmp slt %x[i], %y[i]`.
; Expected output: a single 8-wide slt. The left operand repeats lanes 0..3 of
; %x twice; the right operand holds the constant 42 in lanes 0..3 and lanes
; 0..3 of %y inserted at index 4 via llvm.vector.insert. The result is frozen
; and fed to llvm.vector.reduce.and.v8i1.
define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: @logical_and_icmp_clamp_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, <4 x i32> [[TMP2]], i64 4)
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
; CHECK-NEXT: ret i1 [[TMP6]]
;
  ; Only the low four lanes of each 8-wide input are read.
  %x0 = extractelement <8 x i32> %x, i32 0
  %x1 = extractelement <8 x i32> %x, i32 1
  %x2 = extractelement <8 x i32> %x, i32 2
  %x3 = extractelement <8 x i32> %x, i32 3
  %y0 = extractelement <8 x i32> %y, i32 0
  %y1 = extractelement <8 x i32> %y, i32 1
  %y2 = extractelement <8 x i32> %y, i32 2
  %y3 = extractelement <8 x i32> %y, i32 3
  ; Compares against a constant.
  %c0 = icmp slt i32 %x0, 42
  %c1 = icmp slt i32 %x1, 42
  %c2 = icmp slt i32 %x2, 42
  %c3 = icmp slt i32 %x3, 42
  ; Compares against the matching lane of %y (same predicate as above).
  %d0 = icmp slt i32 %x0, %y0
  %d1 = icmp slt i32 %x1, %y1
  %d2 = icmp slt i32 %x2, %y2
  %d3 = icmp slt i32 %x3, %y3
  ; One chain of select-form "and" operations over all eight compares.
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  %s4 = select i1 %s3, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; Variant of @logical_and_icmp_clamp with one upper-bound compare removed:
; three `icmp slt %x[i], 42` results (lanes 0..2) followed by four
; `icmp sgt %x[i], 17` results.
; Expected output: the four sgt compares become one <4 x i32> compare reduced
; with llvm.vector.reduce.and.v4i1; the slt compares for lanes 0 and 1 become
; a <2 x i32> compare and the one for lane 2 stays scalar. The pieces are
; combined with scalar selects, with freeze applied to the first operand of
; two of them.
define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp_partial(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], <i32 42, i32 42>
; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[TMP1]], 42
; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP6]], i1 false
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP8]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP9]], i1 [[C2]], i1 false
; CHECK-NEXT: [[TMP10:%.*]] = freeze i1 [[OP_RDX]]
; CHECK-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP10]], i1 [[OP_RDX1]], i1 false
; CHECK-NEXT: ret i1 [[OP_RDX2]]
;
  ; Scalarize all four lanes of %x.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %c0 = icmp slt i32 %x0, 42
  %c1 = icmp slt i32 %x1, 42
  %c2 = icmp slt i32 %x2, 42
  ; No %c3 here: the slt compare on lane 3 is dropped relative to
  ; @logical_and_icmp_clamp.
  %d0 = icmp sgt i32 %x0, 17
  %d1 = icmp sgt i32 %x1, 17
  %d2 = icmp sgt i32 %x2, 17
  %d3 = icmp sgt i32 %x3, 17
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  ; No %s3 here: the chain continues from %s2 straight into the sgt compares.
  %s4 = select i1 %s2, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; Variant of @logical_and_icmp_clamp in which one upper-bound compare (%c3)
; uses `ult` instead of `slt`.
; Expected output: an 8-wide reduction is still formed. The operand vectors
; are arranged so that lanes 0..3 compute `%x[i] sgt 17`, lanes 4..6 compute
; `42 sgt %x[i]` (the slt compares with operands swapped), and lane 7 holds
; `%x[3]` versus 42. An 8-wide sgt and an 8-wide ult are computed on the same
; operands, and a shufflevector takes lane 7 from the ult result before the
; freeze and llvm.vector.reduce.and.v8i1.
define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp_pred_diff(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 42, i32 42, i32 42, i32 poison>, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP2]], <4 x i32> [[X]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> <i32 17, i32 17, i32 17, i32 17, i32 poison, i32 poison, i32 poison, i32 42>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 15>
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <8 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <8 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i1> [[TMP5]], <8 x i1> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
; CHECK-NEXT: [[TMP8:%.*]] = freeze <8 x i1> [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP8]])
; CHECK-NEXT: ret i1 [[TMP9]]
;
  ; Scalarize all four lanes of %x.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  ; Upper-bound compares; the last one is unsigned.
  %c0 = icmp slt i32 %x0, 42
  %c1 = icmp slt i32 %x1, 42
  %c2 = icmp slt i32 %x2, 42
  %c3 = icmp ult i32 %x3, 42 ; predicate changed
  ; Lower-bound compares.
  %d0 = icmp sgt i32 %x0, 17
  %d1 = icmp sgt i32 %x1, 17
  %d2 = icmp sgt i32 %x2, 17
  %d3 = icmp sgt i32 %x3, 17
  ; One chain of select-form "and" operations over all eight compares.
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  %s4 = select i1 %s3, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; Logical-and chain over four `icmp slt %x[i], %y[i]` results that starts from
; an extra scalar operand: the first link is `select i1 %c, i1 %c, i1 false`
; on the i1 argument %c.
; Expected output: the four compares are reduced with
; llvm.vector.reduce.and.v4i1 (after a freeze), and %c is folded in afterwards
; with a single scalar `select rdx, %c, false`.
define i1 @logical_and_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
; CHECK-LABEL: @logical_and_icmp_extra_op(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[C:%.*]], i1 false
; CHECK-NEXT: ret i1 [[OP_RDX]]
;
  ; Scalarize all four lanes of both operands.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %y0 = extractelement <4 x i32> %y, i32 0
  %y1 = extractelement <4 x i32> %y, i32 1
  %y2 = extractelement <4 x i32> %y, i32 2
  %y3 = extractelement <4 x i32> %y, i32 3
  %d0 = icmp slt i32 %x0, %y0
  %d1 = icmp slt i32 %x1, %y1
  %d2 = icmp slt i32 %x2, %y2
  %d3 = icmp slt i32 %x3, %y3
  ; The chain head uses %c for both the condition and the true value.
  %s3 = select i1 %c, i1 %c, i1 false
  %s4 = select i1 %s3, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; Logical-or counterpart of @logical_and_icmp_extra_op: the chain over four
; `icmp slt %x[i], %y[i]` results starts with `select i1 %c, i1 true, i1 %c`
; on the i1 argument %c.
; Expected output: the four compares are reduced with
; llvm.vector.reduce.or.v4i1 (after a freeze), and %c is folded in afterwards
; with a single scalar `select rdx, true, %c`.
define i1 @logical_or_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
; CHECK-LABEL: @logical_or_icmp_extra_op(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 true, i1 [[C:%.*]]
; CHECK-NEXT: ret i1 [[OP_RDX]]
;
  ; Scalarize all four lanes of both operands.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %y0 = extractelement <4 x i32> %y, i32 0
  %y1 = extractelement <4 x i32> %y, i32 1
  %y2 = extractelement <4 x i32> %y, i32 2
  %y3 = extractelement <4 x i32> %y, i32 3
  %d0 = icmp slt i32 %x0, %y0
  %d1 = icmp slt i32 %x1, %y1
  %d2 = icmp slt i32 %x2, %y2
  %d3 = icmp slt i32 %x3, %y3
  ; The chain head uses %c for both the condition and the false value.
  %s3 = select i1 %c, i1 true, i1 %c
  %s4 = select i1 %s3, i1 true, i1 %d0
  %s5 = select i1 %s4, i1 true, i1 %d1
  %s6 = select i1 %s5, i1 true, i1 %d2
  %s7 = select i1 %s6, i1 true, i1 %d3
  ret i1 %s7
}

; Logical-and chain in which three scalar i1 arguments (%c0, %c1, %c2) are
; interleaved between the vectorizable compares: the chain order is
; %d0, %c0, %c1, %c2, %d1, %d2, %d3, where each %dN is `icmp sgt %x[N], 17`.
; Expected output: the four compares are reduced with
; llvm.vector.reduce.and.v4i1 (after a freeze), and the three arguments are
; folded in with scalar selects, with freeze applied to the first operand of
; two of them.
define i1 @logical_and_icmp_extra_args(<4 x i32> %x, i1 %c0, i1 %c1, i1 %c2) {
; CHECK-LABEL: @logical_and_icmp_extra_args(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], <i32 17, i32 17, i32 17, i32 17>
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[C0:%.*]], i1 false
; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[C1:%.*]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP4]], i1 [[C2:%.*]], i1 false
; CHECK-NEXT: [[TMP5:%.*]] = freeze i1 [[OP_RDX]]
; CHECK-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP5]], i1 [[OP_RDX1]], i1 false
; CHECK-NEXT: ret i1 [[OP_RDX2]]
;
  ; Scalarize all four lanes of %x.
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %d0 = icmp sgt i32 %x0, 17
  %d1 = icmp sgt i32 %x1, 17
  %d2 = icmp sgt i32 %x2, 17
  %d3 = icmp sgt i32 %x3, 17
  %s1 = select i1 %d0, i1 %c0, i1 false ; <- d0, d1, d2, d3 get reduced.
  %s2 = select i1 %s1, i1 %c1, i1 false ; <- c0, c1, c2 remain scalar.
  %s3 = select i1 %s2, i1 %c2, i1 false
  %s5 = select i1 %s3, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}
