If the operands of a potential alternate node are going to produce
buildvector sequences that result in more instructions than the
original code, then such instructions should not be vectorized as an
alternate node; it is better to end up with a buildvector node.
Left column - experimental, Right - reference.
Metric: size..text
Program size..text
results results0 diff
test-suite :: SingleSource/Benchmarks/Adobe-C++/loop_unroll.test 413680.00 416272.00 0.6%
test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 12351788.00 12354844.00 0.0%
test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 664901.00 664949.00 0.0%
test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 664901.00 664949.00 0.0%
test-suite :: External/SPEC/CFP2017rate/511.povray_r/511.povray_r.test 1171371.00 1171355.00 -0.0%
test-suite :: MultiSource/Benchmarks/7zip/7zip-benchmark.test 1036396.00 1036284.00 -0.0%
test-suite :: MultiSource/Benchmarks/MiBench/consumer-jpeg/consumer-jpeg.test 111280.00 111248.00 -0.0%
test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test 1392113.00 1391361.00 -0.1%
test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test 1392113.00 1391361.00 -0.1%
test-suite :: MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/timberwolfmc.test 281676.00 281452.00 -0.1%
test-suite :: MultiSource/Benchmarks/VersaBench/ecbdes/ecbdes.test 3025.00 3019.00 -0.2%
test-suite :: MultiSource/Benchmarks/Prolangs-C/plot2fig/plot2fig.test 6351.00 6335.00 -0.3%
Metric: SLP.NumVectorInstructions
Program SLP.NumVectorInstructions
results results0 diff
test-suite :: MultiSource/Benchmarks/VersaBench/ecbdes/ecbdes.test 15.00 16.00 6.7%
test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 1703.00 1707.00 0.2%
test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 1703.00 1707.00 0.2%
test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 26241.00 26239.00 -0.0%
test-suite :: External/SPEC/CFP2017rate/510.parest_r/510.parest_r.test 11761.00 11754.00 -0.1%
test-suite :: MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/timberwolfmc.test 824.00 822.00 -0.2%
test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test 5668.00 5654.00 -0.2%
test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test 5668.00 5654.00 -0.2%
test-suite :: External/SPEC/CINT2017rate/502.gcc_r/502.gcc_r.test 792.00 790.00 -0.3%
test-suite :: External/SPEC/CINT2017speed/602.gcc_s/602.gcc_s.test 792.00 790.00 -0.3%
test-suite :: MultiSource/Benchmarks/FreeBench/pifft/pifft.test 1389.00 1384.00 -0.4%
test-suite :: MultiSource/Benchmarks/7zip/7zip-benchmark.test 596.00 590.00 -1.0%
test-suite :: MultiSource/Benchmarks/Prolangs-C/plot2fig/plot2fig.test 6.00 5.00 -16.7%
Metric: exec_time
Program exec_time
results results0 diff
test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 99.14 100.00 0.9%
Other changes are not significant (less than 0.1% difference, with
execution time below 5 secs).
SingleSource/Benchmarks/Adobe-C++/loop_unroll - some small patterns
remain scalar, resulting in smaller code.
External/SPEC/CFP2017rate/526.blender_r/526.blender_r - many small
changes, some extra stores get vectorized.
External/SPEC/CINT2017speed/625.x264_s/625.x264_s
External/SPEC/CINT2017rate/525.x264_r/525.x264_r
x264 has one change in a loop body, in function ssim_end4: some code
remains scalar, resulting in smaller code size.
External/SPEC/CFP2017rate/511.povray_r/511.povray_r - some extra code
gets vectorized, looks like some other patterns were matched.
MultiSource/Benchmarks/7zip/7zip-benchmark - extra stores were
vectorized (looks like the graphs become profitable)
MultiSource/Benchmarks/MiBench/consumer-jpeg/consumer-jpeg - small
changes in vectorized code (some small part remains scalar).
External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r
External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s
Many changes are caused by the fact that the code of one function
(ConvertLCHabToRGB) becomes smaller and this function gets inlined after that.
MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/timberwolfmc - some small
changes here and there: some extra code is vectorized, some remains
scalar (2 x vectors)
MultiSource/Benchmarks/VersaBench/ecbdes/ecbdes - emits 2 scalars
+ 2 insertelems instead of insert, broadcast, alt code (3 instructions,
total 5 insts)
MultiSource/Benchmarks/Prolangs-C/plot2fig/plot2fig - small graph
becomes profitable and gets vectorized.
External/SPEC/CINT2017rate/502.gcc_r/502.gcc_r
External/SPEC/CINT2017speed/602.gcc_s/602.gcc_s
Some small graph becomes profitable and gets vectorized.
MultiSource/Benchmarks/FreeBench/pifft/pifft - no changes in final code.
Reviewers: RKSimon, dtcxzyw
Reviewed By: RKSimon
Pull Request: https://github.com/llvm/llvm-project/pull/84978
187 lines
8.2 KiB
LLVM
187 lines
8.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-10000 < %s | FileCheck %s
|
|
|
|
; test_fdiv: a single scalar fdiv (%a / 3.0) is inserted into lane 1 of a
; <2 x float> whose lane 0 stays poison; that vector is the false arm of a
; select whose true arm is the constant vector <77.0, 99.0>.
; The autogenerated assertions below mirror the input IR one-for-one, i.e.
; the expected output leaves the fdiv as a scalar instruction.
define <2 x float> @test_fdiv(float %a, i1 %cmp) {
|
|
; CHECK-LABEL: define <2 x float> @test_fdiv(
|
|
; CHECK-SAME: float [[A:%.*]], i1 [[CMP:%.*]]) {
|
|
; CHECK-NEXT: [[TMP1:%.*]] = fdiv float [[A]], 3.000000e+00
|
|
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i64 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[CMP]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP2]]
|
|
; CHECK-NEXT: ret <2 x float> [[TMP3]]
|
|
;
|
|
%1 = fdiv float %a, 3.000000e+00
|
|
%2 = insertelement <2 x float> poison, float %1, i64 1
|
|
%3 = select i1 %cmp, <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> %2
|
|
ret <2 x float> %3
|
|
}
|
|
|
|
; test_frem: same shape as the fdiv case, but with frem. A single scalar
; frem (%a rem 3.0) is inserted into lane 1 of a <2 x float> (lane 0 is
; poison), and the result is the false arm of a select against the constant
; vector <77.0, 99.0>.
; The autogenerated assertions below mirror the input IR one-for-one, i.e.
; the expected output leaves the frem as a scalar instruction.
define <2 x float> @test_frem(float %a, i1 %cmp) {
|
|
; CHECK-LABEL: define <2 x float> @test_frem(
|
|
; CHECK-SAME: float [[A:%.*]], i1 [[CMP:%.*]]) {
|
|
; CHECK-NEXT: [[TMP1:%.*]] = frem float [[A]], 3.000000e+00
|
|
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i64 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[CMP]], <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> [[TMP2]]
|
|
; CHECK-NEXT: ret <2 x float> [[TMP3]]
|
|
;
|
|
%1 = frem float %a, 3.000000e+00
|
|
%2 = insertelement <2 x float> poison, float %1, i64 1
|
|
%3 = select i1 %cmp, <2 x float> <float 7.700000e+01, float 9.900000e+01>, <2 x float> %2
|
|
ret <2 x float> %3
|
|
}
|
|
|
|
; replace_through_casts: builds a <2 x float> from two scalar casts that use
; different opcodes:
;   lane 0 = uitofp %inp
;   lane 1 = sitofp (add nsw %inp, -10)
; The autogenerated assertions below expect the add, both scalar casts and
; the two insertelements to come out in the same form as the input.
define <2 x float> @replace_through_casts(i16 %inp) {
|
|
; CHECK-LABEL: define <2 x float> @replace_through_casts(
|
|
; CHECK-SAME: i16 [[INP:%.*]]) {
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10
|
|
; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[INP]] to float
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sitofp i16 [[ADD]] to float
|
|
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i64 0
|
|
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP2]], i64 1
|
|
; CHECK-NEXT: ret <2 x float> [[R]]
|
|
;
|
|
%add = add nsw i16 %inp, -10
|
|
%1 = uitofp i16 %inp to float
|
|
%2 = sitofp i16 %add to float
|
|
%3 = insertelement <2 x float> poison, float %1, i64 0
|
|
%r = insertelement <2 x float> %3, float %2, i64 1
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
; replace_through_casts_and_binop: the two lanes of the result are produced
; by chains that end in different opcodes:
;   lane 0 = fadd (uitofp (mul nsw %inp, 5)), 2.0
;   lane 1 = sitofp (add nsw %inp, -10)
; The autogenerated assertions below expect every scalar instruction and
; both insertelements to come out in the same form as the input.
define <2 x float> @replace_through_casts_and_binop(i16 %inp) {
|
|
; CHECK-LABEL: define <2 x float> @replace_through_casts_and_binop(
|
|
; CHECK-SAME: i16 [[INP:%.*]]) {
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i16 [[INP]], 5
|
|
; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[MUL]] to float
|
|
; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 2.000000e+00
|
|
; CHECK-NEXT: [[TMP3:%.*]] = sitofp i16 [[ADD]] to float
|
|
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
|
|
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1
|
|
; CHECK-NEXT: ret <2 x float> [[R]]
|
|
;
|
|
%add = add nsw i16 %inp, -10
|
|
%mul = mul nsw i16 %inp, 5
|
|
%1 = uitofp i16 %mul to float
|
|
%2 = fadd float %1, 2.000000e+00
|
|
%3 = sitofp i16 %add to float
|
|
%4 = insertelement <2 x float> poison, float %2, i64 0
|
|
%r = insertelement <2 x float> %4, float %3, i64 1
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
; replace_through_casts_and_binop_and_unop: both lanes start from the same
; scalar %add = add nsw %inp, -10 but go through different cast and
; arithmetic opcodes:
;   lane 0 = fadd (uitofp %add), 2.0
;   lane 1 = fneg (sitofp %add)
; In the input the lane-1 chain (sitofp/fneg) is defined before the lane-0
; chain (uitofp/fadd). The autogenerated assertions below expect the same
; scalar instructions, in the same order, followed by the two insertelements.
define <2 x float> @replace_through_casts_and_binop_and_unop(i16 %inp) {
|
|
; CHECK-LABEL: define <2 x float> @replace_through_casts_and_binop_and_unop(
|
|
; CHECK-SAME: i16 [[INP:%.*]]) {
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10
|
|
; CHECK-NEXT: [[TMP1:%.*]] = sitofp i16 [[ADD]] to float
|
|
; CHECK-NEXT: [[TMP2:%.*]] = fneg float [[TMP1]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = uitofp i16 [[ADD]] to float
|
|
; CHECK-NEXT: [[TMP4:%.*]] = fadd float [[TMP3]], 2.000000e+00
|
|
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0
|
|
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP2]], i64 1
|
|
; CHECK-NEXT: ret <2 x float> [[R]]
|
|
;
|
|
%add = add nsw i16 %inp, -10
|
|
%1 = sitofp i16 %add to float
|
|
%2 = fneg float %1
|
|
%3 = uitofp i16 %add to float
|
|
%4 = fadd float %3, 2.000000e+00
|
|
%5 = insertelement <2 x float> poison, float %4, i64 0
|
|
%r = insertelement <2 x float> %5, float %2, i64 1
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
; replace_through_casts_through_splat: both lanes are derived from the same
; scalar %add = add nsw %inp, -10:
;   lane 0 = fadd (uitofp %add), 2.0
;   lane 1 = fneg (sitofp %add)
; Here the lane-0 chain (uitofp/fadd) is defined before the lane-1 chain
; (sitofp/fneg). The autogenerated assertions below expect the same scalar
; instructions, in the same order, followed by the two insertelements.
; NOTE(review): despite the function name, no splat/shufflevector appears in
; this body - presumably "splat" refers to both lanes sharing %add; confirm.
define <2 x float> @replace_through_casts_through_splat(i16 %inp) {
|
|
; CHECK-LABEL: define <2 x float> @replace_through_casts_through_splat(
|
|
; CHECK-SAME: i16 [[INP:%.*]]) {
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10
|
|
; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[ADD]] to float
|
|
; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 2.000000e+00
|
|
; CHECK-NEXT: [[TMP3:%.*]] = sitofp i16 [[ADD]] to float
|
|
; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
|
|
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1
|
|
; CHECK-NEXT: ret <2 x float> [[R]]
|
|
;
|
|
%add = add nsw i16 %inp, -10
|
|
%1 = uitofp i16 %add to float
|
|
%2 = fadd float %1, 2.000000e+00
|
|
%3 = sitofp i16 %add to float
|
|
%4 = fneg float %3
|
|
%5 = insertelement <2 x float> poison, float %2, i64 0
|
|
%r = insertelement <2 x float> %5, float %4, i64 1
|
|
ret <2 x float> %r
|
|
}
|
|
|
|
; replace_through_int_casts: integer-extension variant. The <2 x i32> result
; is built from two scalar casts with different opcodes:
;   lane 0 = zext %inp
;   lane 1 = sext (add nsw %inp, -10)
; The %dead argument is not referenced in the body.
; The autogenerated assertions below expect the add, both scalar casts and
; the two insertelements to come out in the same form as the input.
define <2 x i32> @replace_through_int_casts(i16 %inp, <2 x i16> %dead) {
|
|
; CHECK-LABEL: define <2 x i32> @replace_through_int_casts(
|
|
; CHECK-SAME: i16 [[INP:%.*]], <2 x i16> [[DEAD:%.*]]) {
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10
|
|
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[INP]] to i32
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sext i16 [[ADD]] to i32
|
|
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0
|
|
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i64 1
|
|
; CHECK-NEXT: ret <2 x i32> [[R]]
|
|
;
|
|
%add = add nsw i16 %inp, -10
|
|
%1 = zext i16 %inp to i32
|
|
%2 = sext i16 %add to i32
|
|
%3 = insertelement <2 x i32> poison, i32 %1, i64 0
|
|
%r = insertelement <2 x i32> %3, i32 %2, i64 1
|
|
ret <2 x i32> %r
|
|
}
|
|
|
|
; replace_through_int_casts_ele0_only: both lanes extend the same scalar
; %inp, but with different cast opcodes:
;   lane 0 = zext %inp
;   lane 1 = sext %inp
; The %dead argument is not referenced in the body.
; The autogenerated assertions below expect the two scalar casts (sext first,
; then zext) and the two insertelements to come out as in the input.
; NOTE(review): the unnamed values in the input are numbered %2/%4/%5 rather
; than consecutively - presumably left over from reducing a larger test;
; confirm the IR parser in use accepts such gaps.
define <2 x i32> @replace_through_int_casts_ele0_only(i16 %inp, <2 x i16> %dead) {
|
|
; CHECK-LABEL: define <2 x i32> @replace_through_int_casts_ele0_only(
|
|
; CHECK-SAME: i16 [[INP:%.*]], <2 x i16> [[DEAD:%.*]]) {
|
|
; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[INP]] to i32
|
|
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[INP]] to i32
|
|
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0
|
|
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i64 1
|
|
; CHECK-NEXT: ret <2 x i32> [[R]]
|
|
;
|
|
%2 = sext i16 %inp to i32
|
|
%4 = zext i16 %inp to i32
|
|
%5 = insertelement <2 x i32> poison, i32 %4, i64 0
|
|
%r = insertelement <2 x i32> %5, i32 %2, i64 1
|
|
ret <2 x i32> %r
|
|
}
|
|
|
|
; replace_through_binop_fail_cant_speculate: the input already contains a
; vector operation.
;   %v    = <%inp, %inp + 5>
;   %div0 = sdiv <-128, -128>, %v        (vector signed division)
;   %r    = %div0 with lane 0 overwritten by the scalar (xor %inp, 123)
; The %d and %any arguments are not referenced in the body.
; The autogenerated assertions below expect the output to keep the vector
; sdiv and the separate scalar xor exactly as written in the input.
define <2 x i8> @replace_through_binop_fail_cant_speculate(i8 %inp, <2 x i8> %d, <2 x i8> %any) {
|
|
; CHECK-LABEL: define <2 x i8> @replace_through_binop_fail_cant_speculate(
|
|
; CHECK-SAME: i8 [[INP:%.*]], <2 x i8> [[D:%.*]], <2 x i8> [[ANY:%.*]]) {
|
|
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[INP]], 5
|
|
; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i64 0
|
|
; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i8> [[V0]], i8 [[ADD]], i64 1
|
|
; CHECK-NEXT: [[DIV0:%.*]] = sdiv <2 x i8> <i8 -128, i8 -128>, [[V]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[INP]], 123
|
|
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i8> [[DIV0]], i8 [[TMP1]], i64 0
|
|
; CHECK-NEXT: ret <2 x i8> [[R]]
|
|
;
|
|
%add = add i8 %inp, 5
|
|
%v0 = insertelement <2 x i8> poison, i8 %inp, i64 0
|
|
%v = insertelement <2 x i8> %v0, i8 %add, i64 1
|
|
%div0 = sdiv <2 x i8> <i8 -128, i8 -128>, %v
|
|
%1 = xor i8 %inp, 123
|
|
%r = insertelement <2 x i8> %div0, i8 %1, i64 0
|
|
ret <2 x i8> %r
|
|
}
|
|
|
|
; replace_through_binop_preserve_flags: the two lanes of the <2 x i8> result
; use different binary opcodes:
;   lane 0 = xor %inp, 123
;   lane 1 = add nsw (xor %inp, 5), 1
; Note that the value named %add is itself an xor, not an add.
; The %d and %any arguments are not referenced in the body.
; The autogenerated assertions below expect the scalar xor and the scalar
; add (still carrying its nsw flag) to come out unchanged, followed by the
; two insertelements.
define <2 x i8> @replace_through_binop_preserve_flags(i8 %inp, <2 x i8> %d, <2 x i8> %any) {
|
|
; CHECK-LABEL: define <2 x i8> @replace_through_binop_preserve_flags(
|
|
; CHECK-SAME: i8 [[INP:%.*]], <2 x i8> [[D:%.*]], <2 x i8> [[ANY:%.*]]) {
|
|
; CHECK-NEXT: [[ADD:%.*]] = xor i8 [[INP]], 5
|
|
; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[INP]], 123
|
|
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i8 [[ADD]], 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i8> poison, i8 [[TMP1]], i64 0
|
|
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i8> [[TMP3]], i8 [[TMP2]], i64 1
|
|
; CHECK-NEXT: ret <2 x i8> [[R]]
|
|
;
|
|
%add = xor i8 %inp, 5
|
|
%1 = xor i8 %inp, 123
|
|
%2 = add nsw i8 %add, 1
|
|
%3 = insertelement <2 x i8> poison, i8 %1, i64 0
|
|
%r = insertelement <2 x i8> %3, i8 %2, i64 1
|
|
ret <2 x i8> %r
|
|
}
|