Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
Alexey Bataev c7657cf7d1 [SLP]Keep externally used GEPs as GEPs, if possible instead of extractelement.
If the vectorized GEP instruction can be still kept as a scalar GEP,
better to keep it as scalar instead of extractelement. In many cases it
is more profitable.

Metric: size..text

Program                                                                          size..text
                                                                                 results     results0    diff
                        test-suite :: SingleSource/Benchmarks/Misc/oourafft.test    18911.00    19695.00  4.1%
                   test-suite :: SingleSource/Benchmarks/Misc-C++-EH/spirit.test    59987.00    60707.00  1.2%
       test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test  1392209.00  1392753.00  0.0%
        test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test  1392209.00  1392753.00  0.0%
           test-suite :: External/SPEC/CINT2006/400.perlbench/400.perlbench.test  1087996.00  1088236.00  0.0%
                         test-suite :: MultiSource/Benchmarks/Bullet/bullet.test   309310.00   309342.00  0.0%
             test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test   664661.00   664693.00  0.0%
            test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test   664661.00   664693.00  0.0%
        test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 12354636.00 12354908.00  0.0%
                  test-suite :: External/SPEC/CFP2006/453.povray/453.povray.test  1152748.00  1152716.00 -0.0%
                       test-suite :: MultiSource/Applications/oggenc/oggenc.test   191787.00   191771.00 -0.0%
                     test-suite :: SingleSource/UnitTests/matrix-types-spec.test   480796.00   480476.00 -0.1%

Misc/oourafft - Extra code gets vectorized
Misc-C++-EH/spirit - same
CFP2017speed/638.imagick_s
CFP2017rate/538.imagick_r - same, extra code gets vectorized
CINT2006/400.perlbench - some extra 4 x ptr stores vectorized
Bullet/bullet - extra 4 x ptr store vectorized
CINT2017rate/525.x264_r
CINT2017speed/625.x264_s - same
CFP2017rate/526.blender_r - extra 8 x float stores (several), some extra
4 x ptr stores
CFP2006/453.povray - 2 x double loads/stores replaced by 4 x double
loads/stores
Applications/oggenc - extra code is vectorized
UnitTests/matrix-types-spec - extra code gets vectorized

Reviewers: RKSimon

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/88877
2024-04-16 14:54:06 -04:00

78 lines
3.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-apple-macosx -mcpu=haswell < %s | FileCheck %s
define void @test(ptr %r, ptr %p, ptr %q) #0 {
; CHECK-LABEL: @test(
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 0
; CHECK-NEXT: [[Q0:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[P0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr [[Q0]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; CHECK-NEXT: [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP7]]
; CHECK-NEXT: ret void
;
%p0 = getelementptr inbounds i64, ptr %p, i64 0
%p1 = getelementptr inbounds i64, ptr %p, i64 1
%p2 = getelementptr inbounds i64, ptr %p, i64 2
%p3 = getelementptr inbounds i64, ptr %p, i64 3
%q0 = getelementptr inbounds i64, ptr %q, i64 0
%q1 = getelementptr inbounds i64, ptr %q, i64 1
%q2 = getelementptr inbounds i64, ptr %q, i64 2
%q3 = getelementptr inbounds i64, ptr %q, i64 3
%x0 = load i64, ptr %p0, align 2
%x1 = load i64, ptr %p1, align 2
%x2 = load i64, ptr %p2, align 2
%x3 = load i64, ptr %p3, align 2
%y0 = load i64, ptr %q0, align 2
%y1 = load i64, ptr %q1, align 2
%y2 = load i64, ptr %q2, align 2
%y3 = load i64, ptr %q3, align 2
%sub0 = sub nsw i64 %x0, %y0
%sub1 = sub nsw i64 %x1, %y1
%sub2 = sub nsw i64 %x2, %y2
%sub3 = sub nsw i64 %x3, %y3
%g0 = getelementptr inbounds i32, ptr %r, i64 %sub0
%g1 = getelementptr inbounds i32, ptr %r, i64 %sub1
%g2 = getelementptr inbounds i32, ptr %r, i64 %sub2
%g3 = getelementptr inbounds i32, ptr %r, i64 %sub3
ret void
}
define void @test2(ptr %a, ptr %b) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[B:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> <i64 1, i64 3>
; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[A1]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP4]], [[TMP5]]
; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr [[A1]], align 8
; CHECK-NEXT: ret void
;
%a1 = getelementptr inbounds i64, ptr %a, i64 1
%a2 = getelementptr inbounds i64, ptr %a, i64 2
%i1 = ptrtoint ptr %a1 to i64
%b3 = getelementptr inbounds i64, ptr %b, i64 3
%i2 = ptrtoint ptr %b3 to i64
%v1 = load i64, ptr %a1, align 8
%v2 = load i64, ptr %a2, align 8
%add1 = add i64 %i1, %v1
%add2 = add i64 %i2, %v2
store i64 %add1, ptr %a1, align 8
store i64 %add2, ptr %a2, align 8
ret void
}