Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/RISCV/small-phi-tree.ll
Alexey Bataev 894935cb51 [SLP]Represent SLP graph as a tree
We can stop using a graph representation of the SLP structure and switch
directly to tree by relying on a single user of each tree node. If the
node has multiple uses, other uses must be represented as a separate
gather/buildvector node, which then will be combined with the existing
vectorized node(s) uoon cost estimation/codegen.
This allow to simplify inner structure and turn in some extra
optimizations, which could not be turned on for the nodes with multi
users (reordering, minbitwidth analysis).

AVX512, -O3+LTO
Metric: size..text
                                                                               results     results0    diff
         test-suite :: MultiSource/Benchmarks/ASCI_Purple/SMG2000/smg2000.test   253453.00   254253.00  0.3%
                    test-suite :: External/SPEC/CFP2006/444.namd/444.namd.test   251411.00   252051.00  0.3%
                      test-suite :: SingleSource/Benchmarks/Misc/oourafft.test    19114.00    19146.00  0.2%
     test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test  1399200.00  1399520.00  0.0%
      test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test  1399200.00  1399520.00  0.0%
      test-suite :: MicroBenchmarks/LCALS/SubsetALambdaLoops/lcalsALambda.test   304310.00   304326.00  0.0%
            test-suite :: MicroBenchmarks/LCALS/SubsetARawLoops/lcalsARaw.test   304662.00   304678.00  0.0%
      test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 12566919.00 12567511.00  0.0%
                test-suite :: External/SPEC/CFP2006/453.povray/453.povray.test  1146300.00  1146316.00  0.0%
        test-suite :: External/SPEC/CFP2017rate/511.povray_r/511.povray_r.test  1159864.00  1159880.00  0.0%
             test-suite :: External/SPEC/CINT2017rate/502.gcc_r/502.gcc_r.test  9407880.00  9407864.00 -0.0%
            test-suite :: External/SPEC/CINT2017speed/602.gcc_s/602.gcc_s.test  9407880.00  9407864.00 -0.0%
               test-suite :: MultiSource/Benchmarks/tramp3d-v4/tramp3d-v4.test  1011612.00  1011596.00 -0.0%
test-suite :: MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/timberwolfmc.test   280584.00   280536.00 -0.0%
     test-suite :: MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE.test    93016.00    93000.00 -0.0%

ASCI_Purple/SMG2000 - extra code vectorized, small variations
CFP2006/444.namd - small variations, less shuffles
Benchmarks/Misc/oourafft - small variations
CFP2017rate/538.imagick_r
CFP2017speed/638.imagick_s - small variations, less shuffles
LCALS/SubsetALambdaLoops - less shuffles
LCALS/SubsetARawLoops - less shuffles
CFP2017rate/526.blender_r - small variations, extra vector code
CFP2006/453.povray - small variations
CFP2017rate/511.povray_r - small variations
CINT2017rate/502.gcc_r
CINT2017speed/602.gcc_s - small variations
Benchmarks/tramp3d-v4 - small variations
Prolangs-C/TimberWolfMC - small variations
DOE-ProxyApps-C++/miniFE - extra code vectorized, small variations
DOE-ProxyApps-C++/CLAMR - extra code vectorized, small variations
ASCI_Purple/SMG2000 - no significant changes

RISCV, -O3+LTO
Metric: size..text
                                                                                          results    results0   diff
test-suite :: SingleSource/Regression/C/gcc-c-torture/execute/GCC-C-execute-pr28982b.test    1812.00    1866.00  3.0%
                            test-suite :: MultiSource/Benchmarks/Olden/health/health.test    3946.00    4016.00  1.8%
                     test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test  513180.00  513550.00  0.1%
                      test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test  513180.00  513550.00  0.1%
                        test-suite :: External/SPEC/CINT2017rate/502.gcc_r/502.gcc_r.test 7672198.00 7672202.00  0.0%
                       test-suite :: External/SPEC/CINT2017speed/602.gcc_s/602.gcc_s.test 7672198.00 7672202.00  0.0%
                       test-suite :: External/SPEC/CFP2017rate/508.namd_r/508.namd_r.test  746060.00  746044.00 -0.0%
                 test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 9497716.00 9497364.00 -0.0%
                           test-suite :: External/SPEC/CFP2006/453.povray/453.povray.test  948266.00  948214.00 -0.0%
                               test-suite :: External/SPEC/CFP2006/433.milc/433.milc.test   89874.00   89862.00 -0.0%
                            test-suite :: MultiSource/Benchmarks/7zip/7zip-benchmark.test  835492.00  835346.00 -0.0%
                test-suite :: MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE.test   66230.00   66202.00 -0.0%
                   test-suite :: External/SPEC/CFP2017rate/511.povray_r/511.povray_r.test  946090.00  944206.00 -0.2%
                test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test 1136404.00 1131854.00 -0.4%
                 test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test 1136404.00 1131854.00 -0.4%

gcc-c-torture/execute/GCC-C-execute-pr28982b - better vector code
Olden/health - extra vector code
CINT2017speed/625.x264_s
CINT2017rate/525.x264_r - small variation + improvements in reordering, @pixel_hadamard_ac stopped
being vectorized because of some non-effective shuffle recognition by
the compiler
CINT2017rate/502.gcc_r
CINT2017speed/602.gcc_s - small variations
CFP2017rate/508.namd_r - small variations
CFP2017rate/526.blender_r - small variations
CFP2006/453.povray - extra vector code
Benchmarks/7zip - extra vector code
DOE-ProxyApps-C++/miniFE - small variations
CFP2017rate/511.povray_r - extra vector code
CFP2017speed/638.imagick_s
CFP2017rate/538.imagick_r - extra vector code

Reviewers: RKSimon, hiraditya

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/126771
2025-02-21 07:15:02 -05:00

40 lines
2.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-threshold=-11 < %s | FileCheck %s
define float @test(ptr %call78) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> <ptr poison, ptr null>, ptr [[CALL78:%.*]], i32 0
; CHECK-NEXT: br label [[FOR_BODY194:%.*]]
; CHECK: for.body194:
; CHECK-NEXT: [[INDVARS_IV132:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[FOR_BODY194]] ]
; CHECK-NEXT: [[CURRENTW_031:%.*]] = phi ptr [ [[CALL78]], [[ENTRY]] ], [ [[PREVIOUSW_030:%.*]], [[FOR_BODY194]] ]
; CHECK-NEXT: [[PREVIOUSW_030]] = phi ptr [ null, [[ENTRY]] ], [ [[CURRENTW_031]], [[FOR_BODY194]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x ptr> [ [[TMP0]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY194]] ]
; CHECK-NEXT: [[TMP3]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: store float 0.000000e+00, ptr [[CURRENTW_031]], align 4
; CHECK-NEXT: tail call void null(ptr [[PREVIOUSW_030]], ptr null, ptr null, i32 0, i32 0, ptr null, ptr null, i32 0)
; CHECK-NEXT: br i1 false, label [[FOR_END286_LOOPEXIT:%.*]], label [[FOR_BODY194]]
; CHECK: for.end286.loopexit:
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x ptr> [ [[TMP3]], [[FOR_BODY194]] ]
; CHECK-NEXT: ret float 0.000000e+00
;
entry:
br label %for.body194
for.body194:
%indvars.iv132 = phi i64 [ 0, %entry ], [ 0, %for.body194 ]
%currentw.031 = phi ptr [ %call78, %entry ], [ %previousw.030, %for.body194 ]
%previousw.030 = phi ptr [ null, %entry ], [ %currentw.031, %for.body194 ]
store float 0.000000e+00, ptr %currentw.031, align 4
tail call void null(ptr %previousw.030, ptr null, ptr null, i32 0, i32 0, ptr null, ptr null, i32 0)
br i1 false, label %for.end286.loopexit, label %for.body194
for.end286.loopexit:
%currentw.031.lcssa = phi ptr [ %currentw.031, %for.body194 ]
%previousw.030.lcssa = phi ptr [ %previousw.030, %for.body194 ]
ret float 0.000000e+00
}