The previous version was reviewed here: https://github.com/llvm/llvm-project/pull/123360 This version is mostly the same, adjusted after the graph-to-tree transformation. The patch tries to remove wide alternate operations. Currently the SLP vectorizer emits something like this: ``` %0 = add i32 %1 = sub i32 %2 = add i32 %3 = sub i32 %4 = add i32 %5 = sub i32 %6 = add i32 %7 = sub i32 transforms to %v1 = add <8 x i32> %v2 = sub <8 x i32> %res = shuffle %v1, %v2, <0, 9, 2, 11, 4, 13, 6, 15> ``` i.e. half of the results are simply unused. This leads to increased register pressure and potentially doubles the number of operations. The patch introduces a SplitVectorize mode, in which it splits the operations by opcode and instead produces something like this: ``` %v1 = add <4 x i32> %v2 = sub <4 x i32> %res = shuffle %v1, %v2, <0, 4, 1, 5, 2, 6, 3, 7> ``` This improves performance by reducing the number of ops. It also enables some other improvements, such as improved graph reordering. -O3+LTO, AVX512 Metric: size..text Program size..text results results0 diff test-suite :: MultiSource/Benchmarks/Olden/tsp/tsp.test 2788.00 2820.00 1.1% test-suite :: MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/timberwolfmc.test 278168.00 280904.00 1.0% test-suite :: MultiSource/Benchmarks/FreeBench/pifft/pifft.test 82682.00 83258.00 0.7% test-suite :: External/SPEC/CFP2006/433.milc/433.milc.test 139344.00 139712.00 0.3% test-suite :: MultiSource/Benchmarks/FreeBench/fourinarow/fourinarow.test 27149.00 27197.00 0.2% test-suite :: MultiSource/Benchmarks/tramp3d-v4/tramp3d-v4.test 1008188.00 1009948.00 0.2% test-suite :: MultiSource/Benchmarks/mediabench/gsm/toast/toast.test 39226.00 39290.00 0.2% test-suite :: MultiSource/Benchmarks/MiBench/telecomm-gsm/telecomm-gsm.test 39229.00 39293.00 0.2% test-suite :: External/SPEC/CINT2017rate/500.perlbench_r/500.perlbench_r.test 2074533.00 2076549.00 0.1% test-suite :: External/SPEC/CINT2017speed/600.perlbench_s/600.perlbench_s.test 2074533.00 2076549.00 0.1% test-suite :: 
External/SPEC/CINT2006/464.h264ref/464.h264ref.test 798440.00 798952.00 0.1% test-suite :: MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/miniGMG.test 44123.00 44139.00 0.0% test-suite :: MultiSource/Benchmarks/Bullet/bullet.test 318942.00 319038.00 0.0% test-suite :: External/SPEC/CFP2017rate/511.povray_r/511.povray_r.test 1159880.00 1160152.00 0.0% test-suite :: MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/miniAMR.test 73595.00 73611.00 0.0% test-suite :: External/SPEC/CFP2006/453.povray/453.povray.test 1146124.00 1146348.00 0.0% test-suite :: MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CLAMR.test 203831.00 203847.00 0.0% test-suite :: MultiSource/Benchmarks/MiBench/consumer-lame/consumer-lame.test 207662.00 207678.00 0.0% test-suite :: External/SPEC/CFP2006/447.dealII/447.dealII.test 589851.00 589883.00 0.0% test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test 1398543.00 1398559.00 0.0% test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test 1398543.00 1398559.00 0.0% test-suite :: External/SPEC/CFP2017rate/510.parest_r/510.parest_r.test 2050990.00 2051006.00 0.0% test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 12559687.00 12559591.00 -0.0% test-suite :: External/SPEC/CINT2006/403.gcc/403.gcc.test 3074157.00 3074125.00 -0.0% test-suite :: External/SPEC/CINT2006/400.perlbench/400.perlbench.test 1092252.00 1092188.00 -0.0% test-suite :: External/SPEC/CFP2017rate/508.namd_r/508.namd_r.test 779763.00 779715.00 -0.0% test-suite :: MultiSource/Benchmarks/ASCI_Purple/SMG2000/smg2000.test 253517.00 253485.00 -0.0% test-suite :: MultiSource/Applications/JM/lencod/lencod.test 848259.00 848035.00 -0.0% test-suite :: MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/miniFE.test 93064.00 93016.00 -0.1% test-suite :: MultiSource/Applications/JM/ldecod/ldecod.test 383747.00 383475.00 -0.1% test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 673051.00 662907.00 -1.5% test-suite :: 
External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 673051.00 662907.00 -1.5% Olden/tsp - small variations Prolangs-C/TimberWolfMC - small variations, some code not inlined FreeBench/pifft - extra store <8 x double> vectorized, some other extra vectorizations CFP2006/433.milc - better vector code FreeBench/fourinarow - better vector code Benchmarks/tramp3d-v4 - extra vector code, small variations mediabench/gsm/toast - small variations MiBench/telecomm-gsm - small variations CINT2017rate/500.perlbench_r CINT2017speed/600.perlbench_s - better vector code, small variations CINT2006/464.h264ref - some smaller code + changes similar to x264 DOE-ProxyApps-C/miniGMG - small variations Benchmarks/Bullet - small variations CFP2017rate/511.povray_r - small variations DOE-ProxyApps-C/miniAMR - small variations CFP2006/453.povray - small variations DOE-ProxyApps-C++/CLAMR - small variations MiBench/consumer-lame - small variations CFP2006/447.dealII - small variations CFP2017rate/538.imagick_r CFP2017speed/638.imagick_s - small variations CFP2017rate/510.parest_r - better vector code, small variations CFP2017rate/526.blender_r - small variations CINT2006/403.gcc - small variations CINT2006/400.perlbench - small variations CFP2017rate/508.namd_r - small variations ASCI_Purple/SMG2000 - small variations JM/lencod - extra store <16 x i32>, small variations DOE-ProxyApps-C++/miniFE - small variations JM/ldecod - extra vector code, small variations, fewer shuffles CINT2017speed/625.x264_s CINT2017rate/525.x264_r - the number of instructions increased, but they appear to be more performant. E.g., for function x264_pixel_satd_8x8, llvm-mca reports better throughput - 84 for the current version and 59 for the new version. 
-O3+LTO, mcpu=sifive-p470 Metric: size..text results results0 diff test-suite :: External/SPEC/CINT2006/464.h264ref/464.h264ref.test 580768.00 581118.00 0.1% test-suite :: MultiSource/Applications/d/make_dparser.test 78854.00 78894.00 0.1% test-suite :: MultiSource/Applications/JM/lencod/lencod.test 633448.00 633750.00 0.0% test-suite :: MultiSource/Benchmarks/Bullet/bullet.test 277002.00 277080.00 0.0% test-suite :: External/SPEC/CINT2006/400.perlbench/400.perlbench.test 931938.00 931960.00 0.0% test-suite :: External/SPEC/CINT2006/403.gcc/403.gcc.test 2512806.00 2512822.00 0.0% test-suite :: External/SPEC/CINT2017speed/602.gcc_s/602.gcc_s.test 7659880.00 7659876.00 -0.0% test-suite :: External/SPEC/CINT2017rate/502.gcc_r/502.gcc_r.test 7659880.00 7659876.00 -0.0% test-suite :: External/SPEC/CFP2017rate/510.parest_r/510.parest_r.test 1602448.00 1602434.00 -0.0% test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 9496664.00 9496542.00 -0.0% test-suite :: MultiSource/Benchmarks/MiBench/consumer-lame/consumer-lame.test 147424.00 147422.00 -0.0% test-suite :: External/SPEC/CINT2017speed/600.perlbench_s/600.perlbench_s.test 1764608.00 1764578.00 -0.0% test-suite :: External/SPEC/CINT2017rate/500.perlbench_r/500.perlbench_r.test 1764608.00 1764578.00 -0.0% test-suite :: MultiSource/Benchmarks/7zip/7zip-benchmark.test 841656.00 841632.00 -0.0% test-suite :: External/SPEC/CFP2006/453.povray/453.povray.test 949026.00 948962.00 -0.0% test-suite :: External/SPEC/CFP2017rate/511.povray_r/511.povray_r.test 946348.00 946284.00 -0.0% test-suite :: MultiSource/Applications/JM/ldecod/ldecod.test 279794.00 279764.00 -0.0% test-suite :: MultiSource/Benchmarks/mediabench/g721/g721encode/encode.test 4776.00 4772.00 -0.1% test-suite :: MultiSource/Benchmarks/mediabench/gsm/toast/toast.test 25074.00 25028.00 -0.2% test-suite :: MultiSource/Benchmarks/MiBench/telecomm-gsm/telecomm-gsm.test 25074.00 25028.00 -0.2% test-suite :: 
MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/miniGMG.test 29336.00 29184.00 -0.5% test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 535390.00 510124.00 -4.7% test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 535390.00 510124.00 -4.7% test-suite :: SingleSource/Regression/C/gcc-c-torture/execute/ieee/GCC-C-execute-ieee-pr50310.test 886.00 608.00 -31.4% CINT2006/464.h264ref - extra v16i32 reduction d/make_dparser - better vector code JM/lencod - extra v16i32 reduction Benchmarks/Bullet - smaller vector code CINT2006/400.perlbench - better vector code CINT2006/403.gcc - small variations CINT2017speed/602.gcc_s CINT2017rate/502.gcc_r - small variations CFP2017rate/510.parest_r - small variations CFP2017rate/526.blender_r - small variations MiBench/consumer-lame - small variations CINT2017speed/600.perlbench_s CINT2017rate/500.perlbench_r - small variations Benchmarks/7zip - small variations CFP2017rate/511.povray_r - small variations JM/ldecod - extra vector code mediabench/g721/g721encode - extra vector code mediabench/gsm - extra vector code MiBench/telecomm-gsm - extra vector code DOE-ProxyApps-C/miniGMG - extra vector code CINT2017rate/525.x264_r CINT2017speed/625.x264_s - reduced number of wide operations and shuffles, saving the registers, similar to X86, extra code in pixel_hadamard_ac vectorized ieee/GCC-C-execute-ieee-pr50310 - extra code vectorized CINT2006/464.h264ref - extra vector code in find_sad_16x16 JM/lencod - extra vector code in find_sad_16x16 d/make_dparser - smaller vector code Benchmarks/Bullet - small variations CINT2006/400.perlbench - smaller vector code CFP2017rate/526.blender_r - small variations, extra store <8 x float> in the loop, extra store <8 x i8> in loop CINT2017rate/500.perlbench_r CINT2017speed/600.perlbench_s - small variations MiBench/consumer-lame - small variations JM/ldecod - extra vector code mediabench/g721/g721encode - small variations Reviewers: hiraditya Reviewed By: hiraditya Pull 
Request: https://github.com/llvm/llvm-project/pull/128907
223 lines
11 KiB
LLVM
223 lines
11 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s

; SLP vectorizer test for x86_64. The scalar IR in @test is fed to the opt
; invocation above and its output is compared against the autogenerated
; FileCheck assertions that follow the function header. Regenerate those
; assertions with utils/update_test_checks.py instead of editing them by hand.
;
; In the expected output, narrower vectors are combined into <8 x float>
; values through @llvm.vector.insert before the wide bitcast/icmp.
; TODO(review): presumably this exercises the split-vectorize handling of the
; SLP vectorizer; confirm against the commit that introduced the test.
define void @test(i32 %0, i8 %1, i64 %2, float %3) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: i32 [[TMP0:%.*]], i8 [[TMP1:%.*]], i64 [[TMP2:%.*]], float [[TMP3:%.*]]) {
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <2 x i64> [[TMP6]], <i64 255, i64 -65536>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> <i64 1, i64 poison>, <2 x i32> <i32 2, i32 0>
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = lshr <2 x i64> [[TMP9]], <i64 1, i64 16>
; CHECK-NEXT: [[TMP11:%.*]] = trunc <2 x i64> [[TMP10]] to <2 x i8>
; CHECK-NEXT: [[TMP12:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[TMP11]], <2 x i8> zeroinitializer)
; CHECK-NEXT: [[TMP13:%.*]] = uitofp <2 x i8> [[TMP12]] to <2 x float>
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = uitofp <4 x i8> [[TMP15]] to <4 x float>
; CHECK-NEXT: [[TMP17:%.*]] = fdiv <2 x float> [[TMP13]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[TMP18]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = sitofp <2 x i32> [[TMP19]] to <2 x float>
; CHECK-NEXT: [[TMP21:%.*]] = fdiv <2 x float> zeroinitializer, [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = trunc <2 x i64> [[TMP7]] to <2 x i32>
; CHECK-NEXT: [[TMP23:%.*]] = sub <2 x i32> zeroinitializer, [[TMP22]]
; CHECK-NEXT: [[TMP24:%.*]] = ashr <2 x i32> [[TMP23]], splat (i32 1)
; CHECK-NEXT: [[TMP25:%.*]] = sitofp <2 x i32> [[TMP24]] to <2 x float>
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x float> [[TMP26]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP28:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP27]], <4 x float> [[TMP16]], i64 4)
; CHECK-NEXT: [[TMP29:%.*]] = fdiv <8 x float> zeroinitializer, [[TMP28]]
; CHECK-NEXT: [[TMP30:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[TMP29]])
; CHECK-NEXT: [[TMP31:%.*]] = bitcast <8 x float> [[TMP30]] to <8 x i32>
; CHECK-NEXT: [[TMP32:%.*]] = icmp ult <8 x i32> [[TMP31]], splat (i32 1325400064)
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i1> [[TMP32]], i32 6
; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i64 0, i64 2147483648
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP32]], i32 5
; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i64 0, i64 4286578688
; CHECK-NEXT: [[TMP37:%.*]] = or i64 [[TMP34]], [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP32]], i32 7
; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i64 0, i64 128
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <8 x i1> [[TMP32]], i32 4
; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i64 0, i64 128
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <8 x i1> [[TMP32]], i32 2
; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i64 0, i64 8388608
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP32]], i32 3
; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i64 0, i64 32768
; CHECK-NEXT: [[TMP46:%.*]] = or i64 [[TMP43]], [[TMP45]]
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i64 0, i64 8388608
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[TMP32]], i32 1
; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i64 0, i64 32768
; CHECK-NEXT: br label %[[BB52:.*]]
; CHECK: [[BB51:.*]]:
; CHECK-NEXT: unreachable
; CHECK: [[BB52]]:
; CHECK-NEXT: br label %[[BB53:.*]]
; CHECK: [[BB53]]:
; CHECK-NEXT: [[TMP54:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP17]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 0, ptr null)
; CHECK-NEXT: [[TMP55:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP21]])
; CHECK-NEXT: [[TMP56:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <8 x float> [[TMP56]], <8 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP58:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP57]], <2 x float> [[TMP55]], i64 0)
; CHECK-NEXT: [[TMP59:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP58]], <2 x float> [[TMP54]], i64 6)
; CHECK-NEXT: [[TMP60:%.*]] = bitcast <8 x float> [[TMP59]] to <8 x i32>
; CHECK-NEXT: [[TMP61:%.*]] = icmp ult <8 x i32> [[TMP60]], splat (i32 1325400064)
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <8 x i1> [[TMP61]], i32 5
; CHECK-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i64 [[TMP37]], i64 0
; CHECK-NEXT: [[TMP64:%.*]] = extractelement <8 x i1> [[TMP61]], i32 4
; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP64]], i64 0, i64 4294967168
; CHECK-NEXT: [[TMP66:%.*]] = or i64 [[TMP63]], [[TMP65]]
; CHECK-NEXT: [[TMP67:%.*]] = extractelement <8 x i1> [[TMP61]], i32 7
; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], i64 0, i64 8388608
; CHECK-NEXT: [[TMP69:%.*]] = extractelement <8 x i1> [[TMP61]], i32 6
; CHECK-NEXT: [[TMP70:%.*]] = select i1 [[TMP69]], i64 0, i64 32768
; CHECK-NEXT: [[TMP71:%.*]] = or i64 [[TMP68]], [[TMP70]]
; CHECK-NEXT: [[TMP72:%.*]] = or i64 [[TMP71]], [[TMP66]]
; CHECK-NEXT: [[TMP73:%.*]] = or i64 [[TMP72]], [[TMP39]]
; CHECK-NEXT: store i64 [[TMP73]], ptr null, align 1
; CHECK-NEXT: store i64 [[TMP41]], ptr null, align 1
; CHECK-NEXT: [[TMP74:%.*]] = extractelement <8 x i1> [[TMP61]], i32 3
; CHECK-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i64 0, i64 -9223372036854775808
; CHECK-NEXT: [[TMP76:%.*]] = extractelement <8 x i1> [[TMP61]], i32 2
; CHECK-NEXT: [[TMP77:%.*]] = zext i1 [[TMP76]] to i64
; CHECK-NEXT: [[TMP78:%.*]] = or i64 [[TMP46]], [[TMP77]]
; CHECK-NEXT: [[TMP79:%.*]] = or i64 [[TMP78]], [[TMP75]]
; CHECK-NEXT: store i64 [[TMP79]], ptr null, align 1
; CHECK-NEXT: [[TMP80:%.*]] = extractelement <8 x i1> [[TMP61]], i32 1
; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i64 0, i64 2147483648
; CHECK-NEXT: [[TMP82:%.*]] = extractelement <8 x i1> [[TMP61]], i32 0
; CHECK-NEXT: [[TMP83:%.*]] = select i1 [[TMP82]], i64 0, i64 128
; CHECK-NEXT: [[TMP84:%.*]] = or i64 [[TMP83]], [[TMP50]]
; CHECK-NEXT: [[TMP85:%.*]] = or i64 [[TMP84]], [[TMP48]]
; CHECK-NEXT: [[TMP86:%.*]] = or i64 [[TMP85]], [[TMP81]]
; CHECK-NEXT: store i64 [[TMP86]], ptr null, align 1
; CHECK-NEXT: br label %[[BB51]]
;
  ; Entry block (implicitly %4). Two masked copies of %2 feed an
  ; add/lshr/trunc/smax/uitofp chain on i8 and, further down, a
  ; trunc/sub/ashr/sitofp chain on i32.
  %5 = and i64 %2, 255
  %6 = and i64 %2, -65536
  %7 = add i64 %5, 1
  %8 = add i64 %2, %6
  %9 = lshr i64 %7, 1
  %10 = trunc i64 %9 to i8
  %11 = tail call i8 @llvm.smax.i8(i8 %10, i8 0)
  %12 = lshr i64 %8, 16
  %13 = trunc i64 %12 to i8
  %14 = tail call i8 @llvm.smax.i8(i8 %13, i8 0)
  %15 = uitofp i8 %11 to float
  %16 = uitofp i8 %14 to float
  %17 = uitofp i8 %1 to float
  %18 = uitofp i8 %1 to float
  %19 = uitofp i8 %1 to float
  %20 = fdiv float 0.000000e+00, %17
  %21 = fdiv float %15, 0.000000e+00
  %22 = fdiv float %16, 0.000000e+00
  ; Repeated pattern: fabs -> bitcast to i32 -> icmp ult 1325400064 -> select
  ; of an i64 constant.
  %23 = call float @llvm.fabs.f32(float %20)
  %24 = bitcast float %23 to i32
  %25 = icmp ult i32 %24, 1325400064
  %26 = fdiv float 0.000000e+00, %18
  %27 = fdiv float 0.000000e+00, %19
  %28 = call float @llvm.fabs.f32(float %27)
  %29 = bitcast float %28 to i32
  %30 = icmp ult i32 %29, 1325400064
  %31 = select i1 %30, i64 0, i64 2147483648
  %32 = call float @llvm.fabs.f32(float %26)
  %33 = bitcast float %32 to i32
  %34 = icmp ult i32 %33, 1325400064
  %35 = select i1 %34, i64 0, i64 4286578688
  %36 = or i64 %31, %35
  %37 = select i1 %25, i64 0, i64 128
  %38 = fdiv float 0.000000e+00, %17
  %39 = call float @llvm.fabs.f32(float %38)
  %40 = bitcast float %39 to i32
  %41 = icmp ult i32 %40, 1325400064
  %42 = select i1 %41, i64 0, i64 128
  %43 = trunc i64 %5 to i32
  %44 = sub i32 0, %43
  %45 = trunc i64 %6 to i32
  %46 = sub i32 0, %45
  %47 = ashr i32 %44, 1
  %48 = ashr i32 %46, 1
  %49 = sitofp i32 %0 to float
  %50 = sitofp i32 %47 to float
  %51 = sitofp i32 %48 to float
  %52 = sitofp i32 %0 to float
  %53 = fdiv float 0.000000e+00, %50
  %54 = fdiv float 0.000000e+00, %51
  %55 = call float @llvm.fabs.f32(float %53)
  %56 = bitcast float %55 to i32
  %57 = icmp ult i32 %56, 1325400064
  %58 = call float @llvm.fabs.f32(float %54)
  %59 = bitcast float %58 to i32
  %60 = icmp ult i32 %59, 1325400064
  %61 = select i1 %60, i64 0, i64 8388608
  %62 = select i1 %57, i64 0, i64 32768
  %63 = or i64 %61, %62
  %64 = fdiv float 0.000000e+00, %49
  %65 = fdiv float 0.000000e+00, %50
  %66 = fdiv float 0.000000e+00, %51
  %67 = fdiv float 0.000000e+00, %52
  %68 = call float @llvm.fabs.f32(float %65)
  %69 = bitcast float %68 to i32
  %70 = icmp ult i32 %69, 1325400064
  %71 = call float @llvm.fabs.f32(float %66)
  %72 = bitcast float %71 to i32
  %73 = icmp ult i32 %72, 1325400064
  %74 = select i1 %73, i64 0, i64 8388608
  %75 = select i1 %70, i64 0, i64 32768
  br label %77

; Reached only from block 78; terminates in unreachable.
76:                                               ; preds = %78
  unreachable

77:                                               ; preds = %4
  br label %78

; Uses values defined in the entry block (%21, %22, %36, %37, %42, %63, %64,
; %67, %74, %75) together with %3, and performs four i64 stores to null.
78:                                               ; preds = %77
  %79 = call float @llvm.fabs.f32(float %22)
  %80 = bitcast float %79 to i32
  %81 = icmp ult i32 %80, 1325400064
  %82 = call float @llvm.fabs.f32(float %21)
  %83 = bitcast float %82 to i32
  %84 = icmp ult i32 %83, 1325400064
  %85 = bitcast float %3 to i32
  %86 = icmp ult i32 %85, 1325400064
  %87 = select i1 %86, i64 %36, i64 0
  %88 = bitcast float %3 to i32
  %89 = icmp ult i32 %88, 1325400064
  %90 = select i1 %89, i64 0, i64 4294967168
  %91 = or i64 %87, %90
  %92 = select i1 %81, i64 0, i64 8388608
  %93 = select i1 %84, i64 0, i64 32768
  %94 = or i64 %92, %93
  %95 = or i64 %94, %91
  %96 = or i64 %95, %37
  store i64 %96, ptr null, align 1
  call void @llvm.lifetime.start.p0(i64 0, ptr null)
  store i64 %42, ptr null, align 1
  %97 = bitcast float %3 to i32
  %98 = icmp ult i32 %97, 1325400064
  %99 = select i1 %98, i64 0, i64 -9223372036854775808
  %100 = bitcast float %3 to i32
  %101 = icmp ult i32 %100, 1325400064
  %102 = zext i1 %101 to i64
  %103 = or i64 %63, %102
  %104 = or i64 %103, %99
  store i64 %104, ptr null, align 1
  %105 = call float @llvm.fabs.f32(float %67)
  %106 = bitcast float %105 to i32
  %107 = icmp ult i32 %106, 1325400064
  %108 = call float @llvm.fabs.f32(float %64)
  %109 = bitcast float %108 to i32
  %110 = icmp ult i32 %109, 1325400064
  %111 = select i1 %107, i64 0, i64 2147483648
  %112 = select i1 %110, i64 0, i64 128
  %113 = or i64 %112, %75
  %114 = or i64 %113, %74
  %115 = or i64 %114, %111
  store i64 %115, ptr null, align 1
  br label %76
}