We have two types of mask in SLP: a scalar mask and a vector mask. When vectorizing four i32 additions into <4 x i32>, SLP creates a mask of length 4. When vectorizing four <2 x i32> additions into <8 x i32>, SLP also creates a mask of length 4. We refer to the first case as a scalar mask (because each mask element represents a scalar, i32), and the second case as a vector mask (because each mask element represents a vector, <2 x i32>). At some point, we must convert the scalar mask into a vector mask (otherwise, calling TTI cost functions or IRBuilderBase functions may yield incorrect results). Since both ShuffleCostEstimator and ShuffleInstructionBuilder can modify the CommonMask, we have decided to perform the mask transformation only within createShuffle. However, we do not store the transformed result, as createShuffle may be called multiple times.
271 lines
16 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck --check-prefixes=CHECK,POWEROF2 %s
; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 -slp-vectorize-non-power-of-2 %s | FileCheck --check-prefixes=CHECK,NONPOWEROF2 %s

; Two <2 x i32> loads from fixed addresses feed a pair of <2 x i32> phis in
; %sw.bb509.i. The switch reaches that block twice from %if.end.i87 (default
; destination and case 1), so each phi lists %if.end.i87 twice.
; The expected output merges both phis into a single <4 x i32> phi: the two
; loads become one masked gather, and the edges that carry zeroinitializer for
; the second phi use a shuffle that keeps lanes 0-1 of the gather and takes
; lanes 2-3 from a vector with zeroinitializer inserted at index 2.
define i32 @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[IF_END_I87:%.*]]
; CHECK: if.end.i87:
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> <ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64064 to ptr), ptr inttoptr (i64 64064 to ptr)>, <4 x i64> <i64 0, i64 1, i64 0, i64 1>), i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> zeroinitializer, i64 2)
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: switch i32 0, label [[SW_BB509_I:%.*]] [
; CHECK-NEXT: i32 1, label [[SW_BB509_I]]
; CHECK-NEXT: i32 0, label [[IF_THEN458_I:%.*]]
; CHECK-NEXT: ]
; CHECK: if.then458.i:
; CHECK-NEXT: br label [[SW_BB509_I]]
; CHECK: sw.bb509.i:
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i32> [ [[TMP0]], [[IF_THEN458_I]] ], [ [[TMP3]], [[IF_END_I87]] ], [ [[TMP3]], [[IF_END_I87]] ]
; CHECK-NEXT: ret i32 0
;
entry:
  ; Byte offsets from null; the loads below read <2 x i32> from each address.
  %getelementptr0 = getelementptr i8, ptr null, i64 64036
  %getelementptr1 = getelementptr i8, ptr null, i64 64064
  br label %if.end.i87

if.end.i87: ; preds = %entry
  %0 = load <2 x i32>, ptr %getelementptr0, align 4
  %1 = load <2 x i32>, ptr %getelementptr1, align 8
  ; Default and case 1 share a destination, giving %sw.bb509.i a duplicated
  ; predecessor edge.
  switch i32 0, label %sw.bb509.i [
    i32 1, label %sw.bb509.i
    i32 0, label %if.then458.i
  ]

if.then458.i: ; preds = %if.end.i87
  br label %sw.bb509.i

sw.bb509.i: ; preds = %if.then458.i, %if.end.i87, %if.end.i87
  ; Neither phi result is used; they exist only as vectorization seeds.
  %4 = phi <2 x i32> [ %0, %if.then458.i ], [ %0, %if.end.i87 ], [ %0, %if.end.i87 ]
  %5 = phi <2 x i32> [ %1, %if.then458.i ], [ zeroinitializer, %if.end.i87 ], [ zeroinitializer, %if.end.i87 ]
  ret i32 0
}

; Four independent <8 x float> pipelines (load, fpext to double, fadd with
; zero, fptrunc back to float, fcmp ogt against zero) over addresses at byte
; offsets 132, 164, 200 and 300 from null.
; The expected output widens every stage to a single <32 x ...> operation. The
; loads at 132 and 164 are folded into one <16 x float> load, the loads at 200
; and 300 stay separate, and the pieces are assembled with llvm.vector.insert.
define void @test2() {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr null, i64 132
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr null, i64 200
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr null, i64 300
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> [[TMP4]], i64 0)
; CHECK-NEXT: [[TMP7:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP6]], <8 x float> [[TMP3]], i64 8)
; CHECK-NEXT: [[TMP8:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v16f32(<32 x float> [[TMP7]], <16 x float> [[TMP5]], i64 16)
; CHECK-NEXT: [[TMP9:%.*]] = fpext <32 x float> [[TMP8]] to <32 x double>
; CHECK-NEXT: [[TMP10:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> poison, <8 x double> zeroinitializer, i64 0)
; CHECK-NEXT: [[TMP11:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP10]], <8 x double> zeroinitializer, i64 8)
; CHECK-NEXT: [[TMP12:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP11]], <8 x double> zeroinitializer, i64 16)
; CHECK-NEXT: [[TMP13:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP12]], <8 x double> zeroinitializer, i64 24)
; CHECK-NEXT: [[TMP14:%.*]] = fadd <32 x double> [[TMP13]], [[TMP9]]
; CHECK-NEXT: [[TMP15:%.*]] = fptrunc <32 x double> [[TMP14]] to <32 x float>
; CHECK-NEXT: [[TMP16:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> zeroinitializer, i64 0)
; CHECK-NEXT: [[TMP17:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP16]], <8 x float> zeroinitializer, i64 8)
; CHECK-NEXT: [[TMP18:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP17]], <8 x float> zeroinitializer, i64 16)
; CHECK-NEXT: [[TMP19:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP18]], <8 x float> zeroinitializer, i64 24)
; CHECK-NEXT: [[TMP20:%.*]] = fcmp ogt <32 x float> [[TMP19]], [[TMP15]]
; CHECK-NEXT: ret void
;
entry:
  ; 132 + 8 * 4 bytes = 164, so %0 and %1 address back-to-back <8 x float>
  ; chunks; %2 and %3 are not adjacent to them or to each other.
  %0 = getelementptr i8, ptr null, i64 132
  %1 = getelementptr i8, ptr null, i64 164
  %2 = getelementptr i8, ptr null, i64 200
  %3 = getelementptr i8, ptr null, i64 300
  %4 = load <8 x float>, ptr %0, align 4
  %5 = load <8 x float>, ptr %1, align 4
  %6 = load <8 x float>, ptr %2, align 4
  %7 = load <8 x float>, ptr %3, align 4
  %8 = fpext <8 x float> %4 to <8 x double>
  %9 = fpext <8 x float> %5 to <8 x double>
  %10 = fpext <8 x float> %6 to <8 x double>
  %11 = fpext <8 x float> %7 to <8 x double>
  %12 = fadd <8 x double> zeroinitializer, %8
  %13 = fadd <8 x double> zeroinitializer, %9
  %14 = fadd <8 x double> zeroinitializer, %10
  %15 = fadd <8 x double> zeroinitializer, %11
  %16 = fptrunc <8 x double> %12 to <8 x float>
  %17 = fptrunc <8 x double> %13 to <8 x float>
  %18 = fptrunc <8 x double> %14 to <8 x float>
  %19 = fptrunc <8 x double> %15 to <8 x float>
  ; The compare results are unused; they only anchor the four pipelines.
  %20 = fcmp ogt <8 x float> zeroinitializer, %16
  %21 = fcmp ogt <8 x float> zeroinitializer, %17
  %22 = fcmp ogt <8 x float> zeroinitializer, %18
  %23 = fcmp ogt <8 x float> zeroinitializer, %19
  ret void
}

; Two <2 x float> phis in %for.cond.cleanup receive zeroinitializer from
; %for.body.lr.ph and two selects from %for.body. Both selects pick between the
; same loaded value and zeroinitializer; one uses a constant all-true
; condition, the other the result of an fcmp. The float argument %0 is unused.
; The expected output has a single <4 x float> phi and a single <4 x float>
; select. Its <4 x i1> condition is built from the constant and the fcmp result,
; and the loaded value is repeated in both halves by a <0, 1, 0, 1> shuffle.
define void @test3(float %0) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]]
; CHECK: for.body.lr.ph:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0)
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP1]], <2 x float> zeroinitializer, i64 2)
; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr null, align 4
; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> splat (i1 true), i64 0)
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2)
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0)
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP9]], <4 x float> [[TMP2]]
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
;
entry:
  br label %for.body.lr.ph

for.body.lr.ph:
  br i1 false, label %for.cond.cleanup, label %for.body

for.cond.cleanup: ; preds = %for.body, %for.body.lr.ph
  ; Unused phis; they are the seeds the vectorizer bundles together.
  %1 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %5, %for.body ]
  %2 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %6, %for.body ]
  ret void

for.body:
  %3 = load <2 x float>, ptr null, align 4
  %4 = fcmp olt <2 x float> zeroinitializer, %3
  ; Same true/false operands, different conditions (constant vs. computed).
  %5 = select <2 x i1> <i1 true, i1 true>, <2 x float> %3, <2 x float> zeroinitializer
  %6 = select <2 x i1> %4, <2 x float> %3, <2 x float> zeroinitializer
  br label %for.cond.cleanup
}

; Six scalars are extracted from lanes 0, 1, 2, 4, 5 and 6 of one <8 x float>
; fadd, passed through scalar phis, multiplied by 0.0 and summed by two
; three-term `fadd reassoc nsz` chains (lanes 0-2 and lanes 4-6). Each sum is
; then passed to llvm.sqrt.f32.
; The two RUN configurations expect different output, so this function uses the
; per-configuration prefixes instead of the common one:
;  * POWEROF2 prefix (default): <2 x float> pieces packed into a <4 x float>
;    phi, plus a <2 x float> phi whose lanes are extracted and combined with
;    scalar fmul/fadd.
;  * NONPOWEROF2 prefix (-slp-vectorize-non-power-of-2): two <3 x float> pieces
;    packed into one <6 x float> phi, with each sum computed by
;    llvm.vector.reduce.fadd.v3f32.
define ptr @test4() {
; POWEROF2-LABEL: @test4(
; POWEROF2-NEXT: [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer
; POWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
; POWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 5, i32 6>
; POWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 4, i32 0>
; POWEROF2-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP2]], i64 0)
; POWEROF2-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP3]], i64 2)
; POWEROF2-NEXT: br label [[TMP8:%.*]]
; POWEROF2: 7:
; POWEROF2-NEXT: br label [[TMP8]]
; POWEROF2: 8:
; POWEROF2-NEXT: [[TMP9:%.*]] = phi <2 x float> [ poison, [[TMP7:%.*]] ], [ [[TMP4]], [[TMP0:%.*]] ]
; POWEROF2-NEXT: [[TMP10:%.*]] = phi <4 x float> [ poison, [[TMP7]] ], [ [[TMP6]], [[TMP0]] ]
; POWEROF2-NEXT: br label [[TMP11:%.*]]
; POWEROF2: 11:
; POWEROF2-NEXT: [[TMP12:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 0)
; POWEROF2-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer
; POWEROF2-NEXT: [[TMP14:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 2)
; POWEROF2-NEXT: [[TMP15:%.*]] = fmul <2 x float> zeroinitializer, [[TMP14]]
; POWEROF2-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP9]], i32 1
; POWEROF2-NEXT: [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP16]]
; POWEROF2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP9]], i32 0
; POWEROF2-NEXT: [[TMP19:%.*]] = fmul float [[TMP18]], 0.000000e+00
; POWEROF2-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP13]], i32 0
; POWEROF2-NEXT: [[TMP21:%.*]] = fadd reassoc nsz float [[TMP20]], [[TMP17]]
; POWEROF2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[TMP15]], i32 0
; POWEROF2-NEXT: [[TMP23:%.*]] = fadd reassoc nsz float [[TMP22]], [[TMP19]]
; POWEROF2-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[TMP13]], i32 1
; POWEROF2-NEXT: [[TMP25:%.*]] = fadd reassoc nsz float [[TMP21]], [[TMP24]]
; POWEROF2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[TMP15]], i32 1
; POWEROF2-NEXT: [[TMP27:%.*]] = fadd reassoc nsz float [[TMP23]], [[TMP26]]
; POWEROF2-NEXT: [[TMP28:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP25]])
; POWEROF2-NEXT: [[TMP29:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP27]])
; POWEROF2-NEXT: ret ptr null
;
; NONPOWEROF2-LABEL: @test4(
; NONPOWEROF2-NEXT: [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer
; NONPOWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
; NONPOWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> <i32 4, i32 5, i32 6>
; NONPOWEROF2-NEXT: [[TMP4:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> poison, <3 x float> [[TMP2]], i64 0)
; NONPOWEROF2-NEXT: [[TMP5:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> [[TMP4]], <3 x float> [[TMP3]], i64 3)
; NONPOWEROF2-NEXT: br label [[TMP7:%.*]]
; NONPOWEROF2: 6:
; NONPOWEROF2-NEXT: br label [[TMP7]]
; NONPOWEROF2: 7:
; NONPOWEROF2-NEXT: [[TMP8:%.*]] = phi <6 x float> [ poison, [[TMP6:%.*]] ], [ [[TMP5]], [[TMP0:%.*]] ]
; NONPOWEROF2-NEXT: br label [[TMP9:%.*]]
; NONPOWEROF2: 9:
; NONPOWEROF2-NEXT: [[TMP10:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 0)
; NONPOWEROF2-NEXT: [[TMP11:%.*]] = fmul <3 x float> zeroinitializer, [[TMP10]]
; NONPOWEROF2-NEXT: [[TMP12:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 3)
; NONPOWEROF2-NEXT: [[TMP13:%.*]] = fmul <3 x float> zeroinitializer, [[TMP12]]
; NONPOWEROF2-NEXT: [[TMP14:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP11]])
; NONPOWEROF2-NEXT: [[TMP15:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP13]])
; NONPOWEROF2-NEXT: [[TMP16:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP14]])
; NONPOWEROF2-NEXT: [[TMP17:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP15]])
; NONPOWEROF2-NEXT: ret ptr null
;
  %1 = fadd <8 x float> zeroinitializer, zeroinitializer
  ; Lanes 3 and 7 are deliberately not extracted, leaving two groups of three.
  %2 = extractelement <8 x float> %1, i64 0
  %3 = extractelement <8 x float> %1, i64 1
  %4 = extractelement <8 x float> %1, i64 2
  %5 = extractelement <8 x float> %1, i64 4
  %6 = extractelement <8 x float> %1, i64 5
  %7 = extractelement <8 x float> %1, i64 6
  br label %9

; No branch in this function targets %8; it only supplies the 0.0 phi inputs.
8:
  br label %9

9:
  %10 = phi float [ 0.000000e+00, %8 ], [ %7, %0 ]
  %11 = phi float [ 0.000000e+00, %8 ], [ %6, %0 ]
  %12 = phi float [ 0.000000e+00, %8 ], [ %5, %0 ]
  %13 = phi float [ 0.000000e+00, %8 ], [ %4, %0 ]
  %14 = phi float [ 0.000000e+00, %8 ], [ %3, %0 ]
  %15 = phi float [ 0.000000e+00, %8 ], [ %2, %0 ]
  br label %16

16:
  ; The two reductions are interleaved: %17/%19/%23 belong to the lanes 0-2
  ; sum (%25), %18/%20/%24 to the lanes 4-6 sum (%26).
  %17 = fmul float %14, 0.000000e+00
  %18 = fmul float 0.000000e+00, %11
  %19 = fmul float 0.000000e+00, %15
  %20 = fmul float %12, 0.000000e+00
  %21 = fadd reassoc nsz float %17, %19
  %22 = fadd reassoc nsz float %18, %20
  %23 = fmul float %13, 0.000000e+00
  %24 = fmul float %10, 0.000000e+00
  %25 = fadd reassoc nsz float %21, %23
  %26 = fadd reassoc nsz float %22, %24
  %27 = tail call float @llvm.sqrt.f32(float %25)
  %28 = tail call float @llvm.sqrt.f32(float %26)
  ret ptr null
}

; Two <2 x double> fdivs and four <2 x double> fadds, each fadd feeding a
; single-incoming phi in %for.end47. Only %add0 and %add2 consume an fdiv
; result (as right and left operand respectively); %add1 and %add3 add two
; zero vectors.
; The expected output combines the fdivs into one <4 x double> fdiv and the
; fadds into one <8 x double> fadd. The fadd operand that mixes fdiv results
; with zeros is produced by a two-source shuffle with mask
; <0, 1, 10, 11, 2, 3, 14, 15>. The four phis become one <8 x double> phi.
define i32 @test5() {
; CHECK-LABEL: @test5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> zeroinitializer, i64 0)
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> [[TMP0]], <2 x double> zeroinitializer, i64 2)
; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> poison, <2 x double> zeroinitializer, i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP3]], <2 x double> zeroinitializer, i64 2)
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP4]], <2 x double> zeroinitializer, i64 4)
; CHECK-NEXT: [[TMP6:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP5]], <2 x double> zeroinitializer, i64 6)
; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> poison, <2 x double> zeroinitializer, i64 2)
; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP7]], <2 x double> zeroinitializer, i64 6)
; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> poison, <4 x double> [[TMP2]], i64 0)
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 2, i32 3, i32 14, i32 15>
; CHECK-NEXT: [[TMP11:%.*]] = fadd <8 x double> [[TMP6]], [[TMP10]]
; CHECK-NEXT: br label [[FOR_END47:%.*]]
; CHECK: for.end47:
; CHECK-NEXT: [[TMP12:%.*]] = phi <8 x double> [ [[TMP11]], [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 0
;
entry:
  %div0 = fdiv <2 x double> zeroinitializer, zeroinitializer
  %div1 = fdiv <2 x double> zeroinitializer, zeroinitializer
  ; The fdiv result sits on opposite sides in %add0 and %add2.
  %add0 = fadd <2 x double> zeroinitializer, %div0
  %add1 = fadd <2 x double> zeroinitializer, zeroinitializer
  %add2 = fadd <2 x double> %div1, zeroinitializer
  %add3 = fadd <2 x double> zeroinitializer, zeroinitializer
  br label %for.end47

for.end47: ; preds = %entry
  ; Unused single-incoming phis; they act as the vectorization seeds.
  %add0.lcssa = phi <2 x double> [ %add0, %entry ]
  %add1.lcssa = phi <2 x double> [ %add1, %entry ]
  %add2.lcssa = phi <2 x double> [ %add2, %entry ]
  %add3.lcssa = phi <2 x double> [ %add3, %entry ]
  ret i32 0
}