[VPlan] Don't add live-outs for IV phis.

Resume and exit values for inductions are currently still created
outside of VPlan and independent of the induction recipes. Don't add
live-outs for now, as the additional unneeded users can pessimize other
analysis.

Fixes https://github.com/llvm/llvm-project/issues/98660.
This commit is contained in:
Florian Hahn
2024-07-14 20:49:03 +01:00
parent efde640cdf
commit fc9cd3272b
8 changed files with 116 additions and 361 deletions

View File

@@ -8693,6 +8693,14 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, Loop *OrigLoop,
Value *IncomingValue =
ExitPhi.getIncomingValueForBlock(ExitingBB);
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue, Plan);
// Exit values for inductions are computed and updated outside of VPlan and
// independent of induction recipes.
// TODO: Compute induction exit values in VPlan, use VPLiveOuts to update
// live-outs.
if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
!cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
isa<VPWidenPointerInductionRecipe>(V))
continue;
Plan.addLiveOut(&ExitPhi, V);
}
}

View File

@@ -27,26 +27,7 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 8, [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP16]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP18:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP17]]
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP18]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP13]], 1
; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT5]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP22:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT6]], [[TMP21]]
; CHECK-NEXT: [[VECTOR_GEP7:%.*]] = mul <vscale x 2 x i64> [[TMP22]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP7]]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
@@ -63,7 +44,6 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP32]], align 8
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP35]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:

View File

@@ -8,313 +8,17 @@ define i32 @ephemeral_load_and_compare_iv_used_outside(ptr %start, ptr %end) #0
; CHECK-LABEL: define i32 @ephemeral_load_and_compare_iv_used_outside(
; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[END2:%.*]] = ptrtoint ptr [[END]] to i64
; CHECK-NEXT: [[START1:%.*]] = ptrtoint ptr [[START]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[START1]], [[END2]]
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 128
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 128
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -8
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> <i64 0, i64 -8, i64 -16, i64 -24, i64 -32, i64 -40, i64 -48, i64 -56, i64 -64, i64 -72, i64 -80, i64 -88, i64 -96, i64 -104, i64 -112, i64 -120, i64 -128, i64 -136, i64 -144, i64 -152, i64 -160, i64 -168, i64 -176, i64 -184, i64 -192, i64 -200, i64 -208, i64 -216, i64 -224, i64 -232, i64 -240, i64 -248>
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> <i64 -256, i64 -264, i64 -272, i64 -280, i64 -288, i64 -296, i64 -304, i64 -312, i64 -320, i64 -328, i64 -336, i64 -344, i64 -352, i64 -360, i64 -368, i64 -376, i64 -384, i64 -392, i64 -400, i64 -408, i64 -416, i64 -424, i64 -432, i64 -440, i64 -448, i64 -456, i64 -464, i64 -472, i64 -480, i64 -488, i64 -496, i64 -504>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> <i64 -512, i64 -520, i64 -528, i64 -536, i64 -544, i64 -552, i64 -560, i64 -568, i64 -576, i64 -584, i64 -592, i64 -600, i64 -608, i64 -616, i64 -624, i64 -632, i64 -640, i64 -648, i64 -656, i64 -664, i64 -672, i64 -680, i64 -688, i64 -696, i64 -704, i64 -712, i64 -720, i64 -728, i64 -736, i64 -744, i64 -752, i64 -760>
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> <i64 -768, i64 -776, i64 -784, i64 -792, i64 -800, i64 -808, i64 -816, i64 -824, i64 -832, i64 -840, i64 -848, i64 -856, i64 -864, i64 -872, i64 -880, i64 -888, i64 -896, i64 -904, i64 -912, i64 -920, i64 -928, i64 -936, i64 -944, i64 -952, i64 -960, i64 -968, i64 -976, i64 -984, i64 -992, i64 -1000, i64 -1008, i64 -1016>
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i32> @llvm.masked.gather.v32i32.v32p0(<32 x ptr> [[TMP4]], i32 4, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i32> poison)
; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <32 x i32> @llvm.masked.gather.v32i32.v32p0(<32 x ptr> [[TMP5]], i32 4, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i32> poison)
; CHECK-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <32 x i32> @llvm.masked.gather.v32i32.v32p0(<32 x ptr> [[TMP6]], i32 4, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i32> poison)
; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <32 x i32> @llvm.masked.gather.v32i32.v32p0(<32 x ptr> [[TMP7]], i32 4, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i32> poison)
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i32> [[WIDE_MASKED_GATHER3]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i32> [[WIDE_MASKED_GATHER4]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i32> [[WIDE_MASKED_GATHER5]], zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <32 x i1> [[TMP8]], i32 0
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i1> [[TMP8]], i32 1
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i1> [[TMP8]], i32 2
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP14]])
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <32 x i1> [[TMP8]], i32 3
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP15]])
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i1> [[TMP8]], i32 4
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP16]])
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i1> [[TMP8]], i32 5
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP17]])
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <32 x i1> [[TMP8]], i32 6
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP18]])
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i1> [[TMP8]], i32 7
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP19]])
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i1> [[TMP8]], i32 8
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP20]])
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <32 x i1> [[TMP8]], i32 9
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP21]])
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i1> [[TMP8]], i32 10
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP22]])
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i1> [[TMP8]], i32 11
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP23]])
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <32 x i1> [[TMP8]], i32 12
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP24]])
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i1> [[TMP8]], i32 13
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP25]])
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i1> [[TMP8]], i32 14
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP26]])
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <32 x i1> [[TMP8]], i32 15
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP27]])
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i1> [[TMP8]], i32 16
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP28]])
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i1> [[TMP8]], i32 17
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP29]])
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <32 x i1> [[TMP8]], i32 18
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP30]])
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i1> [[TMP8]], i32 19
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP31]])
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i1> [[TMP8]], i32 20
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP32]])
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <32 x i1> [[TMP8]], i32 21
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP33]])
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i1> [[TMP8]], i32 22
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP34]])
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i1> [[TMP8]], i32 23
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP35]])
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <32 x i1> [[TMP8]], i32 24
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP36]])
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i1> [[TMP8]], i32 25
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP37]])
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i1> [[TMP8]], i32 26
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP38]])
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <32 x i1> [[TMP8]], i32 27
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP39]])
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i1> [[TMP8]], i32 28
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP40]])
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i1> [[TMP8]], i32 29
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP41]])
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <32 x i1> [[TMP8]], i32 30
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP42]])
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i1> [[TMP8]], i32 31
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP43]])
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i1> [[TMP9]], i32 0
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP44]])
; CHECK-NEXT: [[TMP45:%.*]] = extractelement <32 x i1> [[TMP9]], i32 1
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP45]])
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i1> [[TMP9]], i32 2
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP46]])
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i1> [[TMP9]], i32 3
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP47]])
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <32 x i1> [[TMP9]], i32 4
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP48]])
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i1> [[TMP9]], i32 5
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP49]])
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i1> [[TMP9]], i32 6
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP50]])
; CHECK-NEXT: [[TMP51:%.*]] = extractelement <32 x i1> [[TMP9]], i32 7
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP51]])
; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i1> [[TMP9]], i32 8
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP52]])
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i1> [[TMP9]], i32 9
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP53]])
; CHECK-NEXT: [[TMP54:%.*]] = extractelement <32 x i1> [[TMP9]], i32 10
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP54]])
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i1> [[TMP9]], i32 11
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP55]])
; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i1> [[TMP9]], i32 12
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP56]])
; CHECK-NEXT: [[TMP57:%.*]] = extractelement <32 x i1> [[TMP9]], i32 13
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP57]])
; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i1> [[TMP9]], i32 14
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP58]])
; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i1> [[TMP9]], i32 15
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP59]])
; CHECK-NEXT: [[TMP60:%.*]] = extractelement <32 x i1> [[TMP9]], i32 16
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP60]])
; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i1> [[TMP9]], i32 17
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP61]])
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i1> [[TMP9]], i32 18
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP62]])
; CHECK-NEXT: [[TMP63:%.*]] = extractelement <32 x i1> [[TMP9]], i32 19
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP63]])
; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i1> [[TMP9]], i32 20
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP64]])
; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i1> [[TMP9]], i32 21
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP65]])
; CHECK-NEXT: [[TMP66:%.*]] = extractelement <32 x i1> [[TMP9]], i32 22
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP66]])
; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i1> [[TMP9]], i32 23
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP67]])
; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i1> [[TMP9]], i32 24
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP68]])
; CHECK-NEXT: [[TMP69:%.*]] = extractelement <32 x i1> [[TMP9]], i32 25
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP69]])
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i1> [[TMP9]], i32 26
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP70]])
; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i1> [[TMP9]], i32 27
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP71]])
; CHECK-NEXT: [[TMP72:%.*]] = extractelement <32 x i1> [[TMP9]], i32 28
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP72]])
; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i1> [[TMP9]], i32 29
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP73]])
; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i1> [[TMP9]], i32 30
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP74]])
; CHECK-NEXT: [[TMP75:%.*]] = extractelement <32 x i1> [[TMP9]], i32 31
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP75]])
; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i1> [[TMP10]], i32 0
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP76]])
; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i1> [[TMP10]], i32 1
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP77]])
; CHECK-NEXT: [[TMP78:%.*]] = extractelement <32 x i1> [[TMP10]], i32 2
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP78]])
; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i1> [[TMP10]], i32 3
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP79]])
; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i1> [[TMP10]], i32 4
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP80]])
; CHECK-NEXT: [[TMP81:%.*]] = extractelement <32 x i1> [[TMP10]], i32 5
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP81]])
; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i1> [[TMP10]], i32 6
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP82]])
; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i1> [[TMP10]], i32 7
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP83]])
; CHECK-NEXT: [[TMP84:%.*]] = extractelement <32 x i1> [[TMP10]], i32 8
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP84]])
; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i1> [[TMP10]], i32 9
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP85]])
; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i1> [[TMP10]], i32 10
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP86]])
; CHECK-NEXT: [[TMP87:%.*]] = extractelement <32 x i1> [[TMP10]], i32 11
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP87]])
; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i1> [[TMP10]], i32 12
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP88]])
; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i1> [[TMP10]], i32 13
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP89]])
; CHECK-NEXT: [[TMP90:%.*]] = extractelement <32 x i1> [[TMP10]], i32 14
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP90]])
; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i1> [[TMP10]], i32 15
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP91]])
; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i1> [[TMP10]], i32 16
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP92]])
; CHECK-NEXT: [[TMP93:%.*]] = extractelement <32 x i1> [[TMP10]], i32 17
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP93]])
; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i1> [[TMP10]], i32 18
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP94]])
; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i1> [[TMP10]], i32 19
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP95]])
; CHECK-NEXT: [[TMP96:%.*]] = extractelement <32 x i1> [[TMP10]], i32 20
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP96]])
; CHECK-NEXT: [[TMP97:%.*]] = extractelement <32 x i1> [[TMP10]], i32 21
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP97]])
; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i1> [[TMP10]], i32 22
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP98]])
; CHECK-NEXT: [[TMP99:%.*]] = extractelement <32 x i1> [[TMP10]], i32 23
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP99]])
; CHECK-NEXT: [[TMP100:%.*]] = extractelement <32 x i1> [[TMP10]], i32 24
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP100]])
; CHECK-NEXT: [[TMP101:%.*]] = extractelement <32 x i1> [[TMP10]], i32 25
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP101]])
; CHECK-NEXT: [[TMP102:%.*]] = extractelement <32 x i1> [[TMP10]], i32 26
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP102]])
; CHECK-NEXT: [[TMP103:%.*]] = extractelement <32 x i1> [[TMP10]], i32 27
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP103]])
; CHECK-NEXT: [[TMP104:%.*]] = extractelement <32 x i1> [[TMP10]], i32 28
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP104]])
; CHECK-NEXT: [[TMP105:%.*]] = extractelement <32 x i1> [[TMP10]], i32 29
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP105]])
; CHECK-NEXT: [[TMP106:%.*]] = extractelement <32 x i1> [[TMP10]], i32 30
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP106]])
; CHECK-NEXT: [[TMP107:%.*]] = extractelement <32 x i1> [[TMP10]], i32 31
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP107]])
; CHECK-NEXT: [[TMP108:%.*]] = extractelement <32 x i1> [[TMP11]], i32 0
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP108]])
; CHECK-NEXT: [[TMP109:%.*]] = extractelement <32 x i1> [[TMP11]], i32 1
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP109]])
; CHECK-NEXT: [[TMP110:%.*]] = extractelement <32 x i1> [[TMP11]], i32 2
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP110]])
; CHECK-NEXT: [[TMP111:%.*]] = extractelement <32 x i1> [[TMP11]], i32 3
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP111]])
; CHECK-NEXT: [[TMP112:%.*]] = extractelement <32 x i1> [[TMP11]], i32 4
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP112]])
; CHECK-NEXT: [[TMP113:%.*]] = extractelement <32 x i1> [[TMP11]], i32 5
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP113]])
; CHECK-NEXT: [[TMP114:%.*]] = extractelement <32 x i1> [[TMP11]], i32 6
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP114]])
; CHECK-NEXT: [[TMP115:%.*]] = extractelement <32 x i1> [[TMP11]], i32 7
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP115]])
; CHECK-NEXT: [[TMP116:%.*]] = extractelement <32 x i1> [[TMP11]], i32 8
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP116]])
; CHECK-NEXT: [[TMP117:%.*]] = extractelement <32 x i1> [[TMP11]], i32 9
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP117]])
; CHECK-NEXT: [[TMP118:%.*]] = extractelement <32 x i1> [[TMP11]], i32 10
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP118]])
; CHECK-NEXT: [[TMP119:%.*]] = extractelement <32 x i1> [[TMP11]], i32 11
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP119]])
; CHECK-NEXT: [[TMP120:%.*]] = extractelement <32 x i1> [[TMP11]], i32 12
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP120]])
; CHECK-NEXT: [[TMP121:%.*]] = extractelement <32 x i1> [[TMP11]], i32 13
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP121]])
; CHECK-NEXT: [[TMP122:%.*]] = extractelement <32 x i1> [[TMP11]], i32 14
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP122]])
; CHECK-NEXT: [[TMP123:%.*]] = extractelement <32 x i1> [[TMP11]], i32 15
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP123]])
; CHECK-NEXT: [[TMP124:%.*]] = extractelement <32 x i1> [[TMP11]], i32 16
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP124]])
; CHECK-NEXT: [[TMP125:%.*]] = extractelement <32 x i1> [[TMP11]], i32 17
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP125]])
; CHECK-NEXT: [[TMP126:%.*]] = extractelement <32 x i1> [[TMP11]], i32 18
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP126]])
; CHECK-NEXT: [[TMP127:%.*]] = extractelement <32 x i1> [[TMP11]], i32 19
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP127]])
; CHECK-NEXT: [[TMP128:%.*]] = extractelement <32 x i1> [[TMP11]], i32 20
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP128]])
; CHECK-NEXT: [[TMP129:%.*]] = extractelement <32 x i1> [[TMP11]], i32 21
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP129]])
; CHECK-NEXT: [[TMP130:%.*]] = extractelement <32 x i1> [[TMP11]], i32 22
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP130]])
; CHECK-NEXT: [[TMP131:%.*]] = extractelement <32 x i1> [[TMP11]], i32 23
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP131]])
; CHECK-NEXT: [[TMP132:%.*]] = extractelement <32 x i1> [[TMP11]], i32 24
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP132]])
; CHECK-NEXT: [[TMP133:%.*]] = extractelement <32 x i1> [[TMP11]], i32 25
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP133]])
; CHECK-NEXT: [[TMP134:%.*]] = extractelement <32 x i1> [[TMP11]], i32 26
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP134]])
; CHECK-NEXT: [[TMP135:%.*]] = extractelement <32 x i1> [[TMP11]], i32 27
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP135]])
; CHECK-NEXT: [[TMP136:%.*]] = extractelement <32 x i1> [[TMP11]], i32 28
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP136]])
; CHECK-NEXT: [[TMP137:%.*]] = extractelement <32 x i1> [[TMP11]], i32 29
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP137]])
; CHECK-NEXT: [[TMP138:%.*]] = extractelement <32 x i1> [[TMP11]], i32 30
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP138]])
; CHECK-NEXT: [[TMP139:%.*]] = extractelement <32 x i1> [[TMP11]], i32 31
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP139]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 -1024
; CHECK-NEXT: [[TMP140:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP140]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1
; CHECK-NEXT: [[TMP141:%.*]] = mul i64 [[CMO]], -8
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP141]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = getelementptr nusw i8, ptr [[IV]], i64 -8
; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[IV]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[L1]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[IV]], [[END]]
; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi ptr [ [[IV]], %[[LOOP]] ], [ [[IND_ESCAPE]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi ptr [ [[IV]], %[[LOOP]] ]
; CHECK-NEXT: [[FINAL_LOAD:%.*]] = load i32, ptr [[IV_LCSSA]], align 4
; CHECK-NEXT: ret i32 [[FINAL_LOAD]]
;
@@ -375,9 +79,3 @@ exit:
declare void @llvm.assume(i1 noundef)
attributes #0 = { "target-cpu"="skylake-avx512" }
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.

View File

@@ -0,0 +1,104 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-vectorize -mcpu=skylake-avx512 -S %s | FileCheck %s
target triple = "x86_64-unknown-linux-gnu"
; Test named after PR98660 (presumably llvm-project issue #98660 -- confirm).
; The loop loads from %dst[%iv | 1] and conditionally stores 0 back to the same
; address when the loaded value is 0. The induction variable %iv is also used
; outside the loop through the exit phi %ret, so the vectorized code has to
; provide an escape value for it from the middle block (IND_ESCAPE below).
define i64 @test_pr98660(ptr %dst, i64 %N) {
; CHECK-LABEL: define i64 @test_pr98660(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 24
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 1
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP4]], 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i32 8
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP9]], i32 16
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 24
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP13]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP14]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP15]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i32>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD1]], zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD2]], zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD3]], zeroinitializer
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP13]], i32 4, <8 x i1> [[TMP17]])
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP14]], i32 4, <8 x i1> [[TMP18]])
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP15]], i32 4, <8 x i1> [[TMP19]])
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP16]], i32 4, <8 x i1> [[TMP20]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[N_VEC]], 1
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[OR]]
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
; CHECK: [[THEN]]:
; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[RET:%.*]] = phi i64 [ [[IV]], %[[LOOP_LATCH]] ], [ [[IND_ESCAPE]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RET]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
; The accessed element index is %iv | 1.
%or = or disjoint i64 %iv, 1
%gep = getelementptr i32, ptr %dst, i64 %or
%l = load i32, ptr %gep
%c = icmp eq i32 %l, 0
; Only store when the loaded value is zero.
br i1 %c, label %then, label %loop.latch
then:
store i32 0, ptr %gep, align 4
br label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
; The exit test compares the pre-increment %iv (not %iv.next) against %N.
%ec = icmp ult i64 %iv, %N
br i1 %ec, label %loop.header, label %exit
exit:
; %iv escapes the loop here; this is the live-out the test is about.
%ret = phi i64 [ %iv, %loop.latch ]
ret i64 %ret
}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.

View File

@@ -202,7 +202,6 @@ exit:
; %iv.2 is dead in the vector loop and only used outside the loop.
; FIXME: Scalar steps for iv.2 are not removed at the moment.
define i32 @iv_2_dead_in_loop_only_used_outside(ptr %ptr) {
; CHECK-LABEL: @iv_2_dead_in_loop_only_used_outside
; CHECK-LABEL: vector.body:
@@ -210,7 +209,7 @@ define i32 @iv_2_dead_in_loop_only_used_outside(ptr %ptr) {
; VEC-NEXT: [[VEC_IND:%.+]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ [[VEC_IND_NEXT:%.+]], %vector.body ]
; CHECK: [[IV_0:%.+]] = add i64 [[INDEX]], 0
; VEC-NOT: add i64 [[INDEX]], 1
; CHECK: [[IV_2_0:%.+]] = add i32 %offset.idx, 0
; CHECK-NOT: add i32 %offset.idx, 0
; CHECK-LABEL: scalar.ph:
; CHECK-NEXT: {{.+}} = phi i64 [ 1002, %middle.block ], [ 0, %entry ]
; CHECK-NEXT: {{.+}} = phi i32 [ 2004, %middle.block ], [ 0, %entry ]

View File

@@ -42,9 +42,6 @@ define i32 @test(ptr %arr, i64 %n) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[TMP17:%.*]] = add nsw i64 [[TMP13]], -1
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0

View File

@@ -27,15 +27,6 @@ define void @test1_pr58811() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 0, [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = mul i32 1, [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = mul i32 2, [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = mul i32 3, [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], [[TMP8]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -131,15 +122,6 @@ define void @test2_pr58811() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 0, [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = mul i32 1, [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = mul i32 2, [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = mul i32 3, [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], [[TMP8]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -218,15 +200,6 @@ define void @test3_pr58811() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = mul i32 0, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = mul i32 1, [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = mul i32 2, [[TMP3]]
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = mul i32 3, [[TMP3]]
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET_IDX]], [[TMP10]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]

View File

@@ -16,10 +16,6 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IND_END]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]