[LV] Bail out early if BTC+1 wraps.

Currently we fail to detect the case where BTC + 1 wraps, i.e. the
vector trip count is 0, In those cases, the minimum iteration count
check will fail, and the vector code will never be executed.

Explicitly check for this condition in computeMaxVF and avoid trying to
vectorize alltogether.

Note that a number of tests needed to be updated, because the vector
loop would never be executed given the input IR.

Fixes https://github.com/llvm/llvm-project/issues/122558.
This commit is contained in:
Florian Hahn
2025-01-14 22:07:38 +00:00
parent 25f28ddd69
commit 1de3dc7d23
27 changed files with 3247 additions and 501 deletions

View File

@@ -4052,7 +4052,8 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
return FixedScalableVFPair::getNone();
}
unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
ScalarEvolution *SE = PSE.getSE();
unsigned TC = SE->getSmallConstantTripCount(TheLoop);
unsigned MaxTC = PSE.getSmallConstantMaxTripCount();
LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
if (TC != MaxTC)
@@ -4064,6 +4065,22 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
return FixedScalableVFPair::getNone();
}
// If BTC matches the widest induction type and is -1 then the trip count
// computation will wrap to 0 and the vector trip count will be 0. Do not try
// to vectorize.
const SCEV *BTC = SE->getBackedgeTakenCount(TheLoop);
if (!isa<SCEVCouldNotCompute>(BTC) &&
BTC->getType()->getScalarSizeInBits() >=
Legal->getWidestInductionType()->getScalarSizeInBits() &&
SE->isKnownPredicate(CmpInst::ICMP_EQ, BTC,
SE->getMinusOne(BTC->getType()))) {
reportVectorizationFailure(
"Trip count computation wrapped",
"backedge-taken count is -1, loop trip count wrapped to 0",
"TripCountWrapped", ORE, TheLoop);
return FixedScalableVFPair::getNone();
}
switch (ScalarEpilogueStatus) {
case CM_ScalarEpilogueAllowed:
return computeFeasibleMaxVF(MaxTC, UserVF, false);

View File

@@ -26,7 +26,7 @@ for.body14.i.i: ; preds = %for.body14.i.i, %en
%next19.i.i = getelementptr inbounds %struct.CvNode1D, ptr %dst, i32 %i.1424.i.i, i32 1
store ptr %dst, ptr %next19.i.i, align 4
%inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1
%exitcond438.i.i = icmp eq i32 %inc21.i.i, 0
%exitcond438.i.i = icmp eq i32 %inc21.i.i, 1000
br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i
for.end22.i.i: ; preds = %for.body14.i.i
@@ -52,7 +52,7 @@ for.body14.i.i: ; preds = %for.body14.i.i, %en
%val.i.i = getelementptr inbounds %struct.CvNode1D2, ptr %arrayidx15.i.i1427, i32 0, i32 1
store double 0xC415AF1D80000000, ptr %val.i.i, align 4
%inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1
%exitcond438.i.i = icmp eq i32 %inc21.i.i, 0
%exitcond438.i.i = icmp eq i32 %inc21.i.i, 1000
br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i
for.end22.i.i: ; preds = %for.body14.i.i
@@ -79,7 +79,7 @@ for.body14.i.i: ; preds = %for.body14.i.i, %en
store double %load_d, ptr %dst.ptr, align 4
store ptr %load_p, ptr %dst.ptr.1, align 4
%inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1
%exitcond438.i.i = icmp eq i32 %inc21.i.i, 0
%exitcond438.i.i = icmp eq i32 %inc21.i.i, 1000
br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i
for.end22.i.i: ; preds = %for.body14.i.i
@@ -107,7 +107,7 @@ for.body14.i.i: ; preds = %for.body14.i.i, %en
store double %load_d, ptr %dst.ptr, align 4
store ptr %load_p, ptr %dst.ptr.1, align 4
%inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1
%exitcond438.i.i = icmp eq i32 %inc21.i.i, 0
%exitcond438.i.i = icmp eq i32 %inc21.i.i, 1000
br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i
for.end22.i.i: ; preds = %for.body14.i.i

View File

@@ -10,10 +10,10 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 0, [[TMP1]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -31,7 +31,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-NEXT: [[TMP9:%.*]] = mul <16 x i32> [[TMP8]], [[TMP5]]
; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP9]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
@@ -42,12 +42,12 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 0, [[TMP13]]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 0, [[TMP15]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 0, [[N_MOD_VF]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP15]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
@@ -71,8 +71,29 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP27]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, [[N_VEC]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 1024, [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i32 [ [[TMP29]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[TMP11]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX7]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
; CHECK-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: for.exit:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ [[TMP29]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
;
entry:
br label %for.body
@@ -89,7 +110,7 @@ for.body: ; preds = %for.body, %entry
%mul = mul i32 %ext.b, %ext.a
%add = add i32 %mul, %accum
%iv.next = add i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 0
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %for.exit, label %for.body
for.exit: ; preds = %for.body
@@ -211,3 +232,13 @@ while.end.loopexit: ; preds = %while.body
attributes #0 = { vscale_range(1,16) "target-features"="+sve" }
attributes #1 = { "target-cpu"="apple-m1" }
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
;.

View File

@@ -9,7 +9,7 @@ define i32 @dotp_z_s(ptr %a, ptr %b) #0 {
; CHECK-LABEL: define i32 @dotp_z_s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -36,17 +36,38 @@ define i32 @dotp_z_s(ptr %a, ptr %b) #0 {
; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]])
; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP12]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]]
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
; CHECK-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.exit:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
;
; CHECK-NOI8MM-LABEL: define i32 @dotp_z_s(
; CHECK-NOI8MM-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NOI8MM-NEXT: entry:
; CHECK-NOI8MM-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NOI8MM-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NOI8MM: vector.ph:
; CHECK-NOI8MM-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-NOI8MM: vector.body:
@@ -73,12 +94,33 @@ define i32 @dotp_z_s(ptr %a, ptr %b) #0 {
; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]])
; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE5]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP12]])
; CHECK-NOI8MM-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NOI8MM-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NOI8MM-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NOI8MM-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NOI8MM: middle.block:
; CHECK-NOI8MM-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]]
; CHECK-NOI8MM-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; CHECK-NOI8MM-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK-NOI8MM: scalar.ph:
; CHECK-NOI8MM-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NOI8MM-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NOI8MM-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-NOI8MM: for.body:
; CHECK-NOI8MM-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NOI8MM-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NOI8MM-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
; CHECK-NOI8MM-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
; CHECK-NOI8MM-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32
; CHECK-NOI8MM-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
; CHECK-NOI8MM-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
; CHECK-NOI8MM-NEXT: [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32
; CHECK-NOI8MM-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
; CHECK-NOI8MM-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]]
; CHECK-NOI8MM-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NOI8MM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NOI8MM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NOI8MM: for.exit:
; CHECK-NOI8MM-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-NOI8MM-NEXT: ret i32 [[ADD_LCSSA]]
;
entry:
br label %for.body
@@ -95,7 +137,7 @@ for.body: ; preds = %for.body, %entry
%mul = mul i32 %ext.b, %ext.a
%add = add i32 %mul, %accum
%iv.next = add i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 0
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %for.exit, label %for.body
for.exit: ; preds = %for.body
@@ -104,9 +146,9 @@ for.exit: ; preds = %for.body
define i32 @dotp_s_z(ptr %a, ptr %b) #0 {
; CHECK-LABEL: define i32 @dotp_s_z(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -133,17 +175,38 @@ define i32 @dotp_s_z(ptr %a, ptr %b) #0 {
; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]])
; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP12]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]]
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
; CHECK-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: for.exit:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
;
; CHECK-NOI8MM-LABEL: define i32 @dotp_s_z(
; CHECK-NOI8MM-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NOI8MM-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NOI8MM-NEXT: entry:
; CHECK-NOI8MM-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NOI8MM-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NOI8MM: vector.ph:
; CHECK-NOI8MM-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-NOI8MM: vector.body:
@@ -170,12 +233,33 @@ define i32 @dotp_s_z(ptr %a, ptr %b) #0 {
; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]])
; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE5]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP12]])
; CHECK-NOI8MM-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NOI8MM-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NOI8MM-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NOI8MM-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NOI8MM-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-NOI8MM: middle.block:
; CHECK-NOI8MM-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]]
; CHECK-NOI8MM-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; CHECK-NOI8MM-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK-NOI8MM: scalar.ph:
; CHECK-NOI8MM-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NOI8MM-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NOI8MM-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-NOI8MM: for.body:
; CHECK-NOI8MM-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NOI8MM-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NOI8MM-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
; CHECK-NOI8MM-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
; CHECK-NOI8MM-NEXT: [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32
; CHECK-NOI8MM-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]]
; CHECK-NOI8MM-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
; CHECK-NOI8MM-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32
; CHECK-NOI8MM-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]]
; CHECK-NOI8MM-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]]
; CHECK-NOI8MM-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NOI8MM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NOI8MM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-NOI8MM: for.exit:
; CHECK-NOI8MM-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-NOI8MM-NEXT: ret i32 [[ADD_LCSSA]]
;
entry:
br label %for.body
@@ -192,7 +276,7 @@ for.body: ; preds = %for.body, %entry
%mul = mul i32 %ext.b, %ext.a
%add = add i32 %mul, %accum
%iv.next = add i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 0
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %for.exit, label %for.body
for.exit: ; preds = %for.body
@@ -204,3 +288,18 @@ for.exit: ; preds = %for.body
!9 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
!10 = !{!"llvm.loop.vectorize.enable", i1 true}
attributes #0 = { vscale_range(1,16) "target-features"="+sve" }
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
;.
; CHECK-NOI8MM: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK-NOI8MM: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-NOI8MM: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK-NOI8MM: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK-NOI8MM: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK-NOI8MM: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
;.

View File

@@ -8,7 +8,7 @@ define i32 @not_dotp(ptr %a, ptr %b) {
; CHECK-LABEL: define i32 @not_dotp(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -35,7 +35,7 @@ define i32 @not_dotp(ptr %a, ptr %b) {
; CHECK-NEXT: [[TMP13]] = add <16 x i32> [[TMP11]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP14]] = add <16 x i32> [[TMP12]], [[VEC_PHI1]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
;
entry:
@@ -53,7 +53,7 @@ for.body: ; preds = %for.body, %entry
%mul = mul i32 %ext.b, %ext.a
%add = add i32 %mul, %accum
%iv.next = add i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 0
%exitcond.not = icmp eq i64 %iv.next, 1000
br i1 %exitcond.not, label %for.exit, label %for.body
for.exit: ; preds = %for.body

View File

@@ -148,18 +148,16 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-LABEL: define void @trunc_store(
; DEFAULT-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i16 [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; DEFAULT-NEXT: iter.check:
; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 0, [[TMP1]]
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; DEFAULT: vector.memcheck:
; DEFAULT-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 1000
; DEFAULT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 8
; DEFAULT-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP]]
; DEFAULT-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[DST]]
; DEFAULT-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1]]
; DEFAULT-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; DEFAULT-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; DEFAULT: vector.main.loop.iter.check:
; DEFAULT-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT: vector.ph:
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT3]], <16 x i16> poison, <16 x i32> zeroinitializer
@@ -180,46 +178,36 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP12]], align 1, !alias.scope [[META8:![0-9]+]], !noalias [[META5]]
; DEFAULT-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP13]], align 1, !alias.scope [[META8]], !noalias [[META5]]
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; DEFAULT-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; DEFAULT-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; DEFAULT: middle.block:
; DEFAULT-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; DEFAULT-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; DEFAULT: vec.epilog.iter.check:
; DEFAULT-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2
; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 0, [[TMP16]]
; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; DEFAULT: vec.epilog.ph:
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; DEFAULT-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 2
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 0, [[TMP18]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 0, [[N_MOD_VF]]
; DEFAULT-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[X]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT6]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP24:%.*]] = trunc <vscale x 2 x i16> [[BROADCAST_SPLAT7]] to <vscale x 2 x i8>
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i16> poison, i16 [[X]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT4]], <8 x i16> poison, <8 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP15:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; DEFAULT: vec.epilog.vector.body:
; DEFAULT-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP21:%.*]] = add i64 [[INDEX5]], 0
; DEFAULT-NEXT: [[TMP22:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META11:![0-9]+]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP22]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP23:%.*]] = trunc <vscale x 2 x i64> [[BROADCAST_SPLAT]] to <vscale x 2 x i8>
; DEFAULT-NEXT: [[TMP25:%.*]] = and <vscale x 2 x i8> [[TMP23]], [[TMP24]]
; DEFAULT-NEXT: [[TMP16:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META11:![0-9]+]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP16]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT7]], <8 x i64> poison, <8 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP18:%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8>
; DEFAULT-NEXT: [[TMP14:%.*]] = and <8 x i8> [[TMP18]], [[TMP15]]
; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP21]]
; DEFAULT-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i32 0
; DEFAULT-NEXT: store <vscale x 2 x i8> [[TMP25]], ptr [[TMP27]], align 1, !alias.scope [[META14:![0-9]+]], !noalias [[META11]]
; DEFAULT-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], [[TMP20]]
; DEFAULT-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; DEFAULT-NEXT: store <8 x i8> [[TMP14]], ptr [[TMP27]], align 1, !alias.scope [[META14:![0-9]+]], !noalias [[META11]]
; DEFAULT-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 8
; DEFAULT-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT8]], 1000
; DEFAULT-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; DEFAULT: vec.epilog.middle.block:
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; DEFAULT-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; DEFAULT: vec.epilog.scalar.ph:
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ]
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 992, [[VEC_EPILOG_ITER_CHECK]] ]
; DEFAULT-NEXT: br label [[LOOP:%.*]]
; DEFAULT: loop:
; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -230,7 +218,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
; DEFAULT-NEXT: store i8 [[TRUNC]], ptr [[GEP]], align 1
; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0
; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
; DEFAULT: exit:
; DEFAULT-NEXT: ret void
@@ -238,36 +226,49 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; PRED-LABEL: define void @trunc_store(
; PRED-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i16 [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; PRED-NEXT: entry:
; PRED-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; PRED: vector.memcheck:
; PRED-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 1000
; PRED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 8
; PRED-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP]]
; PRED-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[DST]]
; PRED-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1]]
; PRED-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; PRED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; PRED: vector.ph:
; PRED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT1]], <16 x i16> poison, <16 x i32> zeroinitializer
; PRED-NEXT: [[TMP3:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT2]] to <16 x i8>
; PRED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP10]], 2
; PRED-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 1000, [[TMP2]]
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; PRED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1000)
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[X]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
; PRED-NEXT: [[TMP11:%.*]] = trunc <vscale x 2 x i16> [[BROADCAST_SPLAT]] to <vscale x 2 x i8>
; PRED-NEXT: br label [[VECTOR_BODY:%.*]]
; PRED: vector.body:
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; PRED-NEXT: [[TMP1:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META4:![0-9]+]]
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer
; PRED-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT]] to <16 x i8>
; PRED-NEXT: [[TMP4:%.*]] = and <16 x i8> [[TMP2]], [[TMP3]]
; PRED-NEXT: [[TMP7:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META4:![0-9]+]]
; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP7]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; PRED-NEXT: [[TMP8:%.*]] = trunc <vscale x 2 x i64> [[BROADCAST_SPLAT3]] to <vscale x 2 x i8>
; PRED-NEXT: [[TMP9:%.*]] = and <vscale x 2 x i8> [[TMP8]], [[TMP11]]
; PRED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]]
; PRED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0
; PRED-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1, !alias.scope [[META7:![0-9]+]], !noalias [[META4]]
; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; PRED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; PRED-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; PRED-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP9]], ptr [[TMP6]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META7:![0-9]+]], !noalias [[META4]]
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1000)
; PRED-NEXT: [[TMP12:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
; PRED-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x i1> [[TMP12]], i32 0
; PRED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; PRED: middle.block:
; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; PRED: scalar.ph:
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ]
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ]
; PRED-NEXT: br label [[LOOP:%.*]]
; PRED: loop:
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -278,7 +279,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; PRED-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
; PRED-NEXT: store i8 [[TRUNC]], ptr [[GEP]], align 1
; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0
; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
; PRED: exit:
; PRED-NEXT: ret void
@@ -295,7 +296,7 @@ loop:
%gep = getelementptr i8, ptr %dst, i64 %iv
store i8 %trunc, ptr %gep, align 1
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv.next, 0
%ec = icmp eq i64 %iv.next, 1000
br i1 %ec, label %exit, label %loop
exit:

View File

@@ -972,7 +972,7 @@ loop:
%red.next = fadd double %for, %red
%for.next = sitofp i32 %iv to double
%iv.next = add nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 0
%ec = icmp eq i32 %iv.next, 1024
br i1 %ec, label %exit, label %loop, !llvm.loop !13
exit:

View File

@@ -23,7 +23,7 @@ loop.body:
%add = add i64 %a, %b
store i64 %add, ptr %addr
%iv.next = add nsw i32 %iv, 1
%cond = icmp ne i32 %iv.next, 0
%cond = icmp ne i32 %iv.next, 1000
br i1 %cond, label %loop.body, label %exit, !llvm.loop !0
exit:

View File

@@ -10,7 +10,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
; CHECK: VPlan 'Initial VPlan for VF={8,16},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<0> = original trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): vector.ph
@@ -42,7 +42,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<[[ACC]]>, ir<[[REDUCE]]>
; CHECK-NEXT: EMIT vp<[[EXTRACT:%.+]]> = extract-from-end vp<[[RED_RESULT]]>, ir<1>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<0>, vp<%1>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<%1>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
@@ -63,7 +63,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
; CHECK-NEXT: IR %mul = mul i32 %ext.b, %ext.a
; CHECK-NEXT: IR %add = add i32 %mul, %accum
; CHECK-NEXT: IR %iv.next = add i64 %iv, 1
; CHECK-NEXT: IR %exitcond.not = icmp eq i64 %iv.next, 0
; CHECK-NEXT: IR %exitcond.not = icmp eq i64 %iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
@@ -86,7 +86,7 @@ for.body: ; preds = %for.body, %entry
%mul = mul i32 %ext.b, %ext.a
%add = add i32 %mul, %accum
%iv.next = add i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 0
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %exit, label %for.body
exit:

View File

@@ -8,17 +8,17 @@
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
; Function Attrs: optsize
define i32 @f() #0 {
define i32 @f(ptr %src) #0 {
entry:
br label %loop
loop:
%g.016 = phi i32 [ 0, %entry ], [ %g.1.lcssa, %loop ]
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%0 = load i8, ptr undef, align 1
%g.1.lcssa = add i32 %g.016, undef
%0 = load i8, ptr %src, align 1
%g.1.lcssa = add i32 %g.016, 1
%iv.next = add nsw i32 %iv, 1
%exitcond = icmp eq i32 %iv.next, 0
%exitcond = icmp eq i32 %iv.next, 1000
br i1 %exitcond, label %exit, label %loop
exit:

View File

@@ -64,7 +64,7 @@ define i64 @second_lshr_operand_zero_via_scev() {
; CHECK-LABEL: define i64 @second_lshr_operand_zero_via_scev() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[EXT_0:%.*]] = sext i8 0 to i32
; CHECK-NEXT: br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -90,14 +90,14 @@ define i64 @second_lshr_operand_zero_via_scev() {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[STEP_ADD4]], splat (i32 2)
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i64> [[TMP11]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[BIN_RDX]])
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOPS:.*]]
; CHECK: [[LOOPS]]:
@@ -111,7 +111,7 @@ define i64 @second_lshr_operand_zero_via_scev() {
; CHECK-NEXT: [[RED_NEXT_V:%.*]] = select i1 [[C]], i64 [[AND]], i64 [[CONV_1]]
; CHECK-NEXT: [[RED_NEXT]] = or i64 [[RED_NEXT_V]], [[RED]]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOPS]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOPS]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ]
@@ -132,7 +132,7 @@ loops:
%red.next.v = select i1 %c, i64 %and, i64 %conv.1
%red.next = or i64 %red.next.v, %red
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv.next, 0
%ec = icmp eq i64 %iv.next, 1000
br i1 %ec, label %exit, label %loops
exit:

View File

@@ -1159,47 +1159,39 @@ define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 {
; CHECK-LABEL: @narrowed_reduction(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP:%.*]] to i32
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i32> [[VEC_PHI]], splat (i32 1)
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i32> [[VEC_PHI1]], splat (i32 1)
; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP0]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP1]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP2]] to <16 x i1>
; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP6]] = zext <16 x i1> [[TMP4]] to <16 x i32>
; CHECK-NEXT: [[TMP7]] = zext <16 x i1> [[TMP5]] to <16 x i32>
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i1>
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <16 x i1> [[TMP10]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[BIN_RDX]])
; CHECK-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP10]])
; CHECK-NEXT: [[TMP21:%.*]] = zext i1 [[TMP20]] to i32
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 17, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[OR13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INC:%.*]], [[LOOP1]] ]
; CHECK-NEXT: [[OR13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[OR:%.*]], [[LOOP1]] ]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR13]], 1
; CHECK-NEXT: [[OR]] = or i32 [[AND]], [[CONV]]
; CHECK-NEXT: [[INC]] = add i32 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 0
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 16
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[LOOP]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[LOOP1]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[OR_LCSSA]]
;
entry:
@@ -1212,7 +1204,7 @@ loop:
%and = and i32 %or13, 1
%or = or i32 %and, %conv
%inc = add i32 %iv, 1
%ec = icmp eq i32 %iv, 0
%ec = icmp eq i32 %iv, 16
br i1 %ec, label %exit, label %loop
exit:

View File

@@ -352,9 +352,7 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
; CHECK-LABEL: define void @drop_zext_nneg(
; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: br i1 true, label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -372,12 +370,12 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
; CHECK-NEXT: store double [[TMP6]], ptr [[P1]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[BODY:%.*]]
; CHECK: body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[NEXT:%.*]], [[ELSE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -394,7 +392,7 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
; CHECK-NEXT: [[PHI:%.*]] = phi double [ [[TMP9]], [[THEN]] ], [ 0.000000e+00, [[BODY]] ]
; CHECK-NEXT: store double [[PHI]], ptr [[P1]], align 8
; CHECK-NEXT: [[NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[NEXT]], 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[NEXT]], 1024
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT]], label [[BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
@@ -419,7 +417,7 @@ else:
%phi = phi double [ %1, %then ], [ 0.000000e+00, %body ]
store double %phi, ptr %p1, align 8
%next = add i64 %iv, 1
%cmp = icmp eq i64 %next, 0
%cmp = icmp eq i64 %next, 1024
br i1 %cmp, label %exit, label %body
exit:

View File

@@ -8,78 +8,7 @@ define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noali
; CHECK-LABEL: define void @test_free_instructions_feeding_geps_for_interleave_groups(
; CHECK-SAME: ptr noalias [[P_INVAR:%.*]], ptr noalias [[DST_1:%.*]], ptr noalias [[DST_2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST_1]], i64 8
; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult ptr [[TMP1]], [[SCEVGEP]]
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[TMP2]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST_1]], i64 12
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 0, [[MUL_RESULT3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult ptr [[TMP5]], [[SCEVGEP1]]
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[MUL_OVERFLOW4]]
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[DST_1]], i64 4
; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 0, [[MUL_RESULT7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult ptr [[TMP9]], [[SCEVGEP5]]
; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP10]], [[MUL_OVERFLOW8]]
; CHECK-NEXT: [[MUL9:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT10:%.*]] = extractvalue { i64, i1 } [[MUL9]], 0
; CHECK-NEXT: [[MUL_OVERFLOW11:%.*]] = extractvalue { i64, i1 } [[MUL9]], 1
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 0, [[MUL_RESULT10]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST_1]], i64 [[MUL_RESULT10]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp ult ptr [[TMP13]], [[DST_1]]
; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW11]]
; CHECK-NEXT: [[SCEVGEP12:%.*]] = getelementptr i8, ptr [[DST_2]], i64 8
; CHECK-NEXT: [[MUL13:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT14:%.*]] = extractvalue { i64, i1 } [[MUL13]], 0
; CHECK-NEXT: [[MUL_OVERFLOW15:%.*]] = extractvalue { i64, i1 } [[MUL13]], 1
; CHECK-NEXT: [[TMP16:%.*]] = sub i64 0, [[MUL_RESULT14]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[SCEVGEP12]], i64 [[MUL_RESULT14]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp ult ptr [[TMP17]], [[SCEVGEP12]]
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW15]]
; CHECK-NEXT: [[SCEVGEP16:%.*]] = getelementptr i8, ptr [[DST_2]], i64 12
; CHECK-NEXT: [[MUL17:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT18:%.*]] = extractvalue { i64, i1 } [[MUL17]], 0
; CHECK-NEXT: [[MUL_OVERFLOW19:%.*]] = extractvalue { i64, i1 } [[MUL17]], 1
; CHECK-NEXT: [[TMP20:%.*]] = sub i64 0, [[MUL_RESULT18]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[SCEVGEP16]], i64 [[MUL_RESULT18]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp ult ptr [[TMP21]], [[SCEVGEP16]]
; CHECK-NEXT: [[TMP23:%.*]] = or i1 [[TMP22]], [[MUL_OVERFLOW19]]
; CHECK-NEXT: [[SCEVGEP20:%.*]] = getelementptr i8, ptr [[DST_2]], i64 4
; CHECK-NEXT: [[MUL21:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT22:%.*]] = extractvalue { i64, i1 } [[MUL21]], 0
; CHECK-NEXT: [[MUL_OVERFLOW23:%.*]] = extractvalue { i64, i1 } [[MUL21]], 1
; CHECK-NEXT: [[TMP24:%.*]] = sub i64 0, [[MUL_RESULT22]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[SCEVGEP20]], i64 [[MUL_RESULT22]]
; CHECK-NEXT: [[TMP26:%.*]] = icmp ult ptr [[TMP25]], [[SCEVGEP20]]
; CHECK-NEXT: [[TMP27:%.*]] = or i1 [[TMP26]], [[MUL_OVERFLOW23]]
; CHECK-NEXT: [[MUL24:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
; CHECK-NEXT: [[MUL_RESULT25:%.*]] = extractvalue { i64, i1 } [[MUL24]], 0
; CHECK-NEXT: [[MUL_OVERFLOW26:%.*]] = extractvalue { i64, i1 } [[MUL24]], 1
; CHECK-NEXT: [[TMP28:%.*]] = sub i64 0, [[MUL_RESULT25]]
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[DST_2]], i64 [[MUL_RESULT25]]
; CHECK-NEXT: [[TMP30:%.*]] = icmp ult ptr [[TMP29]], [[DST_2]]
; CHECK-NEXT: [[TMP31:%.*]] = or i1 [[TMP30]], [[MUL_OVERFLOW26]]
; CHECK-NEXT: [[TMP32:%.*]] = or i1 [[TMP3]], [[TMP7]]
; CHECK-NEXT: [[TMP33:%.*]] = or i1 [[TMP32]], [[TMP11]]
; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP15]]
; CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP19]]
; CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP23]]
; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[TMP36]], [[TMP27]]
; CHECK-NEXT: [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP31]]
; CHECK-NEXT: br i1 [[TMP38]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -106,12 +35,12 @@ define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noali
; CHECK-NEXT: [[INTERLEAVED_VEC31:%.*]] = shufflevector <8 x float> [[TMP51]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC31]], ptr [[TMP49]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP53:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP53]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -139,7 +68,7 @@ define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noali
; CHECK-NEXT: [[GEP_DST_276:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_3]]
; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_276]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
@@ -173,7 +102,7 @@ loop:
%gep.dst.276 = getelementptr float, ptr %dst.2, i64 %add.3
store float 0.000000e+00, ptr %gep.dst.276, align 4
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv.next, 0
%ec = icmp eq i64 %iv.next, 1024
br i1 %ec, label %exit, label %loop
exit:
@@ -771,7 +700,7 @@ attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" }
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
; CHECK: [[META6]] = !{[[META7:![0-9]+]]}

View File

@@ -11,7 +11,7 @@ define void @smax_call_uniform(ptr %dst, i64 %x) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 -68, -69
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[X]], 0
; CHECK-NEXT: br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
@@ -58,12 +58,12 @@ define void @smax_call_uniform(ptr %dst, i64 %x) {
; CHECK-NEXT: store i64 0, ptr [[GEP]], align 8
; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -78,7 +78,7 @@ define void @smax_call_uniform(ptr %dst, i64 %x) {
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
; CHECK-NEXT: store i64 0, ptr [[GEP1]], align 8
; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT1]], 0
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT1]], 1024
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
@@ -103,7 +103,7 @@ loop.latch:
%gep = getelementptr i64, ptr %dst, i64 %add
store i64 0, ptr %gep, align 8
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv.next, 0
%ec = icmp eq i64 %iv.next, 1024
br i1 %ec, label %exit, label %loop.header
exit:

View File

@@ -0,0 +1,35 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-vectorize -force-vector-width=2 -epilogue-vectorization-force-VF=2 -S %s | FileCheck %s
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
; Test case for https://github.com/llvm/llvm-project/issues/122558.
define void @vector_trip_count_0_as_btc_is_all_1(ptr %dst) #0 {
; CHECK-LABEL: define void @vector_trip_count_0_as_btc_is_all_1(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
; CHECK-NEXT: store i32 [[IV_NEXT]], ptr [[GEP]], align 4
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%iv.next = add i32 %iv, -1
%gep = getelementptr inbounds i32, ptr %dst, i32 %iv
store i32 %iv.next, ptr %gep, align 4
%ec = icmp eq i32 %iv.next, 0
br i1 %ec, label %exit, label %loop
exit:
ret void
}

View File

@@ -654,7 +654,7 @@ define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) {
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP2]], i32 3
; CHECK-NEXT: store double [[TMP6]], ptr [[P:%.*]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2
@@ -664,7 +664,7 @@ define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) {
; CHECK-NEXT: phi double [ [[TMP0]], %middle.block ], [ 0.000000e+00, %Entry ]
; CHECK-NEXT: phi double [ [[TMP3]], %middle.block ], [ 0.000000e+00, %Entry ]
; CHECK-NEXT: phi double [ [[VECTOR_RECUR_EXTRACT9]], %middle.block ], [ 0.000000e+00, %Entry ]
; CHECK-NEXT: %bc.resume.val = phi i64 [ 0, %middle.block ], [ 0, %Entry ]
; CHECK-NEXT: %bc.resume.val = phi i64 [ 1000, %middle.block ], [ 0, %Entry ]
; CHECK: End:
; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[TMP0]], %middle.block ]
; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[TMP3]], %middle.block ]
@@ -684,7 +684,7 @@ Loop:
%iv.next= add nuw nsw i64 %iv, 1
%l2 = load double, ptr %b, align 8
store double %div, ptr %p, align 8
%cond = icmp eq i64 %iv.next, 0
%cond = icmp eq i64 %iv.next, 1000
br i1 %cond, label %End, label %Loop
End:

View File

@@ -555,29 +555,31 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) {
; UNROLL-LABEL: @minimal_bit_widths_with_aliasing_store(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL: for.body:
; UNROLL-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ]
; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP0]]
; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP1]]
; UNROLL-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
; UNROLL-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
; UNROLL-NEXT: store i8 0, ptr [[TMP2]], align 1
; UNROLL-NEXT: store i8 0, ptr [[TMP4]], align 1
; UNROLL-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; UNROLL: if.then:
; UNROLL-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
; UNROLL-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; UNROLL-NEXT: store i8 [[TMP5]], ptr [[TMP2]], align 1
; UNROLL: pred.store.if:
; UNROLL-NEXT: store i8 [[TMP3]], ptr [[TMP2]], align 1
; UNROLL-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1
; UNROLL-NEXT: br label [[FOR_INC]]
; UNROLL: for.inc:
; UNROLL-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
; UNROLL-NEXT: [[TMP7]] = add i64 [[TMP1]], -1
; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; UNROLL: pred.store.continue2:
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; UNROLL-NEXT: br i1 [[TMP6]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; UNROLL: for.end:
; UNROLL-NEXT: ret void
;
; UNROLL-NOSIMPLIFY-LABEL: @minimal_bit_widths_with_aliasing_store(
; UNROLL-NOSIMPLIFY-NEXT: entry:
; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NOSIMPLIFY-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NOSIMPLIFY: vector.ph:
; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: vector.body:
@@ -601,13 +603,13 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) {
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE2]]
; UNROLL-NOSIMPLIFY: pred.store.continue2:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; UNROLL-NOSIMPLIFY: middle.block:
; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NOSIMPLIFY: scalar.ph:
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ]
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: for.body:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -631,24 +633,36 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) {
;
; VEC-LABEL: @minimal_bit_widths_with_aliasing_store(
; VEC-NEXT: entry:
; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C1:%.*]], i64 0
; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
; VEC-NEXT: br label [[FOR_BODY:%.*]]
; VEC: for.body:
; VEC-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; VEC-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
; VEC: vector.body:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP0]]
; VEC-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
; VEC-NEXT: store i8 0, ptr [[TMP2]], align 1
; VEC-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; VEC: if.then:
; VEC-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
; VEC-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP2]], align 1
; VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i32 0
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1
; VEC-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP3]], align 1
; VEC-NEXT: [[C:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0
; VEC-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
; VEC: pred.store.if:
; VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP0]]
; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1
; VEC-NEXT: br label [[FOR_INC]]
; VEC: for.inc:
; VEC-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
; VEC-NEXT: [[TMP7]] = add i64 [[TMP1]], -1
; VEC-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
; VEC-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VEC: pred.store.continue:
; VEC-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1
; VEC-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.if1:
; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP7]]
; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
; VEC-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1
; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.continue2:
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VEC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VEC-NEXT: br i1 [[TMP10]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VEC: for.end:
; VEC-NEXT: ret void
;
@@ -657,7 +671,7 @@ entry:
for.body:
%tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ]
%tmp1 = phi i64 [ %tmp7, %for.inc ], [ 0, %entry ]
%tmp1 = phi i64 [ %tmp7, %for.inc ], [ 1000, %entry ]
%tmp2 = getelementptr i8, ptr %ptr, i64 %tmp0
%tmp3 = load i8, ptr %tmp2, align 1
store i8 0, ptr %tmp2

View File

@@ -3031,128 +3031,67 @@ exit:
}
; This loop has a backedge taken count of i32_max. We need to check for this
; condition and branch directly to the scalar loop.
; condition and can skip vectorizing.
define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
define i32 @max_i32_backedgetaken() {
; CHECK-LABEL: @max_i32_backedgetaken(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP0]])
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[B_0:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[A_0_AND:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4
; CHECK-NEXT: [[B_NEXT]] = add i32 [[B_0]], -1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[B_NEXT]], 0
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ]
; CHECK-NEXT: ret i32 [[A_0_AND_LCSSA]]
;
; IND-LABEL: @max_i32_backedgetaken(
; IND-NEXT: entry:
; IND-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IND: vector.ph:
; IND-NEXT: br label [[VECTOR_BODY:%.*]]
; IND: vector.body:
; IND-NEXT: br i1 poison, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; IND: middle.block:
; IND-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; IND: scalar.ph:
; IND-NEXT: br label [[LOOP:%.*]]
; IND: loop:
; IND-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
; IND-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
; IND-NEXT: [[B_NEXT]] = add i32 [[B_0]], -1
; IND-NEXT: [[EC:%.*]] = icmp eq i32 [[B_NEXT]], 0
; IND-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
; IND-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
; IND: exit:
; IND-NEXT: ret i32 0
;
; UNROLL-LABEL: @max_i32_backedgetaken(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: br i1 poison, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: br label [[LOOP:%.*]]
; UNROLL: loop:
; UNROLL-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
; UNROLL-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
; UNROLL-NEXT: [[B_NEXT]] = add i32 [[B_0]], -1
; UNROLL-NEXT: [[EC:%.*]] = icmp eq i32 [[B_NEXT]], 0
; UNROLL-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
; UNROLL-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
; UNROLL: exit:
; UNROLL-NEXT: ret i32 0
;
; UNROLL-NO-IC-LABEL: @max_i32_backedgetaken(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4)
; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], splat (i32 4)
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP1]], [[TMP0]]
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]])
; UNROLL-NO-IC-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
; UNROLL-NO-IC: loop:
; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[A_0_AND:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4
; UNROLL-NO-IC-NEXT: [[B_NEXT]] = add i32 [[B_0]], -1
; UNROLL-NO-IC-NEXT: [[EC:%.*]] = icmp eq i32 [[B_NEXT]], 0
; UNROLL-NO-IC-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
; UNROLL-NO-IC-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
; UNROLL-NO-IC: exit:
; UNROLL-NO-IC-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[A_0_AND_LCSSA]]
;
; INTERLEAVE-LABEL: @max_i32_backedgetaken(
; INTERLEAVE-NEXT: entry:
; INTERLEAVE-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; INTERLEAVE: vector.ph:
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
; INTERLEAVE-NEXT: br i1 poison, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; INTERLEAVE: middle.block:
; INTERLEAVE-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; INTERLEAVE: scalar.ph:
; INTERLEAVE-NEXT: br label [[LOOP:%.*]]
; INTERLEAVE: loop:
; INTERLEAVE-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
; INTERLEAVE-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
; INTERLEAVE-NEXT: [[B_NEXT]] = add i32 [[B_0]], -1
; INTERLEAVE-NEXT: [[EC:%.*]] = icmp eq i32 [[B_NEXT]], 0
; INTERLEAVE-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
; INTERLEAVE-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
; INTERLEAVE: exit:
; INTERLEAVE-NEXT: ret i32 0
;

View File

@@ -317,7 +317,7 @@ define void @scalarize_ptrtoint(ptr %src, ptr %dst) {
; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP9]] to ptr
; CHECK-NEXT: store ptr [[TMP11]], ptr %dst, align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP12]], label %middle.block, label %vector.body
entry:
@@ -332,7 +332,7 @@ loop:
%cast.2 = inttoptr i64 %add to ptr
store ptr %cast.2, ptr %dst, align 8
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv.next, 0
%ec = icmp eq i64 %iv.next, 1024
br i1 %ec, label %exit, label %loop
exit:

View File

@@ -4,7 +4,7 @@
define void @d() {
; CHECK-LABEL: define void @d() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -19,12 +19,12 @@ define void @d() {
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP1]], i32 0
; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[TMP4]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I7:%.*]], [[LOOP]] ]
@@ -34,7 +34,7 @@ define void @d() {
; CHECK-NEXT: [[I6:%.*]] = select i1 [[I5]], float 0.000000e+00, float 0.000000e+00
; CHECK-NEXT: store float [[I6]], ptr [[I4]], align 4
; CHECK-NEXT: [[I7]] = add i64 [[I]], 1
; CHECK-NEXT: [[I8:%.*]] = icmp eq i64 [[I7]], 0
; CHECK-NEXT: [[I8:%.*]] = icmp eq i64 [[I7]], 128
; CHECK-NEXT: br i1 [[I8]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
@@ -50,7 +50,7 @@ loop:
%i6 = select i1 %i5, float 0.0, float 0.0
store float %i6, ptr %i4, align 4
%i7 = add i64 %i, 1
%i8 = icmp eq i64 %i7, 0
%i8 = icmp eq i64 %i7, 128
br i1 %i8, label %exit, label %loop
exit:

View File

@@ -821,3 +821,122 @@ loop:
exit:
ret void
}
define void @multiple_ivs_wide(ptr %dst) {
; CHECK-LABEL: @multiple_ivs_wide(
; CHECK-NEXT: iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 -64, i32 -62, i32 -60, i32 -58>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 4
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 6
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP2]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP3]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP5]], i32 0
; CHECK-NEXT: store i32 [[TMP10]], ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP5]], i32 1
; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3
; CHECK-NEXT: store i32 [[TMP13]], ptr [[TMP9]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 64, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 64, [[VEC_EPILOG_ITER_CHECK]] ], [ -64, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX1]], 2
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], 6
; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i32> [[VEC_IND2]], splat (i32 2)
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP15]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP16]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP17]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP18]]
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP19]], i32 0
; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP20]], align 4
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP19]], i32 1
; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP21]], align 4
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP19]], i32 2
; CHECK-NEXT: store i32 [[TMP26]], ptr [[TMP22]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP19]], i32 3
; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP23]], align 4
; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i32 [[INDEX1]], 4
; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], splat (i32 8)
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 64
; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ 128, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 128, [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i32 [ 64, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ -64, [[ITER_CHECK]] ], [ 64, [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 2
; CHECK-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 2
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
; CHECK-NEXT: store i32 [[IV_2_NEXT]], ptr [[GEP]], align 4
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 128
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @multiple_ivs_wide(
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: entry:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[LOOP:%.*]]
; CHECK-PROFITABLE-BY-DEFAULT: loop:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV_2:%.*]] = phi i32 [ -64, [[ENTRY]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV_NEXT]] = add i32 [[IV]], 2
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 2
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[IV]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store i32 [[IV_2_NEXT]], ptr [[GEP]], align 4
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 128
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK-PROFITABLE-BY-DEFAULT: exit:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%iv.2 = phi i32 [ -64, %entry ], [ %iv.2.next, %loop ]
%iv.next = add i32 %iv, 2
%iv.2.next = add i32 %iv.2, 2
%gep = getelementptr inbounds i32, ptr %dst, i32 %iv
store i32 %iv.2.next, ptr %gep, align 4
%ec = icmp eq i32 %iv.next, 128
br i1 %ec, label %exit, label %loop
exit: ; preds = %loop
ret void
}

View File

@@ -1528,19 +1528,17 @@ define void @stride_check_known_via_loop_guard(ptr %C, ptr %A, i32 %Acols) {
; CHECK-NEXT: [[PRE_C:%.*]] = icmp ugt i32 [[ACOLS]], 0
; CHECK-NEXT: br i1 [[PRE_C]], label [[EXIT:%.*]], label [[OUTER_HEADER_PREHEADER:%.*]]
; CHECK: outer.header.preheader:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 8
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[C]], i64 34359738368
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[C]], i64 8000
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 8
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i32 [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER]] ]
; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[OUTER_IV]], [[ACOLS]]
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr double, ptr [[A]], i32 [[MUL_US]]
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: br i1 true, label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[C]], [[SCEVGEP]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[C]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@@ -1549,18 +1547,18 @@ define void @stride_check_known_via_loop_guard(ptr %C, ptr %A, i32 %Acols) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8, !alias.scope [[META69:![0-9]+]], !noalias [[META72:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8, !alias.scope [[META69:![0-9]+]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP2]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
; CHECK-NEXT: store <4 x double> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8, !alias.scope [[META72]]
; CHECK-NEXT: store <4 x double> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8, !alias.scope [[META72:![0-9]+]], !noalias [[META69]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP74:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[OUTER_LATCH]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[OUTER_HEADER]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[OUTER_HEADER]] ]
; CHECK-NEXT: br label [[INNER:%.*]]
; CHECK: inner:
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ]
@@ -1568,7 +1566,7 @@ define void @stride_check_known_via_loop_guard(ptr %C, ptr %A, i32 %Acols) {
; CHECK-NEXT: [[L:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8
; CHECK-NEXT: store double [[L]], ptr [[GEP_C]], align 8
; CHECK-NEXT: [[INNER_IV_NEXT]] = add i32 [[INNER_IV]], 1
; CHECK-NEXT: [[INNER_C:%.*]] = icmp eq i32 [[INNER_IV_NEXT]], 0
; CHECK-NEXT: [[INNER_C:%.*]] = icmp eq i32 [[INNER_IV_NEXT]], 1000
; CHECK-NEXT: br i1 [[INNER_C]], label [[OUTER_LATCH]], label [[INNER]], !llvm.loop [[LOOP75:![0-9]+]]
; CHECK: outer.latch:
; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i32 [[OUTER_IV]], 1
@@ -1595,7 +1593,7 @@ inner:
%l = load double, ptr %arrayidx.us, align 8
store double %l, ptr %gep.C, align 8
%inner.iv.next = add i32 %inner.iv, 1
%inner.c = icmp eq i32 %inner.iv.next, 0
%inner.c = icmp eq i32 %inner.iv.next, 1000
br i1 %inner.c, label %outer.latch, label %inner
outer.latch:

View File

@@ -6,12 +6,12 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) {
; CHECK-SAME: (i32 [[ARG:%.*]], ptr [[DST:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[ARG]], 1
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[ARG]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[ADD]], 0
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 [[ADD]]
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP2]], i32 -1)
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP2]], i32 1023)
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
@@ -19,8 +19,7 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) {
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i1 [[TMP4]], i1 false
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i32 [[ADD]], 0
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[ADD]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
@@ -28,28 +27,28 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]]
; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i32> [[TMP9]] to <4 x i64>
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP10]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP10]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i64> [[TMP10]], i32 2
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i64> [[TMP10]], i32 3
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP12]], align 8
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP14]], align 8
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP16]], align 8
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP18]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]]
; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i32> [[TMP8]] to <4 x i64>
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP9]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP9]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP9]], i32 2
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP9]], i32 3
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP16]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP11]], align 8
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP13]], align 8
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP15]], align 8
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP17]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[LOOP]] ]
@@ -59,7 +58,7 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) {
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr double, ptr [[DST]], i64 [[ZEXT]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[GETELEMENTPTR]], align 8
; CHECK-NEXT: [[ADD2]] = add i64 [[PHI]], 1
; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i64 [[ADD2]], 0
; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i64 [[ADD2]], 1024
; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
@@ -76,7 +75,7 @@ loop:
%getelementptr = getelementptr double, ptr %dst, i64 %zext
store double 0.000000e+00, ptr %getelementptr, align 8
%add2 = add i64 %phi, 1
%icmp = icmp eq i64 %add2, 0
%icmp = icmp eq i64 %add2, 1024
br i1 %icmp, label %exit, label %loop
exit:
@@ -89,36 +88,34 @@ define void @integer_induction_wraps_scev_predicate_known(i32 %x, ptr %call, ptr
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[MUL]] to i64
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: br i1 true, label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 4294967264, [[TMP0]]
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 992, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[TMP4]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 30, [[DOTCAST]]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[TMP5]], i32 0
; CHECK-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP7]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967264
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -2, [[MIDDLE_BLOCK]] ], [ 30, [[VECTOR_SCEVCHECK]] ], [ 30, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_SCEVCHECK]] ], [ [[START]], [[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1022, [[MIDDLE_BLOCK]] ], [ 30, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
@@ -127,7 +124,7 @@ define void @integer_induction_wraps_scev_predicate_known(i32 %x, ptr %call, ptr
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[IV]]
; CHECK-NEXT: store ptr [[P_0]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC]] = add i32 [[IV]], 1
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV]], 0
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV]], 1024
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
@@ -143,7 +140,7 @@ for.cond: ; preds = %for.body, %entry
%arrayidx = getelementptr ptr, ptr %call, i32 %iv
store ptr %p.0, ptr %arrayidx, align 4
%inc = add i32 %iv, 1
%tobool.not = icmp eq i32 %iv, 0
%tobool.not = icmp eq i32 %iv, 1024
br i1 %tobool.not, label %for.end, label %for.cond
for.end: ; preds = %for.cond

View File

@@ -10,13 +10,13 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) {
; CHECK: VPlan 'Initial VPlan for VF={1},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-NEXT: ir<0> = original trip-count
; CHECK-NEXT: ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<0> + vp<[[VEC_TC]]> * ir<-1>
; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<1024> + vp<[[VEC_TC]]> * ir<-1>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
@@ -49,18 +49,18 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<0>, vp<[[VEC_TC]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.end>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[VEC_TC]]>, ir<0>
; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END]]>, ir<0>
; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END]]>, ir<1024>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT: IR %tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ] (extra operand: vp<[[RESUME1]]> from scalar.ph)
; CHECK-NEXT: IR %tmp1 = phi i64 [ %tmp7, %for.inc ], [ 0, %entry ] (extra operand: vp<[[RESUME2]]>.1 from scalar.ph)
; CHECK-NEXT: IR %tmp1 = phi i64 [ %tmp7, %for.inc ], [ 1024, %entry ] (extra operand: vp<[[RESUME2]]>.1 from scalar.ph)
; CHECK: IR %tmp5 = trunc i32 %tmp4 to i8
; CHECK-NEXT: No successors
; CHECK-EMPTY:
@@ -73,7 +73,7 @@ entry:
for.body:
%tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ]
%tmp1 = phi i64 [ %tmp7, %for.inc ], [ 0, %entry ]
%tmp1 = phi i64 [ %tmp7, %for.inc ], [ 1024, %entry ]
%tmp2 = getelementptr i8, ptr %ptr, i64 %tmp0
%tmp3 = load i8, ptr %tmp2, align 1
store i8 0, ptr %tmp2