[LoopVectorize] Enable vectorisation of early exit loops with live-outs (#120567)
This work feeds part of PR https://github.com/llvm/llvm-project/pull/88385, and adds support for vectorising loops with uncountable early exits and outside users of loop-defined variables. When calculating the final value from an uncountable early exit we need to calculate the vector lane that triggered the exit, and hence determine the value at the point we exited.

All code for calculating the last value when exiting the loop early now lives in a new vector.early.exit block, which sits between the middle.split block and the original exit block. Doing this required two fixes:

1. The vplan verifier incorrectly assumed that the block containing a definition always dominates the block of the user. That's not true if you can arrive at the use block from multiple incoming blocks. This is possible for early exit loops where both the early exit and the latch jump to the same block.
2. We were adding the new vector.early.exit to the wrong parent loop. It needs to have the same parent as the actual early exit block from the original loop.

I've added a new ExtractFirstActive VPInstruction that extracts the first active lane of a vector, i.e. the lane of the vector predicate that triggered the exit.

NOTE: The IR generated for dealing with live-outs from early exit loops is unoptimised, as opposed to normal loops. This inevitably leads to poor quality code, but this can be fixed up later.
This commit is contained in:
@@ -405,9 +405,11 @@ Early Exit Vectorization
|
|||||||
When vectorizing a loop with a single early exit, the loop blocks following the
|
When vectorizing a loop with a single early exit, the loop blocks following the
|
||||||
early exit are predicated and the vector loop will always exit via the latch.
|
early exit are predicated and the vector loop will always exit via the latch.
|
||||||
If the early exit has been taken, the vector loop's successor block
|
If the early exit has been taken, the vector loop's successor block
|
||||||
(``middle.split`` below) branches to the early exit block. Otherwise
|
(``middle.split`` below) branches to the early exit block via an intermediate
|
||||||
``middle.block`` selects between the exit block from the latch or the scalar
|
block (``vector.early.exit`` below). This intermediate block is responsible for
|
||||||
remainder loop.
|
calculating any exit values of loop-defined variables that are used in the
|
||||||
|
early exit block. Otherwise, ``middle.block`` selects between the exit block
|
||||||
|
from the latch or the scalar remainder loop.
|
||||||
|
|
||||||
.. image:: vplan-early-exit.png
|
.. image:: vplan-early-exit.png
|
||||||
|
|
||||||
|
|||||||
@@ -19,23 +19,27 @@ compound=true
|
|||||||
"middle.split"
|
"middle.split"
|
||||||
]
|
]
|
||||||
N4 -> N5 [ label=""]
|
N4 -> N5 [ label=""]
|
||||||
N4 -> N6 [ label=""]
|
N4 -> N7 [ label=""]
|
||||||
N5 [label =
|
N5 [label =
|
||||||
|
"vector.early.exit"
|
||||||
|
]
|
||||||
|
N5 -> N6 [ label=""]
|
||||||
|
N6 [label =
|
||||||
"early.exit"
|
"early.exit"
|
||||||
]
|
]
|
||||||
N6 [label =
|
N7 [label =
|
||||||
"middle.block"
|
"middle.block"
|
||||||
]
|
]
|
||||||
N6 -> N9 [ label=""]
|
N7 -> N10 [ label=""]
|
||||||
N6 -> N7 [ label=""]
|
|
||||||
N7 [label =
|
|
||||||
"scalar.ph"
|
|
||||||
]
|
|
||||||
N7 -> N8 [ label=""]
|
N7 -> N8 [ label=""]
|
||||||
N8 [label =
|
N8 [label =
|
||||||
|
"scalar.ph"
|
||||||
|
]
|
||||||
|
N8 -> N9 [ label=""]
|
||||||
|
N9 [label =
|
||||||
"loop.header"
|
"loop.header"
|
||||||
]
|
]
|
||||||
N9 [label =
|
N10 [label =
|
||||||
"latch.exit"
|
"latch.exit"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
Binary file not shown.
|
Before Width: | Height: | Size: 129 KiB After Width: | Height: | Size: 74 KiB |
@@ -9407,14 +9407,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
|
|||||||
|
|
||||||
if (auto *UncountableExitingBlock =
|
if (auto *UncountableExitingBlock =
|
||||||
Legal->getUncountableEarlyExitingBlock()) {
|
Legal->getUncountableEarlyExitingBlock()) {
|
||||||
if (!VPlanTransforms::handleUncountableEarlyExit(
|
VPlanTransforms::handleUncountableEarlyExit(
|
||||||
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock,
|
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
|
||||||
RecipeBuilder)) {
|
|
||||||
reportVectorizationFailure(
|
|
||||||
"Some exit values in loop with uncountable exit not supported yet",
|
|
||||||
"UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
DenseMap<VPValue *, VPValue *> IVEndValues;
|
DenseMap<VPValue *, VPValue *> IVEndValues;
|
||||||
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
|
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
|
||||||
|
|||||||
@@ -501,8 +501,15 @@ void VPBasicBlock::execute(VPTransformState *State) {
|
|||||||
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
|
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
|
||||||
// Register NewBB in its loop. In innermost loops it's the same for all
|
// Register NewBB in its loop. In innermost loops it's the same for all
|
||||||
// BB's.
|
// BB's.
|
||||||
if (State->CurrentParentLoop)
|
Loop *ParentLoop = State->CurrentParentLoop;
|
||||||
State->CurrentParentLoop->addBasicBlockToLoop(NewBB, *State->LI);
|
// If this block has a sole successor that is an exit block then it needs
|
||||||
|
// adding to the same parent loop as the exit block.
|
||||||
|
VPBlockBase *SuccVPBB = getSingleSuccessor();
|
||||||
|
if (SuccVPBB && State->Plan->isExitBlock(SuccVPBB))
|
||||||
|
ParentLoop = State->LI->getLoopFor(
|
||||||
|
cast<VPIRBasicBlock>(SuccVPBB)->getIRBasicBlock());
|
||||||
|
if (ParentLoop)
|
||||||
|
ParentLoop->addBasicBlockToLoop(NewBB, *State->LI);
|
||||||
State->Builder.SetInsertPoint(Terminator);
|
State->Builder.SetInsertPoint(Terminator);
|
||||||
|
|
||||||
State->CFG.PrevBB = NewBB;
|
State->CFG.PrevBB = NewBB;
|
||||||
@@ -950,6 +957,10 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true when \p VPBB is a VPIRBasicBlock that has no successors.
bool VPlan::isExitBlock(VPBlockBase *VPBB) {
|
||||||
|
return isa<VPIRBasicBlock>(VPBB) && VPBB->getNumSuccessors() == 0;
|
||||||
|
}
|
||||||
|
|
||||||
/// Generate the code inside the preheader and body of the vectorized loop.
|
/// Generate the code inside the preheader and body of the vectorized loop.
|
||||||
/// Assumes a single pre-header basic-block was created for this. Introduce
|
/// Assumes a single pre-header basic-block was created for this. Introduce
|
||||||
/// additional basic-blocks as needed, and fill them all.
|
/// additional basic-blocks as needed, and fill them all.
|
||||||
|
|||||||
@@ -1223,6 +1223,9 @@ public:
|
|||||||
// Returns a scalar boolean value, which is true if any lane of its (only
|
// Returns a scalar boolean value, which is true if any lane of its (only
|
||||||
// boolean) vector operand is true.
|
// boolean) vector operand is true.
|
||||||
AnyOf,
|
AnyOf,
|
||||||
|
// Extracts the first active lane of a vector, where the first operand is
|
||||||
|
// the vector to extract from, and the second operand is the predicate.
|
||||||
|
ExtractFirstActive,
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@@ -3967,6 +3970,9 @@ public:
|
|||||||
/// of VPBlockShallowTraversalWrapper.
|
/// of VPBlockShallowTraversalWrapper.
|
||||||
auto getExitBlocks();
|
auto getExitBlocks();
|
||||||
|
|
||||||
|
/// Returns true if \p VPBB is an exit block.
|
||||||
|
bool isExitBlock(VPBlockBase *VPBB);
|
||||||
|
|
||||||
/// The trip count of the original loop.
|
/// The trip count of the original loop.
|
||||||
VPValue *getTripCount() const {
|
VPValue *getTripCount() const {
|
||||||
assert(TripCount && "trip count needs to be set before accessing it");
|
assert(TripCount && "trip count needs to be set before accessing it");
|
||||||
|
|||||||
@@ -78,6 +78,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
|
|||||||
case VPInstruction::CanonicalIVIncrementForPart:
|
case VPInstruction::CanonicalIVIncrementForPart:
|
||||||
case VPInstruction::AnyOf:
|
case VPInstruction::AnyOf:
|
||||||
return SetResultTyFromOp();
|
return SetResultTyFromOp();
|
||||||
|
case VPInstruction::ExtractFirstActive:
|
||||||
case VPInstruction::ExtractFromEnd: {
|
case VPInstruction::ExtractFromEnd: {
|
||||||
Type *BaseTy = inferScalarType(R->getOperand(0));
|
Type *BaseTy = inferScalarType(R->getOperand(0));
|
||||||
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
|
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
|
||||||
|
|||||||
@@ -697,7 +697,13 @@ Value *VPInstruction::generate(VPTransformState &State) {
|
|||||||
Value *A = State.get(getOperand(0));
|
Value *A = State.get(getOperand(0));
|
||||||
return Builder.CreateOrReduce(A);
|
return Builder.CreateOrReduce(A);
|
||||||
}
|
}
|
||||||
|
case VPInstruction::ExtractFirstActive: {
|
||||||
|
Value *Vec = State.get(getOperand(0));
|
||||||
|
Value *Mask = State.get(getOperand(1));
|
||||||
|
Value *Ctz = Builder.CreateCountTrailingZeroElems(
|
||||||
|
Builder.getInt64Ty(), Mask, true, "first.active.lane");
|
||||||
|
return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value");
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
llvm_unreachable("Unsupported opcode for instruction");
|
llvm_unreachable("Unsupported opcode for instruction");
|
||||||
}
|
}
|
||||||
@@ -705,6 +711,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
|
|||||||
|
|
||||||
bool VPInstruction::isVectorToScalar() const {
|
bool VPInstruction::isVectorToScalar() const {
|
||||||
return getOpcode() == VPInstruction::ExtractFromEnd ||
|
return getOpcode() == VPInstruction::ExtractFromEnd ||
|
||||||
|
getOpcode() == VPInstruction::ExtractFirstActive ||
|
||||||
getOpcode() == VPInstruction::ComputeReductionResult ||
|
getOpcode() == VPInstruction::ComputeReductionResult ||
|
||||||
getOpcode() == VPInstruction::AnyOf;
|
getOpcode() == VPInstruction::AnyOf;
|
||||||
}
|
}
|
||||||
@@ -769,6 +776,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
|
|||||||
case VPInstruction::CalculateTripCountMinusVF:
|
case VPInstruction::CalculateTripCountMinusVF:
|
||||||
case VPInstruction::CanonicalIVIncrementForPart:
|
case VPInstruction::CanonicalIVIncrementForPart:
|
||||||
case VPInstruction::ExtractFromEnd:
|
case VPInstruction::ExtractFromEnd:
|
||||||
|
case VPInstruction::ExtractFirstActive:
|
||||||
case VPInstruction::FirstOrderRecurrenceSplice:
|
case VPInstruction::FirstOrderRecurrenceSplice:
|
||||||
case VPInstruction::LogicalAnd:
|
case VPInstruction::LogicalAnd:
|
||||||
case VPInstruction::Not:
|
case VPInstruction::Not:
|
||||||
@@ -888,6 +896,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
|
|||||||
case VPInstruction::AnyOf:
|
case VPInstruction::AnyOf:
|
||||||
O << "any-of";
|
O << "any-of";
|
||||||
break;
|
break;
|
||||||
|
case VPInstruction::ExtractFirstActive:
|
||||||
|
O << "extract-first-active";
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
O << Instruction::getOpcodeName(getOpcode());
|
O << Instruction::getOpcodeName(getOpcode());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2064,7 +2064,7 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VPlanTransforms::handleUncountableEarlyExit(
|
void VPlanTransforms::handleUncountableEarlyExit(
|
||||||
VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop,
|
VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop,
|
||||||
BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) {
|
BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) {
|
||||||
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
|
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
|
||||||
@@ -2101,12 +2101,17 @@ bool VPlanTransforms::handleUncountableEarlyExit(
|
|||||||
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
|
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
|
||||||
|
|
||||||
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
|
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
|
||||||
|
VPBasicBlock *VectorEarlyExitVPBB =
|
||||||
|
Plan.createVPBasicBlock("vector.early.exit");
|
||||||
VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
|
VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
|
||||||
VPBlockUtils::connectBlocks(NewMiddle, VPEarlyExitBlock);
|
VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
|
||||||
NewMiddle->swapSuccessors();
|
NewMiddle->swapSuccessors();
|
||||||
|
|
||||||
|
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock);
|
||||||
|
|
||||||
// Update the exit phis in the early exit block.
|
// Update the exit phis in the early exit block.
|
||||||
VPBuilder MiddleBuilder(NewMiddle);
|
VPBuilder MiddleBuilder(NewMiddle);
|
||||||
|
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
|
||||||
for (VPRecipeBase &R : *VPEarlyExitBlock) {
|
for (VPRecipeBase &R : *VPEarlyExitBlock) {
|
||||||
auto *ExitIRI = cast<VPIRInstruction>(&R);
|
auto *ExitIRI = cast<VPIRInstruction>(&R);
|
||||||
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
|
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
|
||||||
@@ -2115,9 +2120,6 @@ bool VPlanTransforms::handleUncountableEarlyExit(
|
|||||||
|
|
||||||
VPValue *IncomingFromEarlyExit = RecipeBuilder.getVPValueOrAddLiveIn(
|
VPValue *IncomingFromEarlyExit = RecipeBuilder.getVPValueOrAddLiveIn(
|
||||||
ExitPhi->getIncomingValueForBlock(UncountableExitingBlock));
|
ExitPhi->getIncomingValueForBlock(UncountableExitingBlock));
|
||||||
// The incoming value from the early exit must be a live-in for now.
|
|
||||||
if (!IncomingFromEarlyExit->isLiveIn())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (OrigLoop->getUniqueExitBlock()) {
|
if (OrigLoop->getUniqueExitBlock()) {
|
||||||
// If there's a unique exit block, VPEarlyExitBlock has 2 predecessors
|
// If there's a unique exit block, VPEarlyExitBlock has 2 predecessors
|
||||||
@@ -2129,6 +2131,10 @@ bool VPlanTransforms::handleUncountableEarlyExit(
|
|||||||
ExitIRI->extractLastLaneOfOperand(MiddleBuilder);
|
ExitIRI->extractLastLaneOfOperand(MiddleBuilder);
|
||||||
}
|
}
|
||||||
// Add the incoming value from the early exit.
|
// Add the incoming value from the early exit.
|
||||||
|
if (!IncomingFromEarlyExit->isLiveIn())
|
||||||
|
IncomingFromEarlyExit =
|
||||||
|
EarlyExitB.createNaryOp(VPInstruction::ExtractFirstActive,
|
||||||
|
{IncomingFromEarlyExit, EarlyExitTakenCond});
|
||||||
ExitIRI->addOperand(IncomingFromEarlyExit);
|
ExitIRI->addOperand(IncomingFromEarlyExit);
|
||||||
}
|
}
|
||||||
MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
|
MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
|
||||||
@@ -2146,5 +2152,4 @@ bool VPlanTransforms::handleUncountableEarlyExit(
|
|||||||
Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
|
Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
|
||||||
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
|
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
|
||||||
LatchExitingBranch->eraseFromParent();
|
LatchExitingBranch->eraseFromParent();
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -155,7 +155,7 @@ struct VPlanTransforms {
|
|||||||
/// exit conditions
|
/// exit conditions
|
||||||
/// * splitting the original middle block to branch to the early exit block
|
/// * splitting the original middle block to branch to the early exit block
|
||||||
/// if taken.
|
/// if taken.
|
||||||
static bool handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE,
|
static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE,
|
||||||
Loop *OrigLoop,
|
Loop *OrigLoop,
|
||||||
BasicBlock *UncountableExitingBlock,
|
BasicBlock *UncountableExitingBlock,
|
||||||
VPRecipeBuilder &RecipeBuilder);
|
VPRecipeBuilder &RecipeBuilder);
|
||||||
|
|||||||
@@ -209,7 +209,9 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
|
|||||||
auto *UI = cast<VPRecipeBase>(U);
|
auto *UI = cast<VPRecipeBase>(U);
|
||||||
// TODO: check dominance of incoming values for phis properly.
|
// TODO: check dominance of incoming values for phis properly.
|
||||||
if (!UI ||
|
if (!UI ||
|
||||||
isa<VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPredInstPHIRecipe>(UI))
|
isa<VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPredInstPHIRecipe>(UI) ||
|
||||||
|
(isa<VPIRInstruction>(UI) &&
|
||||||
|
isa<PHINode>(cast<VPIRInstruction>(UI)->getInstruction())))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// If the user is in the same block, check it comes after R in the
|
// If the user is in the same block, check it comes after R in the
|
||||||
|
|||||||
@@ -13,21 +13,70 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
|
|||||||
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
|
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
|
||||||
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
|
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
|
||||||
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
|
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
|
||||||
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
|
||||||
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP1]]
|
||||||
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||||
|
; CHECK: vector.ph:
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16
|
||||||
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]]
|
||||||
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = add i64 3, [[N_VEC]]
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
|
||||||
|
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 16 x i64> [[TMP7]], splat (i64 1)
|
||||||
|
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i64> splat (i64 3), [[TMP8]]
|
||||||
|
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
|
||||||
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i64> poison, i64 [[TMP9]], i64 0
|
||||||
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i64> [[DOTSPLATINSERT]], <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer
|
||||||
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||||
|
; CHECK: vector.body:
|
||||||
|
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
|
||||||
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||||
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
|
||||||
|
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||||
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP10]]
|
||||||
|
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
|
||||||
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP12]], align 1
|
||||||
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP10]]
|
||||||
|
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
|
||||||
|
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP14]], align 1
|
||||||
|
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
|
||||||
|
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP5]]
|
||||||
|
; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 16 x i1> [[TMP15]], splat (i1 true)
|
||||||
|
; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP16]])
|
||||||
|
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
|
||||||
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
||||||
|
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
|
||||||
|
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||||
|
; CHECK: middle.split:
|
||||||
|
; CHECK-NEXT: br i1 [[TMP17]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
||||||
|
; CHECK: vector.early.exit:
|
||||||
|
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP16]], i1 true)
|
||||||
|
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <vscale x 16 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
|
||||||
|
; CHECK-NEXT: br label [[LOOP_END:%.*]]
|
||||||
|
; CHECK: middle.block:
|
||||||
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
|
||||||
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END]], label [[SCALAR_PH]]
|
||||||
|
; CHECK: scalar.ph:
|
||||||
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
|
||||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||||
; CHECK: loop:
|
; CHECK: loop:
|
||||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
||||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
|
||||||
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
|
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
|
||||||
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
|
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
|
||||||
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
|
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
|
||||||
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
|
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
|
||||||
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
|
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
|
||||||
; CHECK: loop.inc:
|
; CHECK: loop.inc:
|
||||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
|
||||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
|
||||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
|
||||||
; CHECK: loop.end:
|
; CHECK: loop.end:
|
||||||
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
|
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
|
||||||
; CHECK-NEXT: ret i64 [[RETVAL]]
|
; CHECK-NEXT: ret i64 [[RETVAL]]
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
@@ -66,19 +115,48 @@ define i64 @same_exit_block_pre_inc_use4() {
|
|||||||
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i64], align 8
|
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i64], align 8
|
||||||
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
|
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
|
||||||
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
|
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
|
||||||
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||||
|
; CHECK: vector.ph:
|
||||||
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||||
|
; CHECK: vector.body:
|
||||||
|
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||||
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 3, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||||
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
|
||||||
|
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
|
||||||
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 1
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <2 x i64> [[VEC_IND]], [[WIDE_LOAD]]
|
||||||
|
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 2
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]])
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
|
||||||
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
|
||||||
|
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||||
|
; CHECK: middle.split:
|
||||||
|
; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
||||||
|
; CHECK: vector.early.exit:
|
||||||
|
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP4]], i1 true)
|
||||||
|
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <2 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
|
||||||
|
; CHECK-NEXT: br label [[LOOP_END:%.*]]
|
||||||
|
; CHECK: middle.block:
|
||||||
|
; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
|
||||||
|
; CHECK: scalar.ph:
|
||||||
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
|
||||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||||
; CHECK: loop:
|
; CHECK: loop:
|
||||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
||||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[INDEX]]
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[INDEX]]
|
||||||
; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[ARRAYIDX]], align 1
|
; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[ARRAYIDX]], align 1
|
||||||
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i64 [[INDEX]], [[LD1]]
|
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i64 [[INDEX]], [[LD1]]
|
||||||
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
|
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
|
||||||
; CHECK: loop.inc:
|
; CHECK: loop.inc:
|
||||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
|
||||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
|
||||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]]
|
||||||
; CHECK: loop.end:
|
; CHECK: loop.end:
|
||||||
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
|
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
|
||||||
; CHECK-NEXT: ret i64 [[RETVAL]]
|
; CHECK-NEXT: ret i64 [[RETVAL]]
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
@@ -114,20 +192,50 @@ define i64 @loop_contains_safe_call() #1 {
|
|||||||
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
|
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
|
||||||
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
|
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
|
||||||
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
|
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
|
||||||
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||||
|
; CHECK: vector.ph:
|
||||||
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||||
|
; CHECK: vector.body:
|
||||||
|
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||||
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||||
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
|
||||||
|
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
|
||||||
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 1
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD]])
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ult <4 x float> [[TMP3]], splat (float 3.000000e+00)
|
||||||
|
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
|
||||||
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
|
||||||
|
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
|
||||||
|
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
||||||
|
; CHECK: middle.split:
|
||||||
|
; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
||||||
|
; CHECK: vector.early.exit:
|
||||||
|
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true)
|
||||||
|
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
|
||||||
|
; CHECK-NEXT: br label [[LOOP_END:%.*]]
|
||||||
|
; CHECK: middle.block:
|
||||||
|
; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
|
||||||
|
; CHECK: scalar.ph:
|
||||||
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
|
||||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||||
; CHECK: loop:
|
; CHECK: loop:
|
||||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
||||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[INDEX]]
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[INDEX]]
|
||||||
; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[ARRAYIDX]], align 1
|
; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[ARRAYIDX]], align 1
|
||||||
; CHECK-NEXT: [[SQRT:%.*]] = tail call fast float @llvm.sqrt.f32(float [[LD1]])
|
; CHECK-NEXT: [[SQRT:%.*]] = tail call fast float @llvm.sqrt.f32(float [[LD1]])
|
||||||
; CHECK-NEXT: [[CMP:%.*]] = fcmp fast ult float [[SQRT]], 3.000000e+00
|
; CHECK-NEXT: [[CMP:%.*]] = fcmp fast ult float [[SQRT]], 3.000000e+00
|
||||||
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
|
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END]]
|
||||||
; CHECK: loop.inc:
|
; CHECK: loop.inc:
|
||||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
|
||||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
|
||||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP7:![0-9]+]]
|
||||||
; CHECK: loop.end:
|
; CHECK: loop.end:
|
||||||
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
|
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
|
||||||
; CHECK-NEXT: ret i64 [[RETVAL]]
|
; CHECK-NEXT: ret i64 [[RETVAL]]
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
@@ -164,20 +272,50 @@ define i64 @loop_contains_safe_div() #1 {
|
|||||||
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
|
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
|
||||||
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
|
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
|
||||||
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
|
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
|
||||||
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||||
|
; CHECK: vector.ph:
|
||||||
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||||
|
; CHECK: vector.body:
|
||||||
|
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||||
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 3, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||||
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
|
||||||
|
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
|
||||||
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 1
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i32> [[WIDE_LOAD]], splat (i32 20000)
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i32> [[TMP3]], splat (i32 1)
|
||||||
|
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 2
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP5]])
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
|
||||||
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
|
||||||
|
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
|
||||||
|
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
||||||
|
; CHECK: middle.split:
|
||||||
|
; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
||||||
|
; CHECK: vector.early.exit:
|
||||||
|
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP5]], i1 true)
|
||||||
|
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <2 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
|
||||||
|
; CHECK-NEXT: br label [[LOOP_END:%.*]]
|
||||||
|
; CHECK: middle.block:
|
||||||
|
; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
|
||||||
|
; CHECK: scalar.ph:
|
||||||
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
|
||||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||||
; CHECK: loop:
|
; CHECK: loop:
|
||||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
||||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
|
||||||
; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
|
; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
|
||||||
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[LD1]], 20000
|
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[LD1]], 20000
|
||||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DIV]], 1
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DIV]], 1
|
||||||
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
|
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END]]
|
||||||
; CHECK: loop.inc:
|
; CHECK: loop.inc:
|
||||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
|
||||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
|
||||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP9:![0-9]+]]
|
||||||
; CHECK: loop.end:
|
; CHECK: loop.end:
|
||||||
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
|
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
|
||||||
; CHECK-NEXT: ret i64 [[RETVAL]]
|
; CHECK-NEXT: ret i64 [[RETVAL]]
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
@@ -216,21 +354,54 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
|
|||||||
; CHECK-NEXT: entry:
|
; CHECK-NEXT: entry:
|
||||||
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
|
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
|
||||||
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
|
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
|
||||||
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||||
|
; CHECK: vector.ph:
|
||||||
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||||
|
; CHECK: vector.body:
|
||||||
|
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
|
||||||
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||||
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
|
||||||
|
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
|
||||||
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 1
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 1)
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P2]], i64 [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
|
||||||
|
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
|
||||||
|
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
|
||||||
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
|
||||||
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
|
||||||
|
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
|
||||||
|
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
|
||||||
|
; CHECK: middle.split:
|
||||||
|
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_LOAD2]], i32 3
|
||||||
|
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
||||||
|
; CHECK: vector.early.exit:
|
||||||
|
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
|
||||||
|
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
|
||||||
|
; CHECK-NEXT: br label [[LOOP_END:%.*]]
|
||||||
|
; CHECK: middle.block:
|
||||||
|
; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
|
||||||
|
; CHECK: scalar.ph:
|
||||||
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
|
||||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||||
; CHECK: loop:
|
; CHECK: loop:
|
||||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
||||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
|
||||||
; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
|
; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
|
||||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1
|
||||||
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
|
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END]]
|
||||||
; CHECK: loop.inc:
|
; CHECK: loop.inc:
|
||||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[INDEX]]
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[INDEX]]
|
||||||
; CHECK-NEXT: [[LD2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
|
; CHECK-NEXT: [[LD2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
|
||||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
|
||||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
|
||||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP11:![0-9]+]]
|
||||||
; CHECK: loop.end:
|
; CHECK: loop.end:
|
||||||
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[LD2]], [[LOOP_INC]] ]
|
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[LD2]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
|
||||||
; CHECK-NEXT: ret i64 [[RETVAL]]
|
; CHECK-NEXT: ret i64 [[RETVAL]]
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
@@ -305,9 +476,11 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) {
|
|||||||
; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]])
|
; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]])
|
||||||
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||||
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
|
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
|
||||||
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
|
||||||
; CHECK: middle.split:
|
; CHECK: middle.split:
|
||||||
; CHECK-NEXT: br i1 [[TMP15]], label [[FOUND:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
; CHECK-NEXT: br i1 [[TMP15]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
||||||
|
; CHECK: vector.early.exit:
|
||||||
|
; CHECK-NEXT: br label [[FOUND:%.*]]
|
||||||
; CHECK: middle.block:
|
; CHECK: middle.block:
|
||||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
|
||||||
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
|
||||||
@@ -329,7 +502,7 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) {
|
|||||||
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32
|
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32
|
||||||
; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1
|
; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1
|
||||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]]
|
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]]
|
||||||
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]]
|
||||||
; CHECK: found:
|
; CHECK: found:
|
||||||
; CHECK-NEXT: ret i32 1
|
; CHECK-NEXT: ret i32 1
|
||||||
; CHECK: exit:
|
; CHECK: exit:
|
||||||
@@ -420,5 +593,15 @@ attributes #1 = { "target-features"="+sve" vscale_range(1,16) }
|
|||||||
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
||||||
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
||||||
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
||||||
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
|
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
|
||||||
|
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
|
||||||
|
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
|
||||||
|
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
|
||||||
|
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
|
||||||
|
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
|
||||||
|
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
|
||||||
|
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
|
||||||
|
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
|
||||||
|
; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
|
||||||
|
; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
|
||||||
;.
|
;.
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ define i64 @same_exit_block_pre_inc_use1() {
|
|||||||
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1'
|
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1'
|
||||||
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
|
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
|
||||||
; CHECK-NEXT: LV: We can vectorize this loop!
|
; CHECK-NEXT: LV: We can vectorize this loop!
|
||||||
; CHECK: LV: Not vectorizing: Some exit values in loop with uncountable exit not supported yet.
|
; CHECK-NOT: LV: Not vectorizing
|
||||||
entry:
|
entry:
|
||||||
%p1 = alloca [1024 x i8]
|
%p1 = alloca [1024 x i8]
|
||||||
%p2 = alloca [1024 x i8]
|
%p2 = alloca [1024 x i8]
|
||||||
@@ -141,7 +141,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
|
|||||||
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit'
|
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit'
|
||||||
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
|
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
|
||||||
; CHECK-NEXT: LV: We can vectorize this loop!
|
; CHECK-NEXT: LV: We can vectorize this loop!
|
||||||
; CHECK: LV: Not vectorizing: Some exit values in loop with uncountable exit not supported yet.
|
; CHECK-NOT: LV: Not vectorizing
|
||||||
entry:
|
entry:
|
||||||
%p1 = alloca [1024 x i8]
|
%p1 = alloca [1024 x i8]
|
||||||
call void @init_mem(ptr %p1, i64 1024)
|
call void @init_mem(ptr %p1, i64 1024)
|
||||||
|
|||||||
@@ -32,7 +32,9 @@ define i64 @same_exit_block_phi_of_consts() {
|
|||||||
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
|
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
|
||||||
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||||
; CHECK: middle.split:
|
; CHECK: middle.split:
|
||||||
; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
||||||
|
; CHECK: vector.early.exit:
|
||||||
|
; CHECK-NEXT: br label [[LOOP_END:%.*]]
|
||||||
; CHECK: middle.block:
|
; CHECK: middle.block:
|
||||||
; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
|
; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
|
||||||
; CHECK: scalar.ph:
|
; CHECK: scalar.ph:
|
||||||
@@ -51,7 +53,7 @@ define i64 @same_exit_block_phi_of_consts() {
|
|||||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
|
||||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
|
||||||
; CHECK: loop.end:
|
; CHECK: loop.end:
|
||||||
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[LOOP_INC]] ], [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[MIDDLE_SPLIT]] ]
|
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[LOOP_INC]] ], [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_EARLY_EXIT]] ]
|
||||||
; CHECK-NEXT: ret i64 [[RETVAL]]
|
; CHECK-NEXT: ret i64 [[RETVAL]]
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
@@ -109,7 +111,9 @@ define i64 @diff_exit_block_phi_of_consts() {
|
|||||||
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
|
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
|
||||||
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||||
; CHECK: middle.split:
|
; CHECK: middle.split:
|
||||||
; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
||||||
|
; CHECK: vector.early.exit:
|
||||||
|
; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]]
|
||||||
; CHECK: middle.block:
|
; CHECK: middle.block:
|
||||||
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
|
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
|
||||||
; CHECK: scalar.ph:
|
; CHECK: scalar.ph:
|
||||||
@@ -206,7 +210,9 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) {
|
|||||||
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
|
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
|
||||||
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
||||||
; CHECK: middle.split:
|
; CHECK: middle.split:
|
||||||
; CHECK-NEXT: br i1 [[TMP15]], label [[FOUND:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
; CHECK-NEXT: br i1 [[TMP15]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
||||||
|
; CHECK: vector.early.exit:
|
||||||
|
; CHECK-NEXT: br label [[FOUND:%.*]]
|
||||||
; CHECK: middle.block:
|
; CHECK: middle.block:
|
||||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
|
||||||
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
|
||||||
@@ -291,7 +297,9 @@ define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) {
|
|||||||
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[TMP1]], [[TMP2]]
|
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[TMP1]], [[TMP2]]
|
||||||
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
||||||
; CHECK: middle.split:
|
; CHECK: middle.split:
|
||||||
; CHECK-NEXT: br i1 [[TMP1]], label [[EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
; CHECK-NEXT: br i1 [[TMP1]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
|
||||||
|
; CHECK: vector.early.exit:
|
||||||
|
; CHECK-NEXT: br label [[EARLY_EXIT:%.*]]
|
||||||
; CHECK: middle.block:
|
; CHECK: middle.block:
|
||||||
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
||||||
; CHECK: scalar.ph:
|
; CHECK: scalar.ph:
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -39,7 +39,7 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() {
|
|||||||
; CHECK-EMPTY:
|
; CHECK-EMPTY:
|
||||||
; CHECK-NEXT: middle.split:
|
; CHECK-NEXT: middle.split:
|
||||||
; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]>
|
; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]>
|
||||||
; CHECK-NEXT: Successor(s): ir-bb<e1>, middle.block
|
; CHECK-NEXT: Successor(s): vector.early.exit, middle.block
|
||||||
; CHECK-EMPTY:
|
; CHECK-EMPTY:
|
||||||
; CHECK-NEXT: middle.block:
|
; CHECK-NEXT: middle.block:
|
||||||
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]>
|
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]>
|
||||||
@@ -58,8 +58,11 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() {
|
|||||||
; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1> from middle.block)
|
; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1> from middle.block)
|
||||||
; CHECK-NEXT: No successors
|
; CHECK-NEXT: No successors
|
||||||
; CHECK-EMPTY:
|
; CHECK-EMPTY:
|
||||||
|
; CHECK-NEXT: vector.early.exit:
|
||||||
|
; CHECK-NEXT: Successor(s): ir-bb<e1>
|
||||||
|
; CHECK-EMPTY:
|
||||||
; CHECK-NEXT: ir-bb<e1>:
|
; CHECK-NEXT: ir-bb<e1>:
|
||||||
; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0> from middle.split)
|
; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0> from vector.early.exit)
|
||||||
; CHECK-NEXT: No successors
|
; CHECK-NEXT: No successors
|
||||||
; CHECK-NEXT: }
|
; CHECK-NEXT: }
|
||||||
entry:
|
entry:
|
||||||
@@ -122,7 +125,7 @@ define i64 @multi_exiting_to_same_exit_live_in_exit_values() {
|
|||||||
; CHECK-EMPTY:
|
; CHECK-EMPTY:
|
||||||
; CHECK-NEXT: middle.split:
|
; CHECK-NEXT: middle.split:
|
||||||
; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]>
|
; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]>
|
||||||
; CHECK-NEXT: Successor(s): ir-bb<exit>, middle.block
|
; CHECK-NEXT: Successor(s): vector.early.exit, middle.block
|
||||||
; CHECK-EMPTY:
|
; CHECK-EMPTY:
|
||||||
; CHECK-NEXT: middle.block:
|
; CHECK-NEXT: middle.block:
|
||||||
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]>
|
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]>
|
||||||
@@ -137,8 +140,11 @@ define i64 @multi_exiting_to_same_exit_live_in_exit_values() {
|
|||||||
; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] (extra operand: vp<[[RESUME]]> from scalar.ph)
|
; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] (extra operand: vp<[[RESUME]]> from scalar.ph)
|
||||||
; CHECK: No successors
|
; CHECK: No successors
|
||||||
; CHECK-EMPTY:
|
; CHECK-EMPTY:
|
||||||
|
; CHECK-NEXT: vector.early.exit:
|
||||||
|
; CHECK-NEXT: Successor(s): ir-bb<exit>
|
||||||
|
; CHECK-EMPTY:
|
||||||
; CHECK-NEXT: ir-bb<exit>:
|
; CHECK-NEXT: ir-bb<exit>:
|
||||||
; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from middle.split)
|
; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from vector.early.exit)
|
||||||
; CHECK-NEXT: No successors
|
; CHECK-NEXT: No successors
|
||||||
; CHECK-NEXT: }
|
; CHECK-NEXT: }
|
||||||
|
|
||||||
@@ -198,7 +204,7 @@ define i64 @multi_exiting_to_same_exit_live_in_exit_values_2() {
|
|||||||
; CHECK-EMPTY:
|
; CHECK-EMPTY:
|
||||||
; CHECK-NEXT: middle.split:
|
; CHECK-NEXT: middle.split:
|
||||||
; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]>
|
; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]>
|
||||||
; CHECK-NEXT: Successor(s): ir-bb<exit>, middle.block
|
; CHECK-NEXT: Successor(s): vector.early.exit, middle.block
|
||||||
; CHECK-EMPTY:
|
; CHECK-EMPTY:
|
||||||
; CHECK-NEXT: middle.block:
|
; CHECK-NEXT: middle.block:
|
||||||
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]>
|
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]>
|
||||||
@@ -213,8 +219,11 @@ define i64 @multi_exiting_to_same_exit_live_in_exit_values_2() {
|
|||||||
; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] (extra operand: vp<[[RESUME]]> from scalar.ph)
|
; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] (extra operand: vp<[[RESUME]]> from scalar.ph)
|
||||||
; CHECK: No successors
|
; CHECK: No successors
|
||||||
; CHECK-EMPTY:
|
; CHECK-EMPTY:
|
||||||
|
; CHECK-NEXT: vector.early.exit:
|
||||||
|
; CHECK-NEXT: Successor(s): ir-bb<exit>
|
||||||
|
; CHECK-EMPTY:
|
||||||
; CHECK-NEXT: ir-bb<exit>:
|
; CHECK-NEXT: ir-bb<exit>:
|
||||||
; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from middle.split)
|
; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from vector.early.exit)
|
||||||
; CHECK-NEXT: No successors
|
; CHECK-NEXT: No successors
|
||||||
; CHECK-NEXT: }
|
; CHECK-NEXT: }
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user