[VPlan] Simplify branch on False in VPlan transform (NFC). (#140409)
Simplify branch on false, starting with the branch from the middle block to the scalar preheader. Initially this helps simplifying the initial VPlan construction. Depends on https://github.com/llvm/llvm-project/pull/140405. PR: https://github.com/llvm/llvm-project/pull/140409
This commit is contained in:
@@ -2380,10 +2380,12 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
|
||||
|
||||
// We just connected a new block to the scalar preheader. Update all
|
||||
// VPPhis by adding an incoming value for it, replicating the last value.
|
||||
unsigned NumPredecessors = ScalarPH->getNumPredecessors();
|
||||
for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
|
||||
auto *ResumePhi = cast<VPPhi>(&R);
|
||||
ResumePhi->addOperand(
|
||||
ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
|
||||
assert(isa<VPPhi>(&R) && "Phi expected to be VPPhi");
|
||||
assert(cast<VPPhi>(&R)->getNumIncoming() == NumPredecessors - 1 &&
|
||||
"must have incoming values for all operands");
|
||||
R.addOperand(R.getOperand(NumPredecessors - 2));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1136,6 +1136,10 @@ public:
|
||||
return getAsRecipe()->getNumOperands();
|
||||
}
|
||||
|
||||
/// Removes the incoming value for \p IncomingBlock, which must be a
|
||||
/// predecessor.
|
||||
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
/// Print the recipe.
|
||||
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
|
||||
@@ -3545,14 +3549,13 @@ template <> struct CastIsPossible<VPPhiAccessors, const VPRecipeBase *> {
|
||||
};
|
||||
/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
|
||||
/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
|
||||
template <>
|
||||
struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
|
||||
: public CastIsPossible<VPPhiAccessors, const VPRecipeBase *> {
|
||||
template <typename SrcTy>
|
||||
struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
|
||||
|
||||
using Self = CastInfo<VPPhiAccessors, const VPRecipeBase *>;
|
||||
using Self = CastInfo<VPPhiAccessors, SrcTy>;
|
||||
|
||||
/// doCast is used by cast<>.
|
||||
static inline VPPhiAccessors *doCast(const VPRecipeBase *R) {
|
||||
static inline VPPhiAccessors *doCast(SrcTy R) {
|
||||
return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
|
||||
switch (R->getVPDefID()) {
|
||||
case VPDef::VPInstructionSC:
|
||||
@@ -3568,12 +3571,18 @@ struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
|
||||
}
|
||||
|
||||
/// doCastIfPossible is used by dyn_cast<>.
|
||||
static inline VPPhiAccessors *doCastIfPossible(const VPRecipeBase *f) {
|
||||
static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
|
||||
if (!Self::isPossible(f))
|
||||
return nullptr;
|
||||
return doCast(f);
|
||||
}
|
||||
};
|
||||
template <>
|
||||
struct CastInfo<VPPhiAccessors, VPRecipeBase *>
|
||||
: CastInfoVPPhiAccessors<VPRecipeBase *> {};
|
||||
template <>
|
||||
struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
|
||||
: CastInfoVPPhiAccessors<const VPRecipeBase *> {};
|
||||
|
||||
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
|
||||
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
|
||||
|
||||
@@ -501,8 +501,10 @@ void VPlanTransforms::prepareForVectorization(
|
||||
cast<VPBasicBlock>(HeaderVPB),
|
||||
cast<VPBasicBlock>(LatchVPB), Range);
|
||||
HandledUncountableEarlyExit = true;
|
||||
} else {
|
||||
for (VPRecipeBase &R : EB->phis())
|
||||
cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
|
||||
}
|
||||
|
||||
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
|
||||
VPBlockUtils::disconnectBlocks(Pred, EB);
|
||||
}
|
||||
@@ -526,32 +528,6 @@ void VPlanTransforms::prepareForVectorization(
|
||||
VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
|
||||
VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader());
|
||||
|
||||
// If needed, add a check in the middle block to see if we have completed
|
||||
// all of the iterations in the first vector loop. Three cases:
|
||||
// 1) If we require a scalar epilogue, there is no conditional branch as
|
||||
// we unconditionally branch to the scalar preheader. Remove the recipes
|
||||
// from the exit blocks.
|
||||
// 2) If (N - N%VF) == N, then we *don't* need to run the remainder.
|
||||
// Thus if tail is to be folded, we know we don't need to run the
|
||||
// remainder and we can set the condition to true.
|
||||
// 3) Otherwise, construct a runtime check.
|
||||
|
||||
if (!RequiresScalarEpilogueCheck) {
|
||||
if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor())
|
||||
VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB);
|
||||
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
|
||||
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
|
||||
Plan.getEntry()->swapSuccessors();
|
||||
|
||||
// The exit blocks are unreachable, remove their recipes to make sure no
|
||||
// users remain that may pessimize transforms.
|
||||
for (auto *EB : Plan.getExitBlocks()) {
|
||||
for (VPRecipeBase &R : make_early_inc_range(*EB))
|
||||
R.eraseFromParent();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// The connection order corresponds to the operands of the conditional branch,
|
||||
// with the middle block already connected to the exit block.
|
||||
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
|
||||
@@ -561,21 +537,45 @@ void VPlanTransforms::prepareForVectorization(
|
||||
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
|
||||
Plan.getEntry()->swapSuccessors();
|
||||
|
||||
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
|
||||
// Here we use the same DebugLoc as the scalar loop latch terminator instead
|
||||
// of the corresponding compare because they may have ended up with
|
||||
// different line numbers and we want to avoid awkward line stepping while
|
||||
// debugging. Eg. if the compare has got a line number inside the loop.
|
||||
// If MiddleVPBB has a single successor then the original loop does not exit
|
||||
// via the latch and the single successor must be the scalar preheader.
|
||||
// There's no need to add a runtime check to MiddleVPBB.
|
||||
if (MiddleVPBB->getNumSuccessors() == 1) {
|
||||
assert(MiddleVPBB->getSingleSuccessor() == ScalarPH &&
|
||||
"must have ScalarPH as single successor");
|
||||
return;
|
||||
}
|
||||
|
||||
assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
|
||||
|
||||
// Add a check in the middle block to see if we have completed all of the
|
||||
// iterations in the first vector loop.
|
||||
//
|
||||
// Three cases:
|
||||
// 1) If we require a scalar epilogue, the scalar ph must execute. Set the
|
||||
// condition to false.
|
||||
// 2) If (N - N%VF) == N, then we *don't* need to run the
|
||||
// remainder. Thus if tail is to be folded, we know we don't need to run
|
||||
// the remainder and we can set the condition to true.
|
||||
// 3) Otherwise, construct a runtime check.
|
||||
|
||||
// We use the same DebugLoc as the scalar loop latch terminator instead of
|
||||
// the corresponding compare because they may have ended up with different
|
||||
// line numbers and we want to avoid awkward line stepping while debugging.
|
||||
// E.g., if the compare has got a line number inside the loop.
|
||||
DebugLoc LatchDL = TheLoop->getLoopLatch()->getTerminator()->getDebugLoc();
|
||||
VPBuilder Builder(MiddleVPBB);
|
||||
VPValue *Cmp =
|
||||
TailFolded
|
||||
? Plan.getOrAddLiveIn(ConstantInt::getTrue(
|
||||
IntegerType::getInt1Ty(TripCount->getType()->getContext())))
|
||||
: Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
|
||||
&Plan.getVectorTripCount(),
|
||||
ScalarLatchTerm->getDebugLoc(), "cmp.n");
|
||||
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
|
||||
ScalarLatchTerm->getDebugLoc());
|
||||
VPValue *Cmp;
|
||||
if (!RequiresScalarEpilogueCheck)
|
||||
Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse(
|
||||
IntegerType::getInt1Ty(TripCount->getType()->getContext())));
|
||||
else if (TailFolded)
|
||||
Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue(
|
||||
IntegerType::getInt1Ty(TripCount->getType()->getContext())));
|
||||
else
|
||||
Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
|
||||
&Plan.getVectorTripCount(), LatchDL, "cmp.n");
|
||||
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
|
||||
}
|
||||
|
||||
void VPlanTransforms::createLoopRegions(VPlan &Plan) {
|
||||
|
||||
@@ -1185,6 +1185,14 @@ void VPIRPhi::execute(VPTransformState &State) {
|
||||
State.Builder.SetInsertPoint(Phi->getParent(), std::next(Phi->getIterator()));
|
||||
}
|
||||
|
||||
void VPPhiAccessors::removeIncomingValueFor(VPBlockBase *IncomingBlock) const {
|
||||
VPRecipeBase *R = const_cast<VPRecipeBase *>(getAsRecipe());
|
||||
assert(R->getNumOperands() == R->getParent()->getNumPredecessors() &&
|
||||
"Number of phi operands must match number of predecessors");
|
||||
unsigned Position = R->getParent()->getIndexForPredecessor(IncomingBlock);
|
||||
R->removeOperand(Position);
|
||||
}
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
void VPPhiAccessors::printPhiOperands(raw_ostream &O,
|
||||
VPSlotTracker &SlotTracker) const {
|
||||
|
||||
@@ -1841,40 +1841,37 @@ void VPlanTransforms::truncateToMinimalBitwidths(
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove BranchOnCond recipes with true conditions together with removing
|
||||
/// dead edges to their successors.
|
||||
static void removeBranchOnCondTrue(VPlan &Plan) {
|
||||
/// Remove BranchOnCond recipes with true or false conditions together with
|
||||
/// removing dead edges to their successors.
|
||||
static void removeBranchOnConst(VPlan &Plan) {
|
||||
using namespace llvm::VPlanPatternMatch;
|
||||
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
|
||||
vp_depth_first_shallow(Plan.getEntry()))) {
|
||||
VPValue *Cond;
|
||||
if (VPBB->getNumSuccessors() != 2 || VPBB == Plan.getEntry() ||
|
||||
!match(&VPBB->back(), m_BranchOnCond(m_True())))
|
||||
!match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
|
||||
continue;
|
||||
|
||||
VPBasicBlock *RemovedSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
|
||||
unsigned DeadIdx = RemovedSucc->getIndexForPredecessor(VPBB);
|
||||
unsigned RemovedIdx;
|
||||
if (match(Cond, m_True()))
|
||||
RemovedIdx = 1;
|
||||
else if (match(Cond, m_False()))
|
||||
RemovedIdx = 0;
|
||||
else
|
||||
continue;
|
||||
|
||||
// Values coming from VPBB into ResumePhi recipes of RemoveSucc are removed
|
||||
// from these recipes.
|
||||
for (VPRecipeBase &R : make_early_inc_range(*RemovedSucc)) {
|
||||
assert((!isa<VPIRInstruction>(&R) ||
|
||||
!isa<PHINode>(cast<VPIRInstruction>(&R)->getInstruction())) &&
|
||||
!isa<VPHeaderPHIRecipe>(&R) &&
|
||||
"Cannot update VPIRInstructions wrapping phis or header phis yet");
|
||||
auto *VPI = dyn_cast<VPPhi>(&R);
|
||||
if (!VPI)
|
||||
break;
|
||||
VPBuilder B(VPI);
|
||||
SmallVector<VPValue *> NewOperands;
|
||||
// Create new operand list, with the dead incoming value filtered out.
|
||||
for (const auto &[Idx, Op] : enumerate(VPI->operands())) {
|
||||
if (Idx == DeadIdx)
|
||||
continue;
|
||||
NewOperands.push_back(Op);
|
||||
}
|
||||
VPI->replaceAllUsesWith(
|
||||
B.createScalarPhi(NewOperands, VPI->getDebugLoc(), VPI->getName()));
|
||||
VPI->eraseFromParent();
|
||||
VPBasicBlock *RemovedSucc =
|
||||
cast<VPBasicBlock>(VPBB->getSuccessors()[RemovedIdx]);
|
||||
const auto &Preds = RemovedSucc->getPredecessors();
|
||||
assert(count(Preds, VPBB) == 1 &&
|
||||
"There must be a single edge between VPBB and its successor");
|
||||
// Values coming from VPBB into phi recipes of RemoveSucc are removed from
|
||||
// these recipes.
|
||||
for (VPRecipeBase &R : RemovedSucc->phis()) {
|
||||
auto *Phi = cast<VPPhiAccessors>(&R);
|
||||
assert((!isa<VPIRPhi>(&R) || RemovedSucc->getNumPredecessors() == 1) &&
|
||||
"VPIRPhis must have a single predecessor");
|
||||
Phi->removeIncomingValueFor(VPBB);
|
||||
}
|
||||
// Disconnect blocks and remove the terminator. RemovedSucc will be deleted
|
||||
// automatically on VPlan destruction if it becomes unreachable.
|
||||
@@ -1894,7 +1891,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
|
||||
runPass(legalizeAndOptimizeInductions, Plan);
|
||||
runPass(removeRedundantExpandSCEVRecipes, Plan);
|
||||
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
|
||||
runPass(removeBranchOnCondTrue, Plan);
|
||||
runPass(removeBranchOnConst, Plan);
|
||||
runPass(removeDeadRecipes, Plan);
|
||||
|
||||
runPass(createAndOptimizeReplicateRegions, Plan);
|
||||
|
||||
@@ -38,6 +38,7 @@ class VPSlotTracker;
|
||||
class VPUser;
|
||||
class VPRecipeBase;
|
||||
class VPInterleaveRecipe;
|
||||
class VPPhiAccessors;
|
||||
|
||||
// This is the base class of the VPlan Def/Use graph, used for modeling the data
|
||||
// flow into, within and out of the VPlan. VPValues can stand for live-ins
|
||||
@@ -199,8 +200,18 @@ raw_ostream &operator<<(raw_ostream &OS, const VPRecipeBase &R);
|
||||
/// This class augments VPValue with operands which provide the inverse def-use
|
||||
/// edges from VPValue's users to their defs.
|
||||
class VPUser {
|
||||
/// Grant access to removeOperand for VPPhiAccessors, the only supported user.
|
||||
friend class VPPhiAccessors;
|
||||
|
||||
SmallVector<VPValue *, 2> Operands;
|
||||
|
||||
/// Removes the operand at index \p Idx. This also removes the VPUser from the
|
||||
/// use-list of the operand.
|
||||
void removeOperand(unsigned Idx) {
|
||||
getOperand(Idx)->removeUser(*this);
|
||||
Operands.erase(Operands.begin() + Idx);
|
||||
}
|
||||
|
||||
protected:
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
/// Print the operands to \p O.
|
||||
|
||||
Reference in New Issue
Block a user