[LoopUnroll] Directly update DT instead of DTU.
The scope of DT updates are very limited when unrolling loops: the DT should only need updating for * new blocks added * exiting blocks we simplified branches This can be done manually without too much extra work. MergeBlockIntoPredecessor also needs to be updated to support direct DT updates. This fixes excessive time spent in DTU for same cases. In an internal example, time spent in LoopUnroll with this patch goes from ~200s to 2s. It also is slightly positive for CTMark: * NewPM-O3: -0.13% * NewPM-ReleaseThinLTO: -0.11% * NewPM-ReleaseLTO-g: -0.13% Notable improvements are mafft (~ -0.50%) and lencod (~ -0.30%), with no workload regressed. https://llvm-compile-time-tracker.com/compare.php?from=78a9ee7834331fb4360457cc565fa36f5452f7e0&to=687e08d011b0dc6d3edd223612761e44225c7537&stat=instructions:u Reviewed By: kuhar Differential Revision: https://reviews.llvm.org/D141487
This commit is contained in:
@@ -90,11 +90,14 @@ bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI = nullptr,
|
||||
/// if BB's Pred has a branch to BB and to AnotherBB, and BB has a single
|
||||
/// successor Sing. In this case the branch will be updated with Sing instead of
|
||||
/// BB, and BB will still be merged into its predecessor and removed.
|
||||
/// If \p DT is not nullptr, update it directly; in that case, DTU must be
|
||||
/// nullptr.
|
||||
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU = nullptr,
|
||||
LoopInfo *LI = nullptr,
|
||||
MemorySSAUpdater *MSSAU = nullptr,
|
||||
MemoryDependenceResults *MemDep = nullptr,
|
||||
bool PredecessorWithTwoSuccessors = false);
|
||||
bool PredecessorWithTwoSuccessors = false,
|
||||
DominatorTree *DT = nullptr);
|
||||
|
||||
/// Merge block(s) sucessors, if possible. Return true if at least two
|
||||
/// of the blocks were merged together.
|
||||
|
||||
@@ -179,7 +179,8 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI,
|
||||
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
|
||||
LoopInfo *LI, MemorySSAUpdater *MSSAU,
|
||||
MemoryDependenceResults *MemDep,
|
||||
bool PredecessorWithTwoSuccessors) {
|
||||
bool PredecessorWithTwoSuccessors,
|
||||
DominatorTree *DT) {
|
||||
if (BB->hasAddressTaken())
|
||||
return false;
|
||||
|
||||
@@ -232,10 +233,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
|
||||
FoldSingleEntryPHINodes(BB, MemDep);
|
||||
}
|
||||
|
||||
if (DT) {
|
||||
assert(!DTU && "cannot use both DT and DTU for updates");
|
||||
DomTreeNode *PredNode = DT->getNode(PredBB);
|
||||
DomTreeNode *BBNode = DT->getNode(BB);
|
||||
if (PredNode) {
|
||||
assert(BBNode && "PredNode unreachable but BBNode reachable?");
|
||||
for (DomTreeNode *C : to_vector(BBNode->children()))
|
||||
C->setIDom(PredNode);
|
||||
}
|
||||
}
|
||||
// DTU update: Collect all the edges that exit BB.
|
||||
// These dominator edges will be redirected from Pred.
|
||||
std::vector<DominatorTree::UpdateType> Updates;
|
||||
if (DTU) {
|
||||
assert(!DT && "cannot use both DT and DTU for updates");
|
||||
// To avoid processing the same predecessor more than once.
|
||||
SmallPtrSet<BasicBlock *, 8> SeenSuccs;
|
||||
SmallPtrSet<BasicBlock *, 2> SuccsOfPredBB(succ_begin(PredBB),
|
||||
@@ -311,6 +323,12 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
|
||||
if (DTU)
|
||||
DTU->applyUpdates(Updates);
|
||||
|
||||
if (DT) {
|
||||
assert(succ_empty(BB) &&
|
||||
"successors should have been transferred to PredBB");
|
||||
DT->eraseNode(BB);
|
||||
}
|
||||
|
||||
// Finally, erase the old block and update dominator info.
|
||||
DeleteDeadBlock(BB, DTU);
|
||||
|
||||
|
||||
@@ -321,6 +321,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
|
||||
unsigned TripMultiple;
|
||||
unsigned BreakoutTrip;
|
||||
bool ExitOnTrue;
|
||||
BasicBlock *FirstExitingBlock = nullptr;
|
||||
SmallVector<BasicBlock *> ExitingBlocks;
|
||||
};
|
||||
DenseMap<BasicBlock *, ExitInfo> ExitInfos;
|
||||
@@ -680,8 +681,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
|
||||
assert(!UnrollVerifyDomtree ||
|
||||
DT->verify(DominatorTree::VerificationLevel::Fast));
|
||||
|
||||
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
|
||||
|
||||
SmallVector<DominatorTree::UpdateType> DTUpdates;
|
||||
auto SetDest = [&](BasicBlock *Src, bool WillExit, bool ExitOnTrue) {
|
||||
auto *Term = cast<BranchInst>(Src->getTerminator());
|
||||
const unsigned Idx = ExitOnTrue ^ WillExit;
|
||||
@@ -695,7 +695,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
|
||||
BranchInst::Create(Dest, Term);
|
||||
Term->eraseFromParent();
|
||||
|
||||
DTU.applyUpdates({{DominatorTree::Delete, Src, DeadSucc}});
|
||||
DTUpdates.emplace_back(DominatorTree::Delete, Src, DeadSucc);
|
||||
};
|
||||
|
||||
auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j,
|
||||
@@ -733,28 +733,56 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
|
||||
|
||||
// Fold branches for iterations where we know that they will exit or not
|
||||
// exit.
|
||||
for (const auto &Pair : ExitInfos) {
|
||||
const ExitInfo &Info = Pair.second;
|
||||
for (auto &Pair : ExitInfos) {
|
||||
ExitInfo &Info = Pair.second;
|
||||
for (unsigned i = 0, e = Info.ExitingBlocks.size(); i != e; ++i) {
|
||||
// The branch destination.
|
||||
unsigned j = (i + 1) % e;
|
||||
bool IsLatch = Pair.first == LatchBlock;
|
||||
std::optional<bool> KnownWillExit = WillExit(Info, i, j, IsLatch);
|
||||
if (!KnownWillExit)
|
||||
if (!KnownWillExit) {
|
||||
if (!Info.FirstExitingBlock)
|
||||
Info.FirstExitingBlock = Info.ExitingBlocks[i];
|
||||
continue;
|
||||
}
|
||||
|
||||
// We don't fold known-exiting branches for non-latch exits here,
|
||||
// because this ensures that both all loop blocks and all exit blocks
|
||||
// remain reachable in the CFG.
|
||||
// TODO: We could fold these branches, but it would require much more
|
||||
// sophisticated updates to LoopInfo.
|
||||
if (*KnownWillExit && !IsLatch)
|
||||
if (*KnownWillExit && !IsLatch) {
|
||||
if (!Info.FirstExitingBlock)
|
||||
Info.FirstExitingBlock = Info.ExitingBlocks[i];
|
||||
continue;
|
||||
}
|
||||
|
||||
SetDest(Info.ExitingBlocks[i], *KnownWillExit, Info.ExitOnTrue);
|
||||
}
|
||||
}
|
||||
|
||||
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
|
||||
DomTreeUpdater *DTUToUse = &DTU;
|
||||
if (ExitingBlocks.size() == 1) {
|
||||
// Manually update the DT if there's a single exiting node. In that case
|
||||
// there's a single exit node and it is sufficient to update the nodes
|
||||
// immediately dominated by the original exiting block. They will become
|
||||
// dominated by the first exiting block that leaves the loop after
|
||||
// unrolling. Note that the CFG inside the loop does not change, so there's
|
||||
// no need to update the DT inside the unrolled loop.
|
||||
DTUToUse = nullptr;
|
||||
auto &[OriginalExit, Info] = *ExitInfos.begin();
|
||||
if (!Info.FirstExitingBlock)
|
||||
Info.FirstExitingBlock = Info.ExitingBlocks.back();
|
||||
for (auto *C : to_vector(DT->getNode(OriginalExit)->children())) {
|
||||
if (L->contains(C->getBlock()))
|
||||
continue;
|
||||
C->setIDom(DT->getNode(Info.FirstExitingBlock));
|
||||
}
|
||||
} else {
|
||||
DTU.applyUpdates(DTUpdates);
|
||||
}
|
||||
|
||||
// When completely unrolling, the last latch becomes unreachable.
|
||||
if (!LatchIsExiting && CompletelyUnroll) {
|
||||
// There is no need to update the DT here, because there must be a unique
|
||||
@@ -774,16 +802,21 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
|
||||
if (Term && Term->isUnconditional()) {
|
||||
BasicBlock *Dest = Term->getSuccessor(0);
|
||||
BasicBlock *Fold = Dest->getUniquePredecessor();
|
||||
if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
|
||||
if (MergeBlockIntoPredecessor(Dest, /*DTU=*/DTUToUse, LI,
|
||||
/*MSSAU=*/nullptr, /*MemDep=*/nullptr,
|
||||
/*PredecessorWithTwoSuccessors=*/false,
|
||||
DTUToUse ? nullptr : DT)) {
|
||||
// Dest has been folded into Fold. Update our worklists accordingly.
|
||||
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
|
||||
llvm::erase_value(UnrolledLoopBlocks, Dest);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Apply updates to the DomTree.
|
||||
DT = &DTU.getDomTree();
|
||||
|
||||
if (DTUToUse) {
|
||||
// Apply updates to the DomTree.
|
||||
DT = &DTU.getDomTree();
|
||||
}
|
||||
assert(!UnrollVerifyDomtree ||
|
||||
DT->verify(DominatorTree::VerificationLevel::Fast));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user