[SCEV] Apply loop guards in reverse order.

This patch updates applyLoopGuards to first collect all conditions and
then apply them in reverse order. This ensures the SCEVs with the
shortest dependency chains are constructed first, limiting the required
stack size.

This fixes a crash reported in D113578.

Note that the order in which conditions are applied can impact the
accuracy of the result, mostly due to missing min/max simplifications
when constructing SCEVs.

The changed test highlights the impact of the evaluation order. I will
follow up with a SCEV patch to improve min/max simplifications to get
the same results for both orders.
This commit is contained in:
Florian Hahn
2021-12-16 10:52:37 +00:00
parent 9fa15e0073
commit f5f421e0ee
2 changed files with 23 additions and 12 deletions

View File

@@ -13959,11 +13959,12 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
ExprsToRewrite.push_back(LHS);
}
};
// Starting at the loop predecessor, climb up the predecessor chain, as long
// as there are predecessors that can be found that have unique successors
// leading to the original header.
// First, collect conditions from dominating branches. Starting at the loop
// predecessor, climb up the predecessor chain, as long as there are
// predecessors that can be found that have unique successors leading to the
// original header.
// TODO: share this logic with isLoopEntryGuardedByCond.
DenseMap<const SCEV *, const SCEV *> RewriteMap;
SmallVector<std::pair<Value *, bool>> Terms;
for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
L->getLoopPredecessor(), L->getHeader());
Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
@@ -13973,10 +13974,20 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional())
continue;
bool EnterIfTrue = LoopEntryPredicate->getSuccessor(0) == Pair.second;
Terms.emplace_back(LoopEntryPredicate->getCondition(),
LoopEntryPredicate->getSuccessor(0) == Pair.second);
}
// Now apply the information from the collected conditions to RewriteMap.
// Conditions are processed in reverse order, so the earliest condition is
// processed first. This ensures the SCEVs with the shortest dependency chains
// are constructed first.
DenseMap<const SCEV *, const SCEV *> RewriteMap;
for (auto &E : reverse(Terms)) {
bool EnterIfTrue = E.second;
SmallVector<Value *, 8> Worklist;
SmallPtrSet<Value *, 8> Visited;
Worklist.push_back(LoopEntryPredicate->getCondition());
Worklist.push_back(E.first);
while (!Worklist.empty()) {
Value *Cond = Worklist.pop_back_val();
if (!Visited.insert(Cond).second)

View File

@@ -1480,12 +1480,12 @@ define i32 @sle_sgt_ult_umax_to_smax(i32 %num) {
; CHECK-LABEL: 'sle_sgt_ult_umax_to_smax'
; CHECK-NEXT: Classifying expressions for: @sle_sgt_ult_umax_to_smax
; CHECK-NEXT: %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {0,+,4}<nuw><%loop> U: [0,-3) S: [-2147483648,2147483645) Exits: (4 * ((-4 + %num) /u 4))<nuw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: --> {0,+,4}<nuw><%loop> U: [0,25) S: [0,25) Exits: (4 * ((-4 + %num) /u 4))<nuw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw i32 %iv, 4
; CHECK-NEXT: --> {4,+,4}<nuw><%loop> U: [4,-3) S: [-2147483648,2147483645) Exits: (4 + (4 * ((-4 + %num) /u 4))<nuw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: --> {4,+,4}<nuw><%loop> U: [4,29) S: [4,29) Exits: (4 + (4 * ((-4 + %num) /u 4))<nuw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @sle_sgt_ult_umax_to_smax
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + %num) /u 4)
; CHECK-NEXT: Loop %loop: max backedge-taken count is 1073741823
; CHECK-NEXT: Loop %loop: max backedge-taken count is 6
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + %num) /u 4)
; CHECK-NEXT: Predicates:
; CHECK: Loop %loop: Trip multiple is 1
@@ -1517,12 +1517,12 @@ define i32 @ult_sle_sgt_umax_to_smax(i32 %num) {
; CHECK-LABEL: 'ult_sle_sgt_umax_to_smax'
; CHECK-NEXT: Classifying expressions for: @ult_sle_sgt_umax_to_smax
; CHECK-NEXT: %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {0,+,4}<nuw><%loop> U: [0,25) S: [0,25) Exits: (4 * ((-4 + %num) /u 4))<nuw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: --> {0,+,4}<nuw><%loop> U: [0,-3) S: [-2147483648,2147483645) Exits: (4 * ((-4 + %num) /u 4))<nuw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw i32 %iv, 4
; CHECK-NEXT: --> {4,+,4}<nuw><%loop> U: [4,29) S: [4,29) Exits: (4 + (4 * ((-4 + %num) /u 4))<nuw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: --> {4,+,4}<nuw><%loop> U: [4,-3) S: [-2147483648,2147483645) Exits: (4 + (4 * ((-4 + %num) /u 4))<nuw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @ult_sle_sgt_umax_to_smax
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + %num) /u 4)
; CHECK-NEXT: Loop %loop: max backedge-taken count is 6
; CHECK-NEXT: Loop %loop: max backedge-taken count is 1073741823
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + %num) /u 4)
; CHECK-NEXT: Predicates:
; CHECK: Loop %loop: Trip multiple is 1