[LICM] Reassociate & hoist sub expressions

LICM could reassociate mixed variant/invariant comparison/arithmetic operations
and hoist invariant parts out of loop if it can prove that they can be computed
without overflow. Motivating example here:
```
  INV1 - VAR1 < INV2
```
can be turned into
```
  VAR > INV1 - INV2
```
if we can prove no-signed-overflow here. Then `INV1 - INV2` can be computed
out of loop, so we save one arithmetic operation in-loop.

Reviewed By: skatkov
Differential Revision: https://reviews.llvm.org/D148001
This commit is contained in:
Max Kazantsev
2023-05-29 12:05:50 +07:00
parent 0a6aec2fb3
commit dd0cf23e4a
2 changed files with 70 additions and 7 deletions

View File

@@ -2572,6 +2572,68 @@ static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
return true;
}
/// Try to reassociate and hoist the following two patterns:
/// LV - C1 < C2 --> LV < C1 + C2,
/// C1 - LV < C2 --> LV > C1 - C2.
static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
AssumptionCache *AC, DominatorTree *DT) {
assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
// Try to represent VariantLHS as sum of invariant and variant operands.
using namespace PatternMatch;
Value *VariantOp, *InvariantOp;
if (!match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp))))
return false;
bool VariantSubtracted = false;
// LHS itself is a loop-variant, try to represent it in the form:
// "VariantOp + InvariantOp". If it is possible, then we can reassociate. If
// the variant operand goes with minus, we use a slightly different scheme.
if (L.isLoopInvariant(VariantOp)) {
std::swap(VariantOp, InvariantOp);
VariantSubtracted = true;
Pred = ICmpInst::getSwappedPredicate(Pred);
}
if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
return false;
// In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to
// freely move values from left side of inequality to right side (just as in
// normal linear arithmetics). Overflows make things much more complicated, so
// we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that
// "C1 - C2" does not overflow.
auto &DL = L.getHeader()->getModule()->getDataLayout();
if (VariantSubtracted) {
// C1 - LV < C2 --> LV > C1 - C2
if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, DL, AC, &ICmp,
DT) != llvm::OverflowResult::NeverOverflows)
return false;
} else {
// LV - C1 < C2 --> LV < C1 + C2
if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, DL, AC, &ICmp,
DT) != llvm::OverflowResult::NeverOverflows)
return false;
}
auto *Preheader = L.getLoopPreheader();
assert(Preheader && "Loop is not in simplify form?");
IRBuilder<> Builder(Preheader->getTerminator());
Value *NewCmpOp =
VariantSubtracted
? Builder.CreateSub(InvariantOp, InvariantRHS, "invariant.op",
/*HasNUW*/ false, /*HasNSW*/ true)
: Builder.CreateAdd(InvariantOp, InvariantRHS, "invariant.op",
/*HasNUW*/ false, /*HasNSW*/ true);
ICmp.setPredicate(Pred);
ICmp.setOperand(0, VariantOp);
ICmp.setOperand(1, NewCmpOp);
eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
return true;
}
/// Reassociate and hoist add/sub expressions.
static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2601,7 +2663,8 @@ static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
if (hoistAdd(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
return true;
// TODO: Support Sub.
if (hoistSub(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
return true;
return false;
}

View File

@@ -1,18 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -passes=licm -verify-memoryssa < %s | FileCheck %s
; TODO: x - iv < 4 ==> iv > x - 4
; x - iv < 4 ==> iv > x - 4
define i32 @test_01(ptr %p, ptr %x_p, ptr %length_p) {
; CHECK-LABEL: define i32 @test_01
; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0:![0-9]+]]
; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 [[X]], 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
; CHECK-NEXT: [[ARITH:%.*]] = sub nsw i32 [[X]], [[IV]]
; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
; CHECK-NEXT: [[X_CHECK:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_OP]]
; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
@@ -278,18 +278,18 @@ failed:
ret i32 -2
}
; TODO: iv - x < 4 ==> iv < 4 + x
; iv - x < 4 ==> iv < 4 + x
define i32 @test_03(ptr %p, ptr %x_p, ptr %length_p) {
; CHECK-LABEL: define i32 @test_03
; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG1:![0-9]+]]
; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add nsw i32 [[X]], 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
; CHECK-NEXT: [[ARITH:%.*]] = sub nsw i32 [[IV]], [[X]]
; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]]
; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]