[LICM] Reassociate & hoist sub expressions

LICM could reassociate mixed variant/invariant comparison/arithmetic operations and hoist invariant parts out of loop if it can prove that they can be computed without overflow. Motivating example here: ``` INV1 - VAR1 < INV2 ``` can be turned into ``` VAR > INV1 - INV2 ``` if we can prove no-signed-overflow here. Then `INV1 - INV2` can be computed out of loop, so we save one arithmetic operation in-loop. Reviewed By: skatkov Differential Revision: https://reviews.llvm.org/D148001
2023-05-29 12:05:50 +07:00
parent 0a6aec2fb3
commit dd0cf23e4a
2 changed files with 70 additions and 7 deletions
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2572,6 +2572,68 @@ static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
  return true;
 }

+/// Try to reassociate and hoist the following two patterns:
+/// LV - C1 < C2 --> LV < C1 + C2,
+/// C1 - LV < C2 --> LV > C1 - C2.
+static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
+                     Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
+                     ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
+                     AssumptionCache *AC, DominatorTree *DT) {
+  assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
+  assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
+  assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
+
+  // Try to represent VariantLHS as sum of invariant and variant operands.
+  using namespace PatternMatch;
+  Value *VariantOp, *InvariantOp;
+  if (!match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp))))
+    return false;
+
+  bool VariantSubtracted = false;
+  // LHS itself is a loop-variant, try to represent it in the form:
+  // "VariantOp + InvariantOp". If it is possible, then we can reassociate. If
+  // the variant operand goes with minus, we use a slightly different scheme.
+  if (L.isLoopInvariant(VariantOp)) {
+    std::swap(VariantOp, InvariantOp);
+    VariantSubtracted = true;
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+  if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
+    return false;
+
+  // In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to
+  // freely move values from left side of inequality to right side (just as in
+  // normal linear arithmetics). Overflows make things much more complicated, so
+  // we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that
+  // "C1 - C2" does not overflow.
+  auto &DL = L.getHeader()->getModule()->getDataLayout();
+  if (VariantSubtracted) {
+    // C1 - LV < C2 --> LV > C1 - C2
+    if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, DL, AC, &ICmp,
+                                    DT) != llvm::OverflowResult::NeverOverflows)
+      return false;
+  } else {
+    // LV - C1 < C2 --> LV < C1 + C2
+    if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, DL, AC, &ICmp,
+                                    DT) != llvm::OverflowResult::NeverOverflows)
+      return false;
+  }
+  auto *Preheader = L.getLoopPreheader();
+  assert(Preheader && "Loop is not in simplify form?");
+  IRBuilder<> Builder(Preheader->getTerminator());
+  Value *NewCmpOp =
+      VariantSubtracted
+          ? Builder.CreateSub(InvariantOp, InvariantRHS, "invariant.op",
+                              /*HasNUW*/ false, /*HasNSW*/ true)
+          : Builder.CreateAdd(InvariantOp, InvariantRHS, "invariant.op",
+                              /*HasNUW*/ false, /*HasNSW*/ true);
+  ICmp.setPredicate(Pred);
+  ICmp.setOperand(0, VariantOp);
+  ICmp.setOperand(1, NewCmpOp);
+  eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
+  return true;
+}
+
 /// Reassociate and hoist add/sub expressions.
 static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
                        MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2601,7 +2663,8 @@ static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
  if (hoistAdd(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
    return true;

-  // TODO: Support Sub.
+  if (hoistSub(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
+    return true;

  return false;
 }
--- a/llvm/test/Transforms/LICM/hoist-add-sub.ll
+++ b/llvm/test/Transforms/LICM/hoist-add-sub.ll
@@ -1,18 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt -S -passes=licm -verify-memoryssa < %s | FileCheck %s

-; TODO: x - iv < 4 ==> iv > x - 4
+; x - iv < 4 ==> iv > x - 4
 define i32 @test_01(ptr %p, ptr %x_p, ptr %length_p) {
 ; CHECK-LABEL: define i32 @test_01
 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0:![0-9]+]]
 ; CHECK-NEXT:    [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = sub nsw i32 [[X]], 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[ARITH:%.*]] = sub nsw i32 [[X]], [[IV]]
-; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
+; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_OP]]
 ; CHECK-NEXT:    br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
@@ -278,18 +278,18 @@ failed:
  ret i32 -2
 }

-; TODO: iv - x < 4 ==> iv < 4 + x
+; iv - x < 4 ==> iv < 4 + x
 define i32 @test_03(ptr %p, ptr %x_p, ptr %length_p) {
 ; CHECK-LABEL: define i32 @test_03
 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG1:![0-9]+]]
 ; CHECK-NEXT:    [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = add nsw i32 [[X]], 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[ARITH:%.*]] = sub nsw i32 [[IV]], [[X]]
-; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
+; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]]
 ; CHECK-NEXT:    br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]