[SCEVExpander] Skip creating <u 0 check, which is always false.
Unsigned compares of the form <u 0 are always false. Do not create such a redundant check in generateOverflowCheck. The patch introduces a new lambda to create the check, so we can conveniently exit early and skip creating some of the instructions that feed the check. I am planning to sink a few additional instructions as follow-ups, but I would prefer to do this separately, to keep the changes and the diff smaller. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D116811
This commit is contained in:
@@ -2516,38 +2516,46 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
|
||||
// And select either 1. or 2. depending on whether step is positive or
|
||||
// negative. If Step is known to be positive or negative, only create
|
||||
// either 1. or 2.
|
||||
Value *Add = nullptr, *Sub = nullptr;
|
||||
bool NeedPosCheck = !SE.isKnownNegative(Step);
|
||||
bool NeedNegCheck = !SE.isKnownPositive(Step);
|
||||
auto ComputeEndCheck = [&]() -> Value * {
|
||||
// Checking <u 0 is always false.
|
||||
if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
|
||||
return ConstantInt::getFalse(Loc->getContext());
|
||||
|
||||
if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
|
||||
StartValue = InsertNoopCastOfTo(
|
||||
StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
|
||||
Value *NegMulV = Builder.CreateNeg(MulV);
|
||||
if (NeedPosCheck)
|
||||
Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
|
||||
if (NeedNegCheck)
|
||||
Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
|
||||
} else {
|
||||
if (NeedPosCheck)
|
||||
Add = Builder.CreateAdd(StartValue, MulV);
|
||||
if (NeedNegCheck)
|
||||
Sub = Builder.CreateSub(StartValue, MulV);
|
||||
}
|
||||
Value *Add = nullptr, *Sub = nullptr;
|
||||
bool NeedPosCheck = !SE.isKnownNegative(Step);
|
||||
bool NeedNegCheck = !SE.isKnownPositive(Step);
|
||||
|
||||
Value *EndCompareLT = nullptr;
|
||||
Value *EndCompareGT = nullptr;
|
||||
Value *EndCheck = nullptr;
|
||||
if (NeedPosCheck)
|
||||
EndCheck = EndCompareLT = Builder.CreateICmp(
|
||||
Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
|
||||
if (NeedNegCheck)
|
||||
EndCheck = EndCompareGT = Builder.CreateICmp(
|
||||
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
|
||||
if (NeedPosCheck && NeedNegCheck) {
|
||||
// Select the answer based on the sign of Step.
|
||||
EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
|
||||
}
|
||||
if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
|
||||
StartValue = InsertNoopCastOfTo(
|
||||
StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
|
||||
Value *NegMulV = Builder.CreateNeg(MulV);
|
||||
if (NeedPosCheck)
|
||||
Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
|
||||
if (NeedNegCheck)
|
||||
Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
|
||||
} else {
|
||||
if (NeedPosCheck)
|
||||
Add = Builder.CreateAdd(StartValue, MulV);
|
||||
if (NeedNegCheck)
|
||||
Sub = Builder.CreateSub(StartValue, MulV);
|
||||
}
|
||||
|
||||
Value *EndCompareLT = nullptr;
|
||||
Value *EndCompareGT = nullptr;
|
||||
Value *EndCheck = nullptr;
|
||||
if (NeedPosCheck)
|
||||
EndCheck = EndCompareLT = Builder.CreateICmp(
|
||||
Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
|
||||
if (NeedNegCheck)
|
||||
EndCheck = EndCompareGT = Builder.CreateICmp(
|
||||
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
|
||||
if (NeedPosCheck && NeedNegCheck) {
|
||||
// Select the answer based on the sign of Step.
|
||||
EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
|
||||
}
|
||||
return EndCheck;
|
||||
};
|
||||
Value *EndCheck = ComputeEndCheck();
|
||||
|
||||
// If the backedge taken count type is larger than the AR type,
|
||||
// check that we don't drop any bits by truncating it. If we are
|
||||
|
||||
@@ -17,10 +17,8 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
|
||||
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
|
||||
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
|
||||
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
|
||||
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
|
||||
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
|
||||
@@ -159,10 +157,8 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
|
||||
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
|
||||
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
|
||||
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
|
||||
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
|
||||
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
|
||||
|
||||
@@ -20,10 +20,8 @@ define void @load_clamped_index(i32* %A, i32* %B, i32 %N) {
|
||||
; CHECK: vector.scevcheck:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
|
||||
; CHECK: vector.memcheck:
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
|
||||
@@ -107,10 +105,8 @@ define void @store_clamped_index(i32* %A, i32* %B, i32 %N) {
|
||||
; CHECK: vector.scevcheck:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
|
||||
; CHECK: vector.memcheck:
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
|
||||
@@ -273,10 +269,8 @@ define void @clamped_index_equal_dependence(i32* %A, i32* %B, i32 %N) {
|
||||
; CHECK: vector.scevcheck:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2
|
||||
|
||||
@@ -34,10 +34,8 @@ define void @f1(i16* noalias %a,
|
||||
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
|
||||
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
|
||||
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
|
||||
; LV-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
|
||||
; LV-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
|
||||
; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
|
||||
; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
|
||||
; LV-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
|
||||
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
|
||||
; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
|
||||
|
||||
Reference in New Issue
Block a user