[SCEVExpander] Skip creating <u 0 check, which is always false.

Unsigned compares of the form <u 0 are always false. Do not create such
a redundant check in generateOverflowCheck.
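
For a quick standalone illustration (plain C++, not part of the patch): an unsigned value can never be less than zero, so a check of this shape is statically dead.

  #include <cassert>
  #include <cstdint>
  #include <initializer_list>

  int main() {
    // Hypothetical demo: X < 0u mirrors the IR pattern "icmp ult iN %X, 0",
    // which is false for every possible unsigned X.
    for (uint32_t X : {0u, 1u, 0xffffffffu})
      assert(!(X < 0u));
    return 0;
  }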

The patch moves creation of the check into a new lambda, so we can
conveniently exit early and skip creating the instructions that only
feed the now-redundant check.

I am planning to sink a few additional instructions as follow-ups, but I
would prefer to do this separately, to keep the changes and diff
smaller.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D116811
commit 9345ab3a45
parent 0e19186c82
Author: Florian Hahn
Date:   2022-01-08 10:29:19 +00:00

4 changed files with 44 additions and 48 deletions
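
To see why a lambda helps here, below is a minimal self-contained C++ sketch of the pattern (hypothetical names, not the actual SCEVExpander code): funneling check construction through a callable lets the trivially-false case return before any of the supporting values are created.

  #include <cstdio>

  // Stand-in for materializing the Add/Sub helper instructions.
  int expensiveSetup() {
    std::puts("setup ran");
    return 1;
  }

  bool buildCheck(bool knownAlwaysFalse) {
    // Immediately-invoked lambda: the early return skips the setup work
    // that only the general case needs.
    auto computeEndCheck = [&]() -> bool {
      if (knownAlwaysFalse)
        return false; // analogous to returning ConstantInt::getFalse(...)
      return expensiveSetup() != 0;
    };
    return computeEndCheck();
  }

  int main() {
    std::printf("%d\n", buildCheck(true));  // prints 0; "setup ran" is skipped
    std::printf("%d\n", buildCheck(false)); // setup runs, prints 1
    return 0;
  }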


@@ -2516,38 +2516,46 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
   // And select either 1. or 2. depending on whether step is positive or
   // negative. If Step is known to be positive or negative, only create
   // either 1. or 2.
-  Value *Add = nullptr, *Sub = nullptr;
-  bool NeedPosCheck = !SE.isKnownNegative(Step);
-  bool NeedNegCheck = !SE.isKnownPositive(Step);
+  auto ComputeEndCheck = [&]() -> Value * {
+    // Checking <u 0 is always false.
+    if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
+      return ConstantInt::getFalse(Loc->getContext());
 
-  if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
-    StartValue = InsertNoopCastOfTo(
-        StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
-    Value *NegMulV = Builder.CreateNeg(MulV);
-    if (NeedPosCheck)
-      Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
-    if (NeedNegCheck)
-      Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
-  } else {
-    if (NeedPosCheck)
-      Add = Builder.CreateAdd(StartValue, MulV);
-    if (NeedNegCheck)
-      Sub = Builder.CreateSub(StartValue, MulV);
-  }
+    Value *Add = nullptr, *Sub = nullptr;
+    bool NeedPosCheck = !SE.isKnownNegative(Step);
+    bool NeedNegCheck = !SE.isKnownPositive(Step);
 
-  Value *EndCompareLT = nullptr;
-  Value *EndCompareGT = nullptr;
-  Value *EndCheck = nullptr;
-  if (NeedPosCheck)
-    EndCheck = EndCompareLT = Builder.CreateICmp(
-        Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
-  if (NeedNegCheck)
-    EndCheck = EndCompareGT = Builder.CreateICmp(
-        Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
-  if (NeedPosCheck && NeedNegCheck) {
-    // Select the answer based on the sign of Step.
-    EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
-  }
+    if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
+      StartValue = InsertNoopCastOfTo(
+          StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
+      Value *NegMulV = Builder.CreateNeg(MulV);
+      if (NeedPosCheck)
+        Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
+      if (NeedNegCheck)
+        Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
+    } else {
+      if (NeedPosCheck)
+        Add = Builder.CreateAdd(StartValue, MulV);
+      if (NeedNegCheck)
+        Sub = Builder.CreateSub(StartValue, MulV);
+    }
+
+    Value *EndCompareLT = nullptr;
+    Value *EndCompareGT = nullptr;
+    Value *EndCheck = nullptr;
+    if (NeedPosCheck)
+      EndCheck = EndCompareLT = Builder.CreateICmp(
+          Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
+    if (NeedNegCheck)
+      EndCheck = EndCompareGT = Builder.CreateICmp(
+          Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
+    if (NeedPosCheck && NeedNegCheck) {
+      // Select the answer based on the sign of Step.
+      EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
+    }
+    return EndCheck;
+  };
+  Value *EndCheck = ComputeEndCheck();
 
   // If the backedge taken count type is larger than the AR type,
   // check that we don't drop any bits by truncating it. If we are


@@ -17,10 +17,8 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
 ; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
 ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
 ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
@@ -159,10 +157,8 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
 ; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
 ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
 ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
 ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0


@@ -20,10 +20,8 @@ define void @load_clamped_index(i32* %A, i32* %B, i32 %N) {
 ; CHECK: vector.scevcheck:
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
-; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
 ; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK: vector.memcheck:
 ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
@@ -107,10 +105,8 @@ define void @store_clamped_index(i32* %A, i32* %B, i32 %N) {
 ; CHECK: vector.scevcheck:
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
-; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
 ; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK: vector.memcheck:
 ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
@@ -273,10 +269,8 @@ define void @clamped_index_equal_dependence(i32* %A, i32* %B, i32 %N) {
 ; CHECK: vector.scevcheck:
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
-; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
 ; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2


@@ -34,10 +34,8 @@ define void @f1(i16* noalias %a,
 ; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
 ; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
 ; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; LV-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
-; LV-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
 ; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
+; LV-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
 ; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
 ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0