[SCEVExpander] Skip creating <u 0 check, which is always false.
Unsigned compares of the form <u 0 are always false. Do not create such a redundant check in generateOverflowCheck. The patch introduces a new lambda to create the check, so we can conveniently exit early and skip creating some of the instructions that feed the check. I am planning to sink a few additional instructions as follow-ups, but I would prefer to do this separately, to keep the changes and the diff smaller. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D116811
This commit is contained in:
@@ -2516,38 +2516,46 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
|
||||
// And select either 1. or 2. depending on whether step is positive or
|
||||
// negative. If Step is known to be positive or negative, only create
|
||||
// either 1. or 2.
|
||||
Value *Add = nullptr, *Sub = nullptr;
|
||||
bool NeedPosCheck = !SE.isKnownNegative(Step);
|
||||
bool NeedNegCheck = !SE.isKnownPositive(Step);
|
||||
auto ComputeEndCheck = [&]() -> Value * {
|
||||
// Checking <u 0 is always false.
|
||||
if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
|
||||
return ConstantInt::getFalse(Loc->getContext());
|
||||
|
||||
if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
|
||||
StartValue = InsertNoopCastOfTo(
|
||||
StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
|
||||
Value *NegMulV = Builder.CreateNeg(MulV);
|
||||
if (NeedPosCheck)
|
||||
Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
|
||||
if (NeedNegCheck)
|
||||
Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
|
||||
} else {
|
||||
if (NeedPosCheck)
|
||||
Add = Builder.CreateAdd(StartValue, MulV);
|
||||
if (NeedNegCheck)
|
||||
Sub = Builder.CreateSub(StartValue, MulV);
|
||||
}
|
||||
Value *Add = nullptr, *Sub = nullptr;
|
||||
bool NeedPosCheck = !SE.isKnownNegative(Step);
|
||||
bool NeedNegCheck = !SE.isKnownPositive(Step);
|
||||
|
||||
Value *EndCompareLT = nullptr;
|
||||
Value *EndCompareGT = nullptr;
|
||||
Value *EndCheck = nullptr;
|
||||
if (NeedPosCheck)
|
||||
EndCheck = EndCompareLT = Builder.CreateICmp(
|
||||
Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
|
||||
if (NeedNegCheck)
|
||||
EndCheck = EndCompareGT = Builder.CreateICmp(
|
||||
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
|
||||
if (NeedPosCheck && NeedNegCheck) {
|
||||
// Select the answer based on the sign of Step.
|
||||
EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
|
||||
}
|
||||
if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
|
||||
StartValue = InsertNoopCastOfTo(
|
||||
StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
|
||||
Value *NegMulV = Builder.CreateNeg(MulV);
|
||||
if (NeedPosCheck)
|
||||
Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
|
||||
if (NeedNegCheck)
|
||||
Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
|
||||
} else {
|
||||
if (NeedPosCheck)
|
||||
Add = Builder.CreateAdd(StartValue, MulV);
|
||||
if (NeedNegCheck)
|
||||
Sub = Builder.CreateSub(StartValue, MulV);
|
||||
}
|
||||
|
||||
Value *EndCompareLT = nullptr;
|
||||
Value *EndCompareGT = nullptr;
|
||||
Value *EndCheck = nullptr;
|
||||
if (NeedPosCheck)
|
||||
EndCheck = EndCompareLT = Builder.CreateICmp(
|
||||
Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
|
||||
if (NeedNegCheck)
|
||||
EndCheck = EndCompareGT = Builder.CreateICmp(
|
||||
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
|
||||
if (NeedPosCheck && NeedNegCheck) {
|
||||
// Select the answer based on the sign of Step.
|
||||
EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
|
||||
}
|
||||
return EndCheck;
|
||||
};
|
||||
Value *EndCheck = ComputeEndCheck();
|
||||
|
||||
// If the backedge taken count type is larger than the AR type,
|
||||
// check that we don't drop any bits by truncating it. If we are
|
||||
|
||||
@@ -17,10 +17,8 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
|
||||
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
|
||||
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
|
||||
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
|
||||
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
|
||||
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
|
||||
@@ -159,10 +157,8 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
|
||||
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
|
||||
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
|
||||
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
|
||||
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
|
||||
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
|
||||
|
||||
@@ -20,10 +20,8 @@ define void @load_clamped_index(i32* %A, i32* %B, i32 %N) {
|
||||
; CHECK: vector.scevcheck:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
|
||||
; CHECK: vector.memcheck:
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
|
||||
@@ -107,10 +105,8 @@ define void @store_clamped_index(i32* %A, i32* %B, i32 %N) {
|
||||
; CHECK: vector.scevcheck:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
|
||||
; CHECK: vector.memcheck:
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
|
||||
@@ -273,10 +269,8 @@ define void @clamped_index_equal_dependence(i32* %A, i32* %B, i32 %N) {
|
||||
; CHECK: vector.scevcheck:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2
|
||||
|
||||
@@ -34,10 +34,8 @@ define void @f1(i16* noalias %a,
|
||||
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
|
||||
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
|
||||
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
|
||||
; LV-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
|
||||
; LV-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
|
||||
; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
|
||||
; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
|
||||
; LV-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
|
||||
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
|
||||
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
|
||||
; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
|
||||
|
||||
Reference in New Issue
Block a user