[LSR] Allow already invariant operand for ICmpZero matching
The ICmpZero matching checks whether the expression is loop invariant per SCEV and expandable. This allows expressions inside the loop which can be made loop invariant to be seamlessly expanded, but is overly conservative for expressions which already *are* loop invariant. As a simple justification for why this is correct, consider a loop-invariant urem as RHS vs. an alternate function with that same urem wrapped inside a helper call. Why would it be legal to match the latter, but not the former? Differential Revision: https://reviews.llvm.org/D129793
This commit is contained in:
committed by
Philip Reames
parent
ca1cfa3f82
commit
9153515a7b
@@ -3333,14 +3333,24 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
|
||||
|
||||
// x == y --> x - y == 0
|
||||
const SCEV *N = SE.getSCEV(NV);
|
||||
if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) &&
|
||||
(!NV->getType()->isPointerTy() ||
|
||||
SE.getPointerBase(N) == SE.getPointerBase(S))) {
|
||||
// S is normalized, so normalize N before folding it into S
|
||||
// to keep the result normalized.
|
||||
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
|
||||
Kind = LSRUse::ICmpZero;
|
||||
S = SE.getMinusSCEV(N, S);
|
||||
if (!NV->getType()->isPointerTy() ||
|
||||
SE.getPointerBase(N) == SE.getPointerBase(S)) {
|
||||
if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N)) {
|
||||
// S is normalized, so normalize N before folding it into S
|
||||
// to keep the result normalized.
|
||||
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
|
||||
Kind = LSRUse::ICmpZero;
|
||||
S = SE.getMinusSCEV(N, S);
|
||||
} else if (L->isLoopInvariant(NV)) {
|
||||
// If we can't generally expand the expression (e.g. it contains
|
||||
// a divide), but it is already at a loop invariant point, wrap it
|
||||
// in an unknown (to prevent the expander from trying to re-expand
|
||||
// in a potentially unsafe way.)
|
||||
N = SE.getUnknown(NV);
|
||||
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
|
||||
Kind = LSRUse::ICmpZero;
|
||||
S = SE.getMinusSCEV(N, S);
|
||||
}
|
||||
}
|
||||
|
||||
// -1 and the negations of all interesting strides (except the negation
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -61,18 +61,16 @@ exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: We could handle this case even though we don't know %M. The
|
||||
; faulting instruction is already outside the loop!
|
||||
define void @icmp_zero_urem_invariant(i64 %N, i64 %M, ptr %p) {
|
||||
; CHECK-LABEL: @icmp_zero_urem_invariant(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[N:%.*]], [[M:%.*]]
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[UREM]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: store i64 0, ptr [[P:%.*]], align 8
|
||||
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2
|
||||
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[UREM]]
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
|
||||
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
|
||||
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
@@ -192,10 +190,10 @@ define void @icmp_zero_urem_vscale_mul8(i64 %N, ptr %p) {
|
||||
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[N:%.*]], [[MUL]]
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[UREM]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: store i64 0, ptr [[P:%.*]], align 8
|
||||
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2
|
||||
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[UREM]]
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
|
||||
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
|
||||
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
@@ -226,10 +224,10 @@ define void @icmp_zero_urem_vscale_mul64(i64 %N, ptr %p) {
|
||||
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[N:%.*]], [[MUL]]
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[UREM]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: store i64 0, ptr [[P:%.*]], align 8
|
||||
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2
|
||||
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[UREM]]
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
|
||||
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
|
||||
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
@@ -259,10 +257,10 @@ define void @icmp_zero_urem_vscale_shl3(i64 %N, ptr %p) {
|
||||
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[N:%.*]], [[SHL]]
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[UREM]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: store i64 0, ptr [[P:%.*]], align 8
|
||||
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2
|
||||
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[UREM]]
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
|
||||
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
|
||||
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
@@ -292,10 +290,10 @@ define void @icmp_zero_urem_vscale_shl6(i64 %N, ptr %p) {
|
||||
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[N:%.*]], [[SHL]]
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[UREM]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: store i64 0, ptr [[P:%.*]], align 8
|
||||
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2
|
||||
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[UREM]]
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
|
||||
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
|
||||
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
|
||||
Reference in New Issue
Block a user