In rL316552, we ban intersection of unsigned latch range with signed range check and vice
versa, unless the entire range check iteration space is known positive. It was a correct
functional fix that saved us from dealing with ambiguous values, but it also appeared
to be a very restrictive limitation. In particular, in the following case:
loop:
%iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch]
%iv.offset = add i32 %iv, 10
%rc = icmp slt i32 %iv.offset, %len
br i1 %rc, label %latch, label %deopt
latch:
%iv.next = add i32 %iv, 11
%cond = icmp i32 ult %iv.next, 100
br it %cond, label %loop, label %exit
Here, the unsigned iteration range is `[0, 100)`, and the safe range for range
check is `[-10, %len - 10)`. For unsigned iteration spaces, we use unsigned
min/max functions for range intersection. Given this, we wanted to avoid dealing
with `-10` because it is interpreted as a very big unsigned value. Semantically, range
check's safe range goes through unsigned border, so in fact it is two disjoint
ranges in IV's iteration space. Intersection of such ranges is not trivial, so we prohibited
this case saying that we are not allowed to intersect such ranges.
What semantics of this safe range actually means is that we can start from `-10` and go
up increasing the `%iv` by one until we reach `%len - 10` (for simplicity let's assume that
`%len - 10` is a reasonably big positive value).
In particular, this safe iteration space includes `0, 1, 2, ..., %len - 11`. So if we were able to return
safe iteration space `[0, %len - 10)`, we could safely intersect it with IV's iteration space. All
values in this range are non-negative, so using signed/unsigned min/max for them is unambiguous.
In this patch, we alter the algorithm of safe range calculation so that it returnes a subset of the
original safe space which is represented by one continuous range that does not go through wrap.
In order to reach this, we use modified SCEV substraction function. It can be imagined as a function
that substracts by `1` (or `-1`) as long as the further substraction does not cause a wrap in IV iteration
space. This allows us to perform IRCE in many situations when we deal with IV space and range check
of different types (in terms of signed/unsigned).
We apply this approach for both matching and not matching types of IV iteration space and the
range check. One implication of this is that now IRCE became smarter in detection of empty safe
ranges. For example, in this case:
loop:
%iv = phi i32 [ %begin, %preheader ], [ %iv.next, %latch]
%iv.offset = sub i32 %iv, 10
%rc = icmp ult i32 %iv.offset, %len
br i1 %rc, label %latch, label %deopt
latch:
%iv.next = add i32 %iv, 11
%cond = icmp i32 ult %iv.next, 100
br it %cond, label %loop, label %exit
If `%len` was less than 10 but SCEV failed to trivially prove that `%begin - 10 >u %len- 10`,
we could end up executing entire loop in safe preloop while the main loop was still generated,
but never executed. Now, cutting the ranges so that if both `begin - 10` and `%len - 10` overflow,
we have a trivially empty range of `[0, 0)`. This in some cases prevents us from meaningless optimization.
Differential Revision: https://reviews.llvm.org/D39954
llvm-svn: 318639
287 lines
12 KiB
LLVM
287 lines
12 KiB
LLVM
; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s
|
|
|
|
; CHECK-LABEL: irce: in function test_01: constrained Loop at depth 1 containing:
|
|
; CHECK-LABEL: irce: in function test_02: constrained Loop at depth 1 containing:
|
|
; CHECK-LABEL: irce: in function test_03: constrained Loop at depth 1 containing:
|
|
; CHECK-LABEL: irce: in function test_04: constrained Loop at depth 1 containing:
|
|
; CHECK-LABEL: irce: in function test_05: constrained Loop at depth 1 containing:
|
|
|
|
; This test used to demonstrate a miscompile: the outer loop's IV iterates in
|
|
; range of [2, 400) and the range check is done against value 331. Due to a bug
|
|
; in range intersection IRCE manages to eliminate the range check without
|
|
; inserting a postloop, which is incorrect. We treat the range of this test as
|
|
; an unsigned range and are able to intersect ranges correctly and insert a
|
|
; postloop.
|
|
|
|
define void @test_01() {
|
|
|
|
; CHECK-LABEL: test_01
|
|
; CHECK-NOT: preloop
|
|
; CHECK: range_check_block: ; preds = %inner_loop
|
|
; CHECK-NEXT: %range_check = icmp slt i32 %iv, 331
|
|
; CHECK-NEXT: br i1 true, label %loop_latch
|
|
; CHECK: loop_latch:
|
|
; CHECK-NEXT: %iv_next = add i32 %iv, 1
|
|
; CHECK-NEXT: %loop_cond = icmp ult i32 %iv_next, 400
|
|
; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 %iv_next, 331
|
|
; CHECK-NEXT: br i1 [[COND]], label %loop_header, label %main.exit.selector
|
|
; CHECK: main.exit.selector: ; preds = %loop_latch
|
|
; CHECK-NEXT: %iv_next.lcssa = phi i32 [ %iv_next, %loop_latch ]
|
|
; CHECK-NEXT: %iv.lcssa = phi i32 [ %iv, %loop_latch ]
|
|
; CHECK-NEXT: [[MES_COND:%[^ ]+]] = icmp ult i32 %iv_next.lcssa, 400
|
|
; CHECK-NEXT: br i1 [[MES_COND]], label %main.pseudo.exit, label %exit
|
|
; CHECK: loop_latch.postloop: ; preds = %range_check_block.postloop
|
|
; CHECK-NEXT: %iv_next.postloop = add i32 %iv.postloop, 1
|
|
; CHECK-NEXT: %loop_cond.postloop = icmp ult i32 %iv_next.postloop, 400
|
|
; CHECK-NEXT: br i1 %loop_cond.postloop, label %loop_header.postloop, label %exit.loopexit
|
|
|
|
entry:
|
|
br label %loop_header
|
|
|
|
loop_header: ; preds = %loop_latch, %entry
|
|
%iv = phi i32 [ 2, %entry ], [ %iv_next, %loop_latch ]
|
|
%iv.prev = phi i32 [ 1, %entry ], [ %iv, %loop_latch ]
|
|
%tmp2 = icmp sgt i32 %iv.prev, -1
|
|
br i1 %tmp2, label %loop_header.split.us, label %exit
|
|
|
|
loop_header.split.us: ; preds = %loop_header
|
|
br label %inner_loop
|
|
|
|
inner_loop: ; preds = %inner_loop, %loop_header.split.us
|
|
%inner_iv = phi i32 [ 1, %loop_header.split.us ], [ %inner_iv_next, %inner_loop ]
|
|
%inner_iv_next = add nuw nsw i32 %inner_iv, 1
|
|
%inner_cond = icmp ult i32 %inner_iv_next, 31
|
|
br i1 %inner_cond, label %inner_loop, label %range_check_block
|
|
|
|
exit: ; preds = %loop_latch, %loop_header
|
|
ret void
|
|
|
|
range_check_block: ; preds = %inner_loop
|
|
%range_check = icmp slt i32 %iv, 331
|
|
br i1 %range_check, label %loop_latch, label %deopt
|
|
|
|
loop_latch: ; preds = %range_check_block
|
|
%iv_next = add i32 %iv, 1
|
|
%loop_cond = icmp ult i32 %iv_next, 400
|
|
br i1 %loop_cond, label %loop_header, label %exit
|
|
|
|
deopt: ; preds = %range_check_block
|
|
ret void
|
|
}
|
|
|
|
; Similar to test_01, but here the range check is done against 450. No postloop
|
|
; is required.
|
|
|
|
define void @test_02() {
|
|
|
|
; CHECK-LABEL: test_02
|
|
; CHECK-NOT: preloop
|
|
; CHECK-NOT: postloop
|
|
; CHECK: range_check_block: ; preds = %inner_loop
|
|
; CHECK-NEXT: %range_check = icmp slt i32 %iv, 450
|
|
; CHECK-NEXT: br i1 true, label %loop_latch
|
|
; CHECK: loop_latch: ; preds = %range_check_block
|
|
; CHECK-NEXT: %iv_next = add i32 %iv, 1
|
|
; CHECK-NEXT: %loop_cond = icmp ult i32 %iv_next, 400
|
|
; CHECK-NEXT: br i1 %loop_cond, label %loop_header, label %exit
|
|
|
|
entry:
|
|
br label %loop_header
|
|
|
|
loop_header: ; preds = %loop_latch, %entry
|
|
%iv = phi i32 [ 2, %entry ], [ %iv_next, %loop_latch ]
|
|
%iv.prev = phi i32 [ 1, %entry ], [ %iv, %loop_latch ]
|
|
%tmp2 = icmp sgt i32 %iv.prev, -1
|
|
br i1 %tmp2, label %loop_header.split.us, label %exit
|
|
|
|
loop_header.split.us: ; preds = %loop_header
|
|
br label %inner_loop
|
|
|
|
inner_loop: ; preds = %inner_loop, %loop_header.split.us
|
|
%inner_iv = phi i32 [ 1, %loop_header.split.us ], [ %inner_iv_next, %inner_loop ]
|
|
%inner_iv_next = add nuw nsw i32 %inner_iv, 1
|
|
%inner_cond = icmp ult i32 %inner_iv_next, 31
|
|
br i1 %inner_cond, label %inner_loop, label %range_check_block
|
|
|
|
exit: ; preds = %loop_latch, %loop_header
|
|
ret void
|
|
|
|
range_check_block: ; preds = %inner_loop
|
|
%range_check = icmp slt i32 %iv, 450
|
|
br i1 %range_check, label %loop_latch, label %deopt
|
|
|
|
loop_latch: ; preds = %range_check_block
|
|
%iv_next = add i32 %iv, 1
|
|
%loop_cond = icmp ult i32 %iv_next, 400
|
|
br i1 %loop_cond, label %loop_header, label %exit
|
|
|
|
deopt: ; preds = %range_check_block
|
|
ret void
|
|
}
|
|
|
|
; Range check is made against 0, so the safe iteration range is empty. IRCE
|
|
; should not apply to the inner loop. The condition %tmp2 can be eliminated.
|
|
|
|
define void @test_03() {
|
|
|
|
; CHECK-LABEL: test_03
|
|
; CHECK-NOT: preloop
|
|
; CHECK-NOT: postloop
|
|
; CHECK: %tmp2 = icmp sgt i32 %iv.prev, -1
|
|
; CHECK-NEXT: br i1 true, label %loop_header.split.us, label %exit
|
|
; CHECK: range_check_block:
|
|
; CHECK-NEXT: %range_check = icmp slt i32 %iv, 0
|
|
; CHECK-NEXT: br i1 %range_check, label %loop_latch, label %deopt
|
|
|
|
entry:
|
|
br label %loop_header
|
|
|
|
loop_header: ; preds = %loop_latch, %entry
|
|
%iv = phi i32 [ 2, %entry ], [ %iv_next, %loop_latch ]
|
|
%iv.prev = phi i32 [ 1, %entry ], [ %iv, %loop_latch ]
|
|
%tmp2 = icmp sgt i32 %iv.prev, -1
|
|
br i1 %tmp2, label %loop_header.split.us, label %exit
|
|
|
|
loop_header.split.us: ; preds = %loop_header
|
|
br label %inner_loop
|
|
|
|
inner_loop: ; preds = %inner_loop, %loop_header.split.us
|
|
%inner_iv = phi i32 [ 1, %loop_header.split.us ], [ %inner_iv_next, %inner_loop ]
|
|
%inner_iv_next = add nuw nsw i32 %inner_iv, 1
|
|
%inner_cond = icmp ult i32 %inner_iv_next, 31
|
|
br i1 %inner_cond, label %inner_loop, label %range_check_block
|
|
|
|
exit: ; preds = %loop_latch, %loop_header
|
|
ret void
|
|
|
|
range_check_block: ; preds = %inner_loop
|
|
%range_check = icmp slt i32 %iv, 0
|
|
br i1 %range_check, label %loop_latch, label %deopt
|
|
|
|
loop_latch: ; preds = %range_check_block
|
|
%iv_next = add i32 %iv, 1
|
|
%loop_cond = icmp ult i32 %iv_next, 400
|
|
br i1 %loop_cond, label %loop_header, label %exit
|
|
|
|
deopt: ; preds = %range_check_block
|
|
ret void
|
|
}
|
|
|
|
; We do not know whether %n is positive or negative, so we prohibit IRCE in
|
|
; order to avoid incorrect intersection of signed and unsigned ranges.
|
|
; The condition %tmp2 can be eliminated.
|
|
|
|
define void @test_04(i32* %p) {
|
|
|
|
; CHECK-LABEL: test_04
|
|
; CHECK-NOT: preloop
|
|
; CHECK-NOT: postloop
|
|
; CHECK: %tmp2 = icmp sgt i32 %iv.prev, -1
|
|
; CHECK-NEXT: br i1 true, label %loop_header.split.us, label %exit
|
|
; CHECK: range_check_block:
|
|
; CHECK-NEXT: %range_check = icmp slt i32 %iv, %n
|
|
; CHECK-NEXT: br i1 %range_check, label %loop_latch, label %deopt
|
|
|
|
entry:
|
|
%n = load i32, i32* %p
|
|
br label %loop_header
|
|
|
|
loop_header: ; preds = %loop_latch, %entry
|
|
%iv = phi i32 [ 2, %entry ], [ %iv_next, %loop_latch ]
|
|
%iv.prev = phi i32 [ 1, %entry ], [ %iv, %loop_latch ]
|
|
%tmp2 = icmp sgt i32 %iv.prev, -1
|
|
br i1 %tmp2, label %loop_header.split.us, label %exit
|
|
|
|
loop_header.split.us: ; preds = %loop_header
|
|
br label %inner_loop
|
|
|
|
inner_loop: ; preds = %inner_loop, %loop_header.split.us
|
|
%inner_iv = phi i32 [ 1, %loop_header.split.us ], [ %inner_iv_next, %inner_loop ]
|
|
%inner_iv_next = add nuw nsw i32 %inner_iv, 1
|
|
%inner_cond = icmp ult i32 %inner_iv_next, 31
|
|
br i1 %inner_cond, label %inner_loop, label %range_check_block
|
|
|
|
exit: ; preds = %loop_latch, %loop_header
|
|
ret void
|
|
|
|
range_check_block: ; preds = %inner_loop
|
|
%range_check = icmp slt i32 %iv, %n
|
|
br i1 %range_check, label %loop_latch, label %deopt
|
|
|
|
loop_latch: ; preds = %range_check_block
|
|
%iv_next = add i32 %iv, 1
|
|
%loop_cond = icmp ult i32 %iv_next, 400
|
|
br i1 %loop_cond, label %loop_header, label %exit
|
|
|
|
deopt: ; preds = %range_check_block
|
|
ret void
|
|
}
|
|
|
|
; Same as test_04, but range guarantees that %n is positive. So we can safely
|
|
; intersect ranges (with insertion of postloop).
|
|
|
|
define void @test_05(i32* %p) {
|
|
|
|
; CHECK-LABEL: test_05
|
|
; CHECK-NOT: preloop
|
|
; CHECK: entry:
|
|
; CHECK-NEXT: %n = load i32, i32* %p, !range !6
|
|
; CHECK-NEXT: [[CMP_1:%[^ ]+]] = icmp ugt i32 %n, 2
|
|
; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP_1]], i32 %n, i32 2
|
|
; CHECK-NEXT: [[CMP_2:%[^ ]+]] = icmp ult i32 2, %exit.mainloop.at
|
|
; CHECK-NEXT: br i1 [[CMP_2]], label %loop_header.preheader, label %main.pseudo.exit
|
|
; CHECK: range_check_block: ; preds = %inner_loop
|
|
; CHECK-NEXT: %range_check = icmp slt i32 %iv, %n
|
|
; CHECK-NEXT: br i1 true, label %loop_latch, label %deopt.loopexit2
|
|
; CHECK: loop_latch: ; preds = %range_check_block
|
|
; CHECK-NEXT: %iv_next = add i32 %iv, 1
|
|
; CHECK-NEXT: %loop_cond = icmp ult i32 %iv_next, 400
|
|
; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 %iv_next, %exit.mainloop.at
|
|
; CHECK-NEXT: br i1 [[COND]], label %loop_header, label %main.exit.selector
|
|
; CHECK: main.exit.selector: ; preds = %loop_latch
|
|
; CHECK-NEXT: %iv_next.lcssa = phi i32 [ %iv_next, %loop_latch ]
|
|
; CHECK-NEXT: %iv.lcssa = phi i32 [ %iv, %loop_latch ]
|
|
; CHECK-NEXT: [[MES_COND:%[^ ]+]] = icmp ult i32 %iv_next.lcssa, 400
|
|
; CHECK-NEXT: br i1 [[MES_COND]], label %main.pseudo.exit, label %exit
|
|
; CHECK: loop_latch.postloop: ; preds = %range_check_block.postloop
|
|
; CHECK-NEXT: %iv_next.postloop = add i32 %iv.postloop, 1
|
|
; CHECK-NEXT: %loop_cond.postloop = icmp ult i32 %iv_next.postloop, 400
|
|
; CHECK-NEXT: br i1 %loop_cond.postloop, label %loop_header.postloop, label %exit.loopexit
|
|
|
|
entry:
|
|
%n = load i32, i32* %p, !range !0
|
|
br label %loop_header
|
|
|
|
loop_header: ; preds = %loop_latch, %entry
|
|
%iv = phi i32 [ 2, %entry ], [ %iv_next, %loop_latch ]
|
|
%iv.prev = phi i32 [ 1, %entry ], [ %iv, %loop_latch ]
|
|
%tmp2 = icmp sgt i32 %iv.prev, -1
|
|
br i1 %tmp2, label %loop_header.split.us, label %exit
|
|
|
|
loop_header.split.us: ; preds = %loop_header
|
|
br label %inner_loop
|
|
|
|
inner_loop: ; preds = %inner_loop, %loop_header.split.us
|
|
%inner_iv = phi i32 [ 1, %loop_header.split.us ], [ %inner_iv_next, %inner_loop ]
|
|
%inner_iv_next = add nuw nsw i32 %inner_iv, 1
|
|
%inner_cond = icmp ult i32 %inner_iv_next, 31
|
|
br i1 %inner_cond, label %inner_loop, label %range_check_block
|
|
|
|
exit: ; preds = %loop_latch, %loop_header
|
|
ret void
|
|
|
|
range_check_block: ; preds = %inner_loop
|
|
%range_check = icmp slt i32 %iv, %n
|
|
br i1 %range_check, label %loop_latch, label %deopt
|
|
|
|
loop_latch: ; preds = %range_check_block
|
|
%iv_next = add i32 %iv, 1
|
|
%loop_cond = icmp ult i32 %iv_next, 400
|
|
br i1 %loop_cond, label %loop_header, label %exit
|
|
|
|
deopt: ; preds = %range_check_block
|
|
ret void
|
|
}
|
|
|
|
!0 = !{i32 0, i32 50}
|