Files
clang-p2996/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
Philip Reames 37ead201e6 [runtime-unroll] Use incrementing IVs instead of decrementing ones
This is one of those wonderful "in theory X doesn't matter, but in practice it does" changes. In this particular case, we shift the IVs inserted by the runtime unroller to clamp the iteration count of the loops* from decrementing to incrementing.

Why does this matter?  A couple of reasons:
* SCEV doesn't have a native subtract node.  Instead, every subtract (A - B) is represented as A + -1 * B, and SCEV drops any flags invalidated by that rewrite.  As a result, SCEV is slightly less good at reasoning about edge cases involving decrementing addrecs than incrementing ones.  (You can see this in the inferred flags in some of the test cases.)
* Other parts of the optimizer produce incrementing IVs, and they're common in idiomatic source language.  We do have support for reversing IVs, but in general if we produce one of each, the pair will persist surprisingly far through the optimizer before being coalesced.  (You can see this looking at nearby phis in the test cases.)

Note that if the hardware prefers decrementing (i.e. zero tested) loops, LSR should convert back immediately before codegen.

* Mostly irrelevant detail: The main loop of the prolog case is handled independently and will simply use the original IV with a changed start value.  We could in theory use this scheme for all iteration clamping, but that's a larger and more invasive change.
2021-11-12 15:44:58 -08:00

286 lines
10 KiB
LLVM

; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=EPILOG,COMMON
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
;
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=EPILOG,COMMON
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
;
; Restricted versions of unroll (loop-unroll<peeling;no-runtime>, loop-unroll-full) should not be doing runtime unrolling
; even if it is globally enabled through -unroll-runtime option
;
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=NOEPILOG,COMMON
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=NOEPILOG,COMMON
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Tests for unrolling loops with run-time trip counts
; COMMON-LABEL: @test(
; EPILOG: %xtraiter = and i32 %n
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
; NOEPILOG-NOT: %xtraiter = and i32 %n
; PROLOG: %xtraiter = and i32 %n
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
; NOPROLOG-NOT: %xtraiter = and i32 %n
; EPILOG: for.body.epil:
; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %for.body.epil.preheader ]
; EPILOG: %epil.iter.next = add i32 %epil.iter, 1
; EPILOG: %epil.iter.cmp = icmp ne i32 %epil.iter.next, %xtraiter
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !0
; NOEPILOG: for.body:
; NOEPILOG-NOT: for.body.epil:
; PROLOG: for.body.prol:
; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
; PROLOG: %prol.iter.next = add i32 %prol.iter, 1
; PROLOG: %prol.iter.cmp = icmp ne i32 %prol.iter.next, %xtraiter
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop !0
; NOPROLOG: for.body:
; NOPROLOG-NOT: for.body.prol:
; @test: sums the first %n i32 elements of %a. The trip count is the run-time
; value %n; when %n is 0 the entry guard skips the loop and 0 is returned.
; NOTE(review): the check lines for this function match names such as
; for.body.epil and %indvars.iv.prol, which look derived from the value and
; block names used here -- confirm before renaming anything in this body.
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
; Zero-trip guard: go straight to for.end when %n == 0.
%cmp1 = icmp eq i32 %n, 0
br i1 %cmp1, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
; %indvars.iv is the 64-bit element index, %sum.02 the running sum.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %sum.02
%indvars.iv.next = add i64 %indvars.iv, 1
; The incremented 64-bit index is truncated to i32 so it can be compared
; against the i32 bound %n; the loop exits when they are equal.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
; Result is 0 when the loop was skipped, otherwise the final sum.
%sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
ret i32 %sum.0.lcssa
}
; Still try to completely unroll loops with compile-time trip counts
; even if the -unroll-runtime is specified
; COMMON-LABEL: @test1(
; COMMON: for.body:
; COMMON-NOT: for.body.epil:
; COMMON-NOT: for.body.prol:
; @test1: same summing loop shape as @test, but the exit compare is against the
; constant 5 rather than a function argument, so the trip count is a
; compile-time constant. There is no zero-trip guard; entry branches
; unconditionally into the loop.
define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
; %indvars.iv is the 64-bit element index, %sum.01 the running sum.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %sum.01
%indvars.iv.next = add i64 %indvars.iv, 1
; Exit once the truncated, incremented index equals the constant 5.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 5
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
; The only predecessor is the loop, so %add is returned directly (no phi).
ret i32 %add
}
; This is test 2007-05-09-UnknownTripCount.ll which can be unrolled now
; if the -unroll-runtime option is turned on
; COMMON-LABEL: @foo(
; EPILOG: bb72.2:
; PROLOG: bb72.2:
; NOEPILOG-NOT: bb72.2:
; NOPROLOG-NOT: bb72.2:
; @foo: an empty counting loop spread over two blocks. cond_true.outer holds
; the counter phi and bb72 is the latch; the loop does no work other than
; incrementing the counter, and exits when the incremented counter equals
; %trips. There is no zero-trip guard before the loop.
define void @foo(i32 %trips) {
entry:
br label %cond_true.outer
cond_true.outer:
; Loop header: counter starts at 0 and is fed back from bb72.
%indvar1.ph = phi i32 [ 0, %entry ], [ %indvar.next2, %bb72 ]
br label %bb72
bb72:
; Latch: bump the counter and leave the loop when it reaches %trips.
%indvar.next2 = add i32 %indvar1.ph, 1
%exitcond3 = icmp eq i32 %indvar.next2, %trips
br i1 %exitcond3, label %cond_true138, label %cond_true.outer
cond_true138:
ret void
}
; Test run-time unrolling for a loop that counts down by 2 (the counter is stepped by adding -2).
; COMMON-LABEL: @down(
; EPILOG: for.body.epil:
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.cond.for.end_crit_edge.epilog-lcssa
; NOEPILOG: for.body:
; NOEPILOG-NOT: for.body.epil:
; PROLOG: for.body.prol:
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit
; NOPROLOG: for.body:
; NOPROLOG-NOT: for.body.prol:
; @down: accumulates i16 values read through an advancing pointer while a
; counter that starts at %len is stepped by -2 each iteration. The loop exits
; when the stepped counter equals 0; the i32 sum is truncated to i16 on exit.
; When %len is 0 the entry guard skips the loop and 0 is returned.
define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
entry:
; Zero-trip guard: go straight to for.end when %len == 0.
%cmp2 = icmp eq i32 %len, 0
br i1 %cmp2, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
; Loop-carried state: current pointer, remaining counter, running i32 sum.
%p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %entry ]
%len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %entry ]
%res.03 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; The pointer advances by one i16 element per iteration; the load uses the
; pre-increment pointer.
%incdec.ptr = getelementptr inbounds i16, i16* %p.addr.05, i64 1
%0 = load i16, i16* %p.addr.05, align 2
%conv = zext i16 %0 to i32
%add = add i32 %conv, %res.03
; Counter step of -2, followed by an equality test against 0.
%sub = add nsw i32 %len.addr.04, -2
%cmp = icmp eq i32 %sub, 0
br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body
for.cond.for.end_crit_edge: ; preds = %for.body
; Narrow the i32 sum to the i16 return type on the loop-exit edge.
%phitmp = trunc i32 %add to i16
br label %for.end
for.end: ; preds = %for.cond.for.end_crit_edge, %entry
%res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
ret i16 %res.0.lcssa
}
; Test run-time unrolling disable metadata.
; COMMON-LABEL: @test2(
; EPILOG: for.body:
; EPILOG-NOT: for.body.epil:
; NOEPILOG: for.body:
; NOEPILOG-NOT: for.body.epil:
; PROLOG: for.body:
; PROLOG-NOT: for.body.prol:
; NOPROLOG: for.body:
; NOPROLOG-NOT: for.body.prol:
; @test2: instruction-for-instruction the same loop as @down; the only
; difference is that the latch branch carries "!llvm.loop !0", where !0 is the
; loop-ID node defined at the end of this file whose property is
; llvm.loop.unroll.runtime.disable.
define zeroext i16 @test2(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
entry:
; Zero-trip guard: go straight to for.end when %len == 0.
%cmp2 = icmp eq i32 %len, 0
br i1 %cmp2, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
; Loop-carried state: current pointer, remaining counter, running i32 sum.
%p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %entry ]
%len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %entry ]
%res.03 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%incdec.ptr = getelementptr inbounds i16, i16* %p.addr.05, i64 1
%0 = load i16, i16* %p.addr.05, align 2
%conv = zext i16 %0 to i32
%add = add i32 %conv, %res.03
; Counter step of -2, followed by an equality test against 0.
%sub = add nsw i32 %len.addr.04, -2
%cmp = icmp eq i32 %sub, 0
; The loop metadata is attached to this back-edge branch.
br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body, !llvm.loop !0
for.cond.for.end_crit_edge: ; preds = %for.body
; Narrow the i32 sum to the i16 return type on the loop-exit edge.
%phitmp = trunc i32 %add to i16
br label %for.end
for.end: ; preds = %for.cond.for.end_crit_edge, %entry
%res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
ret i16 %res.0.lcssa
}
; Don't unroll loops with multiple exit/exiting blocks unless
; -unroll-runtime-multi-exit=true is specified.
; Single exit, multiple exiting blocks.
; @unique_exit: a loop made of the blocks header and latch. Both blocks can
; leave the loop, and both leave to the same block (LoopExit), giving one exit
; block with two exiting blocks. The header's exit condition is the constant
; true, and the entry and latch compares each use an undef operand. Note that
; LoopExit and returnblock are laid out textually before the loop blocks.
define void @unique_exit(i32 %arg) {
; COMMON-LABEL: @unique_exit(
; COMMON-NOT: .unr
entry:
%tmp = icmp sgt i32 undef, %arg
br i1 %tmp, label %preheader, label %returnblock
preheader: ; preds = %entry
br label %header
LoopExit: ; preds = %header, %latch
; Single exit block; merges the value coming from each exiting block.
%tmp2.ph = phi i32 [ %tmp4, %header ], [ -1, %latch ]
br label %returnblock
returnblock: ; preds = %LoopExit, %entry
; %tmp2 has no users; the function returns void.
%tmp2 = phi i32 [ -1, %entry ], [ %tmp2.ph, %LoopExit ]
ret void
header: ; preds = %preheader, %latch
%tmp4 = phi i32 [ %inc, %latch ], [ %arg, %preheader ]
%inc = add nsw i32 %tmp4, 1
; First exiting block: constant-true condition selects LoopExit.
br i1 true, label %LoopExit, label %latch
latch: ; preds = %header
; Second exiting block: back edge to header or exit to LoopExit.
%cmp = icmp slt i32 %inc, undef
br i1 %cmp, label %header, label %LoopExit
}
; multiple exit blocks. don't unroll
; @multi_exit: a counting loop with three exiting blocks, each leaving to its
; own exit block: loop_exiting_bb1 -> exit1, loop_exiting_bb2 -> exit3 and
; loop_latch -> exit2.loopexit. The latch keeps looping while the incremented
; IV differs from %trip; the header takes the side path through
; loop_exiting_bb1 when %cond is false.
define void @multi_exit(i64 %trip, i1 %cond) {
; COMMON-LABEL: @multi_exit(
; COMMON-NOT: .unr
entry:
br label %loop_header
loop_header:
%iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
br i1 %cond, label %loop_latch, label %loop_exiting_bb1
loop_exiting_bb1:
; Constant-false condition: as written this always selects exit1.
br i1 false, label %loop_exiting_bb2, label %exit1
loop_exiting_bb2:
; Constant-false condition: as written this always selects exit3.
br i1 false, label %loop_latch, label %exit3
exit3:
ret void
loop_latch:
; Back edge is taken while the incremented IV has not reached %trip.
%iv_next = add i64 %iv, 1
%cmp = icmp ne i64 %iv_next, %trip
br i1 %cmp, label %loop_header, label %exit2.loopexit
exit1:
ret void
exit2.loopexit:
ret void
}
; Loop metadata referenced by the latch branch of @test2 via "!llvm.loop !0".
; !0 is a distinct, self-referential loop-ID node whose only other operand is
; !1, the llvm.loop.unroll.runtime.disable property.
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.unroll.runtime.disable"}
; need to use LABEL here to separate function IR matching from metadata matching
; COMMON-LABEL: {{^}}!0 =
; EPILOG-SAME: distinct !{!0, !1}
; EPILOG: !1 = !{!"llvm.loop.unroll.disable"}
; PROLOG-SAME: distinct !{!0, !1}
; PROLOG: !1 = !{!"llvm.loop.unroll.disable"}