[SCEV] Add additional test coverage for loop-guards reasoning.

Add additional tests showing missed opportunities when using loop guards
for reasoning in SCEV, depending on the order the guards appear in the
IR.
This commit is contained in:
Florian Hahn
2025-06-01 22:38:33 +01:00
parent b68565b8c7
commit 0ba63b2f22
7 changed files with 944 additions and 211 deletions

View File

@@ -349,3 +349,90 @@ loop:
exit:
ret void
}
; The loop is guarded by a logical and (a select with a false arm) whose first
; operand is %index s< %n and whose second operand is %index s> 0. The loop
; loads %A[%iv], with %iv starting at zext(%index + 1), and stores the loaded
; value to the loop-invariant address %A[zext(%index)].
; TODO: Should be able to determine no-dep, same as @nodep_via_logical_and_2.
define void @nodep_via_logical_and_1(ptr %A, i32 %index, i32 %n) {
; CHECK-LABEL: 'nodep_via_logical_and_1'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %0 = load double, ptr %gep.load, align 8 ->
; CHECK-NEXT: store double %0, ptr %gep.store, align 8
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
%pre.0 = icmp sgt i32 %index, 0
%pre.1 = icmp slt i32 %index, %n
%and.pre = select i1 %pre.1, i1 %pre.0, i1 false
br i1 %and.pre, label %ph, label %exit
ph:
%idx.1 = add i32 %index, 1
%start = zext i32 %idx.1 to i64
br label %loop
loop:
%iv = phi i64 [ %start, %ph ], [ %iv.next, %loop ]
%gep.load = getelementptr double, ptr %A, i64 %iv
; The load is the first unnamed value in this function, so it is numbered %0,
; which is also the name the expected dependence report above uses for it.
%0 = load double, ptr %gep.load, align 8
%index.ext = zext i32 %index to i64
%gep.store = getelementptr double, ptr %A, i64 %index.ext
store double %0, ptr %gep.store, align 8
%iv.next = add i64 %iv, 1
%t = trunc i64 %iv to i32
%ec = icmp slt i32 %t, 1
br i1 %ec, label %loop, label %exit
exit:
ret void
}
; Same as nodep_via_logical_and_1 but with different operand order of the logical and.
; Here %index s> 0 is the first operand of the select and %index s< %n the
; second; with this order the expected report states that the memory
; dependences are safe.
define void @nodep_via_logical_and_2(ptr %A, i32 %index, i32 %n) {
; CHECK-LABEL: 'nodep_via_logical_and_2'
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
%pre.0 = icmp sgt i32 %index, 0
%pre.1 = icmp slt i32 %index, %n
%and.pre = select i1 %pre.0, i1 %pre.1, i1 false
br i1 %and.pre, label %ph, label %exit
ph:
%idx.1 = add i32 %index, 1
%start = zext i32 %idx.1 to i64
br label %loop
loop:
%iv = phi i64 [ %start, %ph ], [ %iv.next, %loop ]
%gep.load = getelementptr double, ptr %A, i64 %iv
; The load is the first unnamed value in this function, so it is numbered %0.
%0 = load double, ptr %gep.load, align 8
%index.ext = zext i32 %index to i64
%gep.store = getelementptr double, ptr %A, i64 %index.ext
store double %0, ptr %gep.store, align 8
%iv.next = add i64 %iv, 1
%t = trunc i64 %iv to i32
%ec = icmp slt i32 %t, 1
br i1 %ec, label %loop, label %exit
exit:
ret void
}

View File

@@ -0,0 +1,288 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes='print<scalar-evolution>' -disable-output %s 2>&1 | FileCheck %s
; The loop is reached only if %i u< 16 (checked in entry) and then %i u< 10
; (checked in guardbb). It exits once %iv equals %i, so the backedge-taken
; count is %i; the expected constant max of 9 corresponds to the tighter of
; the two constant bounds.
define void @test_multiple_const_guards_order1(ptr nocapture %a, i64 %i) {
; CHECK-LABEL: 'test_multiple_const_guards_order1'
; CHECK-NEXT: Classifying expressions for: @test_multiple_const_guards_order1
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
; CHECK-NEXT: --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,11) S: [1,11) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_multiple_const_guards_order1
; CHECK-NEXT: Loop %loop: backedge-taken count is %i
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
; Looser constant bound first.
%c.1 = icmp ult i64 %i, 16
br i1 %c.1, label %guardbb, label %exit
guardbb:
; Tighter constant bound second.
%c.2 = icmp ult i64 %i, 10
br i1 %c.2, label %loop, label %exit
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
%idx = getelementptr inbounds i32, ptr %a, i64 %iv
store i32 1, ptr %idx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv, %i
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
; Same loop as @test_multiple_const_guards_order1, but the two constant guards
; appear in the opposite order: %i u< 10 in entry, then %i u< 16 in guardbb.
; The expected ranges and the constant max backedge-taken count of 9 are the
; same as for the other order.
define void @test_multiple_const_guards_order2(ptr nocapture %a, i64 %i) {
; CHECK-LABEL: 'test_multiple_const_guards_order2'
; CHECK-NEXT: Classifying expressions for: @test_multiple_const_guards_order2
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
; CHECK-NEXT: --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,11) S: [1,11) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_multiple_const_guards_order2
; CHECK-NEXT: Loop %loop: backedge-taken count is %i
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
; Tighter constant bound first.
%c.1 = icmp ult i64 %i, 10
br i1 %c.1, label %guardbb, label %exit
guardbb:
; Looser constant bound second.
%c.2 = icmp ult i64 %i, 16
br i1 %c.2, label %loop, label %exit
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
%idx = getelementptr inbounds i32, ptr %a, i64 %iv
store i32 1, ptr %idx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv, %i
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
; The loop is reached only if %N u< 12 (entry) and then %i u< %N (guardbb), so
; the bound on %i is only available through the variable %N. The loop exits
; once %iv equals %i; the expected constant max backedge-taken count is 10.
define void @test_multiple_var_guards_order1(ptr nocapture %a, i64 %i, i64 %N) {
; CHECK-LABEL: 'test_multiple_var_guards_order1'
; CHECK-NEXT: Classifying expressions for: @test_multiple_var_guards_order1
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,11) S: [0,11) Exits: %i LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
; CHECK-NEXT: --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,12) S: [1,12) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_multiple_var_guards_order1
; CHECK-NEXT: Loop %loop: backedge-taken count is %i
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 10
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
; Constant bound on %N first.
%c.1 = icmp ult i64 %N, 12
br i1 %c.1, label %guardbb, label %exit
guardbb:
; Bound on %i in terms of %N second.
%c.2 = icmp ult i64 %i, %N
br i1 %c.2, label %loop, label %exit
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
%idx = getelementptr inbounds i32, ptr %a, i64 %iv
store i32 1, ptr %idx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv, %i
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
; Same loop as @test_multiple_var_guards_order1, but with the guards in the
; opposite order: %i u< %N in entry, then %N u< 12 in guardbb. The expected
; ranges and the constant max backedge-taken count of 10 are the same as for
; the other order.
define void @test_multiple_var_guards_order2(ptr nocapture %a, i64 %i, i64 %N) {
; CHECK-LABEL: 'test_multiple_var_guards_order2'
; CHECK-NEXT: Classifying expressions for: @test_multiple_var_guards_order2
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,11) S: [0,11) Exits: %i LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
; CHECK-NEXT: --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,12) S: [1,12) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_multiple_var_guards_order2
; CHECK-NEXT: Loop %loop: backedge-taken count is %i
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 10
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
; Bound on %i in terms of %N first.
%c.1 = icmp ult i64 %i, %N
br i1 %c.1, label %guardbb, label %exit
guardbb:
; Constant bound on %N second.
%c.2 = icmp ult i64 %N, 12
br i1 %c.2, label %loop, label %exit
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
%idx = getelementptr inbounds i32, ptr %a, i64 %iv
store i32 1, ptr %idx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv, %i
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
; Three guards branch to %exit when %num s<= 0, %num s> 28, or %num u< 4, in
; that order, so the loop is only reached when none of them holds. The loop
; steps %iv by 4 from 0 and exits once %iv.next equals %num. The expected
; constant max backedge-taken count is 6.
define i32 @sle_sgt_ult_umax_to_smax(i32 %num) {
; CHECK-LABEL: 'sle_sgt_ult_umax_to_smax'
; CHECK-NEXT: Classifying expressions for: @sle_sgt_ult_umax_to_smax
; CHECK-NEXT: %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,25) S: [0,25) Exits: (4 * ((-4 + %num) /u 4))<nuw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw i32 %iv, 4
; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,29) S: [4,29) Exits: (4 + (4 * ((-4 + %num) /u 4))<nuw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @sle_sgt_ult_umax_to_smax
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + %num) /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 6
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + %num) /u 4)
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
guard.1:
%cmp.1 = icmp sle i32 %num, 0
br i1 %cmp.1, label %exit, label %guard.2
guard.2:
%cmp.2 = icmp sgt i32 %num, 28
br i1 %cmp.2, label %exit, label %guard.3
guard.3:
%cmp.3 = icmp ult i32 %num, 4
br i1 %cmp.3, label %exit, label %loop
loop:
%iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
%iv.next = add nuw i32 %iv, 4
%ec = icmp eq i32 %iv.next, %num
br i1 %ec, label %exit, label %loop
exit:
ret i32 0
}
; Similar to @sle_sgt_ult_umax_to_smax but with different predicate order.
; The guards are checked in the order %num u< 4, %num s> 28, %num s<= 0. With
; this order the expected output records wider ranges for %iv and a constant
; max backedge-taken count of 1073741823 instead of 6.
define i32 @ult_sle_sgt_umax_to_smax(i32 %num) {
; CHECK-LABEL: 'ult_sle_sgt_umax_to_smax'
; CHECK-NEXT: Classifying expressions for: @ult_sle_sgt_umax_to_smax
; CHECK-NEXT: %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {0,+,4}<nuw><%loop> U: [0,-3) S: [-2147483648,2147483645) Exits: (4 * ((-4 + %num) /u 4))<nuw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw i32 %iv, 4
; CHECK-NEXT: --> {4,+,4}<nuw><%loop> U: [4,-3) S: [-2147483648,2147483645) Exits: (4 + (4 * ((-4 + %num) /u 4))<nuw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @ult_sle_sgt_umax_to_smax
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + %num) /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 1073741823
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + %num) /u 4)
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
guard.1:
%cmp.1 = icmp ult i32 %num, 4
br i1 %cmp.1, label %exit, label %guard.2
guard.2:
%cmp.2 = icmp sgt i32 %num, 28
br i1 %cmp.2, label %exit, label %guard.3
guard.3:
%cmp.3 = icmp sle i32 %num, 0
br i1 %cmp.3, label %exit, label %loop
loop:
%iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
%iv.next = add nuw i32 %iv, 4
%ec = icmp eq i32 %iv.next, %num
br i1 %ec, label %exit, label %loop
exit:
ret i32 0
}
; The entry guard is a bitwise and of %n != 0 (first operand) and %n s< 33
; (second operand); block ph additionally requires %n s> 0. The loop calls
; @foo and exits once %iv equals %n. With this operand order the expected
; constant max backedge-taken count is 9223372036854775807.
define void @const_max_btc_32_or_order_1(i64 %n) {
; CHECK-LABEL: 'const_max_btc_32_or_order_1'
; CHECK-NEXT: Classifying expressions for: @const_max_btc_32_or_order_1
; CHECK-NEXT: %and.pre = and i1 %pre.1, %pre.0
; CHECK-NEXT: --> (%pre.1 umin %pre.0) U: full-set S: full-set
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ]
; CHECK-NEXT: --> {0,+,1}<nuw><%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: %n LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><%loop> U: [1,-9223372036854775807) S: [1,-9223372036854775807) Exits: (1 + %n) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @const_max_btc_32_or_order_1
; CHECK-NEXT: Loop %loop: backedge-taken count is %n
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9223372036854775807
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %n
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
%pre.0 = icmp slt i64 %n, 33
%pre.1 = icmp ne i64 %n, 0
%and.pre = and i1 %pre.1, %pre.0
br i1 %and.pre, label %ph, label %exit
ph:
%pre.2 = icmp sgt i64 %n, 0
br i1 %pre.2, label %loop, label %exit
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ]
call void @foo()
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv, %n
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Same as @const_max_btc_32_or_order_1, but with the operands of the `and`
; (%and.pre) swapped: %n s< 33 is the first operand and %n != 0 the second.
; With this order the expected constant max backedge-taken count is 32.
define void @const_max_btc_32_or_order_2(i64 %n) {
; CHECK-LABEL: 'const_max_btc_32_or_order_2'
; CHECK-NEXT: Classifying expressions for: @const_max_btc_32_or_order_2
; CHECK-NEXT: %and.pre = and i1 %pre.0, %pre.1
; CHECK-NEXT: --> (%pre.0 umin %pre.1) U: full-set S: full-set
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,33) S: [0,33) Exits: %n LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,34) S: [1,34) Exits: (1 + %n) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @const_max_btc_32_or_order_2
; CHECK-NEXT: Loop %loop: backedge-taken count is %n
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 32
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %n
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
%pre.0 = icmp slt i64 %n, 33
%pre.1 = icmp ne i64 %n, 0
%and.pre = and i1 %pre.0, %pre.1
br i1 %and.pre, label %ph, label %exit
ph:
%pre.2 = icmp sgt i64 %n, 0
br i1 %pre.2, label %loop, label %exit
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ]
call void @foo()
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv, %n
br i1 %ec, label %exit, label %loop
exit:
ret void
}
declare void @foo()

View File

@@ -798,6 +798,60 @@ exit:
ret i32 0
}
; Two-level loop nest. The outer loop runs %iv from 0 and exits once %iv.next
; equals 10. Each outer iteration computes %sub = 9 - %iv and skips the inner
; loop when %sub u< 2. The inner loop steps %inner.iv by 2 from 0, passes it
; to @use, and exits once %inner.iv.next equals %n.vec (%sub with its low bit
; cleared). The guard on %sub is a condition on an outer-loop add recurrence;
; the expected constant max backedge-taken count for the inner loop is
; 9223372036854775807.
define void @rewrite_add_rec() {
; CHECK-LABEL: 'rewrite_add_rec'
; CHECK-NEXT: Classifying expressions for: @rewrite_add_rec
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %outer.latch ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%outer.header> U: [0,10) S: [0,10) Exits: 9 LoopDispositions: { %outer.header: Computable, %inner: Invariant }
; CHECK-NEXT: %sub = sub i64 9, %iv
; CHECK-NEXT: --> {9,+,-1}<nsw><%outer.header> U: [0,10) S: [0,10) Exits: 0 LoopDispositions: { %outer.header: Computable, %inner: Invariant }
; CHECK-NEXT: %n.vec = and i64 %sub, -2
; CHECK-NEXT: --> (2 * ({9,+,-1}<nsw><%outer.header> /u 2))<nuw><nsw> U: [0,9) S: [0,9) Exits: 0 LoopDispositions: { %outer.header: Computable, %inner: Invariant }
; CHECK-NEXT: %inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ]
; CHECK-NEXT: --> {0,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ((-2 + (2 * ({9,+,-1}<nsw><%outer.header> /u 2))<nuw><nsw>)<nsw> /u 2))<nuw> LoopDispositions: { %inner: Computable, %outer.header: Variant }
; CHECK-NEXT: %inner.iv.next = add i64 %inner.iv, 2
; CHECK-NEXT: --> {2,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 + (2 * ((-2 + (2 * ({9,+,-1}<nsw><%outer.header> /u 2))<nuw><nsw>)<nsw> /u 2))<nuw>) LoopDispositions: { %inner: Computable, %outer.header: Variant }
; CHECK-NEXT: %iv.next = add i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%outer.header> U: [1,11) S: [1,11) Exits: 10 LoopDispositions: { %outer.header: Computable, %inner: Invariant }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_add_rec
; CHECK-NEXT: Loop %inner: backedge-taken count is ((-2 + (2 * ({9,+,-1}<nsw><%outer.header> /u 2))<nuw><nsw>)<nsw> /u 2)
; CHECK-NEXT: Loop %inner: constant max backedge-taken count is i64 9223372036854775807
; CHECK-NEXT: Loop %inner: symbolic max backedge-taken count is ((-2 + (2 * ({9,+,-1}<nsw><%outer.header> /u 2))<nuw><nsw>)<nsw> /u 2)
; CHECK-NEXT: Loop %inner: Trip multiple is 1
; CHECK-NEXT: Loop %outer.header: backedge-taken count is i64 9
; CHECK-NEXT: Loop %outer.header: constant max backedge-taken count is i64 9
; CHECK-NEXT: Loop %outer.header: symbolic max backedge-taken count is i64 9
; CHECK-NEXT: Loop %outer.header: Trip multiple is 10
;
entry:
br label %outer.header
outer.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %outer.latch ]
%sub = sub i64 9, %iv
; Guard for the inner loop: it is only entered when %sub u>= 2.
%min.iters.check = icmp ult i64 %sub, 2
br i1 %min.iters.check, label %outer.latch, label %inner.ph
inner.ph:
; Clear the low bit of %sub, i.e. round it down to an even value.
%n.vec = and i64 %sub, -2
br label %inner
inner:
%inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ]
%inner.iv.next = add i64 %inner.iv, 2
call void @use(i64 %inner.iv)
%ec.inner = icmp eq i64 %inner.iv.next, %n.vec
br i1 %ec.inner, label %outer.latch, label %inner
outer.latch:
%iv.next = add i64 %iv, 1
%ec.outer = icmp eq i64 %iv.next, 10
br i1 %ec.outer, label %exit, label %outer.header
exit:
ret void
}
declare void @use(i64)
declare i32 @llvm.umin.i32(i32, i32)

View File

@@ -220,146 +220,6 @@ exit:
ret void
}
; The loop is reached only if %i u< 16 (checked in entry) and then %i u< 10
; (checked in guardbb). It exits once %iv equals %i, so the backedge-taken
; count is %i; the expected constant max of 9 corresponds to the tighter of
; the two constant bounds.
define void @test_multiple_const_guards_order1(ptr nocapture %a, i64 %i) {
; CHECK-LABEL: 'test_multiple_const_guards_order1'
; CHECK-NEXT: Classifying expressions for: @test_multiple_const_guards_order1
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
; CHECK-NEXT: --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,11) S: [1,11) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_multiple_const_guards_order1
; CHECK-NEXT: Loop %loop: backedge-taken count is %i
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
%c.1 = icmp ult i64 %i, 16
br i1 %c.1, label %guardbb, label %exit
guardbb:
%c.2 = icmp ult i64 %i, 10
br i1 %c.2, label %loop, label %exit
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
%idx = getelementptr inbounds i32, ptr %a, i64 %iv
store i32 1, ptr %idx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv, %i
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
; Same loop as @test_multiple_const_guards_order1, but the two constant guards
; appear in the opposite order: %i u< 10 in entry, then %i u< 16 in guardbb.
; The expected ranges and the constant max backedge-taken count of 9 are the
; same as for the other order.
define void @test_multiple_const_guards_order2(ptr nocapture %a, i64 %i) {
; CHECK-LABEL: 'test_multiple_const_guards_order2'
; CHECK-NEXT: Classifying expressions for: @test_multiple_const_guards_order2
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
; CHECK-NEXT: --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,11) S: [1,11) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_multiple_const_guards_order2
; CHECK-NEXT: Loop %loop: backedge-taken count is %i
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
%c.1 = icmp ult i64 %i, 10
br i1 %c.1, label %guardbb, label %exit
guardbb:
%c.2 = icmp ult i64 %i, 16
br i1 %c.2, label %loop, label %exit
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
%idx = getelementptr inbounds i32, ptr %a, i64 %iv
store i32 1, ptr %idx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv, %i
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
; The loop is reached only if %N u< 12 (entry) and then %i u< %N (guardbb), so
; the bound on %i is only available through the variable %N. The loop exits
; once %iv equals %i; the expected constant max backedge-taken count is 10.
define void @test_multiple_var_guards_order1(ptr nocapture %a, i64 %i, i64 %N) {
; CHECK-LABEL: 'test_multiple_var_guards_order1'
; CHECK-NEXT: Classifying expressions for: @test_multiple_var_guards_order1
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,11) S: [0,11) Exits: %i LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
; CHECK-NEXT: --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,12) S: [1,12) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_multiple_var_guards_order1
; CHECK-NEXT: Loop %loop: backedge-taken count is %i
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 10
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
%c.1 = icmp ult i64 %N, 12
br i1 %c.1, label %guardbb, label %exit
guardbb:
%c.2 = icmp ult i64 %i, %N
br i1 %c.2, label %loop, label %exit
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
%idx = getelementptr inbounds i32, ptr %a, i64 %iv
store i32 1, ptr %idx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv, %i
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
; Same loop as @test_multiple_var_guards_order1, but with the guards in the
; opposite order: %i u< %N in entry, then %N u< 12 in guardbb. The expected
; ranges and the constant max backedge-taken count of 10 are the same as for
; the other order.
define void @test_multiple_var_guards_order2(ptr nocapture %a, i64 %i, i64 %N) {
; CHECK-LABEL: 'test_multiple_var_guards_order2'
; CHECK-NEXT: Classifying expressions for: @test_multiple_var_guards_order2
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,11) S: [0,11) Exits: %i LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
; CHECK-NEXT: --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,12) S: [1,12) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_multiple_var_guards_order2
; CHECK-NEXT: Loop %loop: backedge-taken count is %i
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 10
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
%c.1 = icmp ult i64 %i, %N
br i1 %c.1, label %guardbb, label %exit
guardbb:
%c.2 = icmp ult i64 %N, 12
br i1 %c.2, label %loop, label %exit
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
%idx = getelementptr inbounds i32, ptr %a, i64 %iv
store i32 1, ptr %idx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv, %i
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
; The guards here reference each other in a cycle.
define void @test_multiple_var_guards_cycle(ptr nocapture %a, i64 %i, i64 %N) {
; CHECK-LABEL: 'test_multiple_var_guards_cycle'
@@ -1470,77 +1330,6 @@ exit:
ret void
}
; Three guards branch to %exit when %num s<= 0, %num s> 28, or %num u< 4, in
; that order, so the loop is only reached when none of them holds. The loop
; steps %iv by 4 from 0 and exits once %iv.next equals %num. The expected
; constant max backedge-taken count is 6.
define i32 @sle_sgt_ult_umax_to_smax(i32 %num) {
; CHECK-LABEL: 'sle_sgt_ult_umax_to_smax'
; CHECK-NEXT: Classifying expressions for: @sle_sgt_ult_umax_to_smax
; CHECK-NEXT: %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,25) S: [0,25) Exits: (4 * ((-4 + %num) /u 4))<nuw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw i32 %iv, 4
; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,29) S: [4,29) Exits: (4 + (4 * ((-4 + %num) /u 4))<nuw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @sle_sgt_ult_umax_to_smax
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + %num) /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 6
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + %num) /u 4)
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
guard.1:
%cmp.1 = icmp sle i32 %num, 0
br i1 %cmp.1, label %exit, label %guard.2
guard.2:
%cmp.2 = icmp sgt i32 %num, 28
br i1 %cmp.2, label %exit, label %guard.3
guard.3:
%cmp.3 = icmp ult i32 %num, 4
br i1 %cmp.3, label %exit, label %loop
loop:
%iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
%iv.next = add nuw i32 %iv, 4
%ec = icmp eq i32 %iv.next, %num
br i1 %ec, label %exit, label %loop
exit:
ret i32 0
}
; Similar to @sle_sgt_ult_umax_to_smax but with different predicate order.
; The guards are checked in the order %num u< 4, %num s> 28, %num s<= 0. With
; this order the expected output records wider ranges for %iv and a constant
; max backedge-taken count of 1073741823 instead of 6.
define i32 @ult_sle_sgt_umax_to_smax(i32 %num) {
; CHECK-LABEL: 'ult_sle_sgt_umax_to_smax'
; CHECK-NEXT: Classifying expressions for: @ult_sle_sgt_umax_to_smax
; CHECK-NEXT: %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {0,+,4}<nuw><%loop> U: [0,-3) S: [-2147483648,2147483645) Exits: (4 * ((-4 + %num) /u 4))<nuw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw i32 %iv, 4
; CHECK-NEXT: --> {4,+,4}<nuw><%loop> U: [4,-3) S: [-2147483648,2147483645) Exits: (4 + (4 * ((-4 + %num) /u 4))<nuw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @ult_sle_sgt_umax_to_smax
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + %num) /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 1073741823
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + %num) /u 4)
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
guard.1:
%cmp.1 = icmp ult i32 %num, 4
br i1 %cmp.1, label %exit, label %guard.2
guard.2:
%cmp.2 = icmp sgt i32 %num, 28
br i1 %cmp.2, label %exit, label %guard.3
guard.3:
%cmp.3 = icmp sle i32 %num, 0
br i1 %cmp.3, label %exit, label %loop
loop:
%iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
%iv.next = add nuw i32 %iv, 4
%ec = icmp eq i32 %iv.next, %num
br i1 %ec, label %exit, label %loop
exit:
ret i32 0
}
define i32 @ptr_induction_ult_1(ptr %a, ptr %b) {
; CHECK-LABEL: 'ptr_induction_ult_1'
; CHECK-NEXT: Classifying expressions for: @ptr_induction_ult_1

View File

@@ -0,0 +1,64 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p indvars -S %s | FileCheck %s
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx15.0.0"
; Loop nest whose inner-loop guard is a condition on an outer-loop phi.
; %outer.iv.0 starts at 2 and is incremented by 1 per outer iteration;
; %outer.iv.1 starts at 1 and then takes the previous value of %outer.iv.0.
; The inner loop is skipped when %outer.iv.1 u< 2; otherwise it stores
; %inner.iv to %dst[%inner.iv], counting up from 0 while %inc u< %shr28
; (%outer.iv.1 shifted right by 1). The outer loop continues while
; %outer.iv.0 s> 0.
; In the expected output the inner induction variable is widened to i64 (the
; zext of %shr28 is placed in the inner preheader) and the outer exit test is
; rewritten to an `icmp ne` against -2147483647.
define i32 @guards_applied_to_add_rec(ptr %dst) {
; CHECK-LABEL: define i32 @guards_applied_to_add_rec(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[OUTER_HEADER:.*]]
; CHECK: [[OUTER_HEADER]]:
; CHECK-NEXT: [[OUTER_IV_0:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[OUTER_IV_0_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; CHECK-NEXT: [[OUTER_IV_1:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[OUTER_IV_0]], %[[OUTER_LATCH]] ]
; CHECK-NEXT: [[SHR28:%.*]] = lshr i32 [[OUTER_IV_1]], 1
; CHECK-NEXT: [[PRE:%.*]] = icmp samesign ult i32 [[OUTER_IV_1]], 2
; CHECK-NEXT: br i1 [[PRE]], label %[[OUTER_LATCH]], label %[[INNER_PREHEADER:.*]]
; CHECK: [[INNER_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SHR28]] to i64
; CHECK-NEXT: br label %[[INNER:.*]]
; CHECK: [[INNER]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[INNER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[INNER]] ]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: store i32 [[TMP1]], ptr [[GEP_DST]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[CMP29:%.*]] = icmp samesign ult i64 [[INDVARS_IV_NEXT]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP29]], label %[[INNER]], label %[[OUTER_LATCH_LOOPEXIT:.*]]
; CHECK: [[OUTER_LATCH_LOOPEXIT]]:
; CHECK-NEXT: br label %[[OUTER_LATCH]]
; CHECK: [[OUTER_LATCH]]:
; CHECK-NEXT: [[OUTER_IV_0_NEXT]] = add nuw i32 [[OUTER_IV_0]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[OUTER_IV_0_NEXT]], -2147483647
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 0
;
entry:
br label %outer.header
outer.header:
%outer.iv.0 = phi i32 [ 2, %entry ], [ %outer.iv.0.next, %outer.latch ]
%outer.iv.1 = phi i32 [ 1, %entry ], [ %outer.iv.0, %outer.latch ]
%shr28 = lshr i32 %outer.iv.1, 1
; Guard for the inner loop: it is only entered when %outer.iv.1 u>= 2.
%pre = icmp samesign ult i32 %outer.iv.1, 2
br i1 %pre, label %outer.latch, label %inner
inner:
%inner.iv = phi i32 [ 0, %outer.header ], [ %inc, %inner ]
%ext.iv = zext nneg i32 %inner.iv to i64
%gep.dst = getelementptr i32, ptr %dst, i64 %ext.iv
store i32 %inner.iv, ptr %gep.dst, align 4
%inc = add nuw nsw i32 %inner.iv, 1
%cmp29 = icmp samesign ult i32 %inc, %shr28
br i1 %cmp29, label %inner, label %outer.latch
outer.latch:
%outer.iv.0.next = add i32 %outer.iv.0, 1
%outer.ec = icmp sgt i32 %outer.iv.0, 0
br i1 %outer.ec, label %outer.header, label %exit
exit:
ret i32 0
}

View File

@@ -0,0 +1,98 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p indvars -S %s | FileCheck %s
declare void @foo()
; Two preconditions on %base precede the loop: entry branches to %exit when
; %base s> 0, and ph enters the loop only when %base s> zext(%n). The loop
; calls @foo, loads an i8 through the pointer induction variable %iv, advances
; %iv by one byte, and continues while %base s> zext(loaded byte).
; In the expected output for this order the loop exit compare stays a signed
; i32 compare against the zero-extended load.
define void @narrow_iv_precondition_order_1(ptr %start, i32 %base, i8 %n) {
; CHECK-LABEL: define void @narrow_iv_precondition_order_1(
; CHECK-SAME: ptr [[START:%.*]], i32 [[BASE:%.*]], i8 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[PRE_0:%.*]] = icmp sgt i32 [[BASE]], 0
; CHECK-NEXT: br i1 [[PRE_0]], label %[[EXIT:.*]], label %[[PH:.*]]
; CHECK: [[PH]]:
; CHECK-NEXT: [[N_EXT:%.*]] = zext i8 [[N]] to i32
; CHECK-NEXT: [[PRE_1:%.*]] = icmp sgt i32 [[BASE]], [[N_EXT]]
; CHECK-NEXT: br i1 [[PRE_1]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]]
; CHECK: [[LOOP_PREHEADER]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[GEP:%.*]], %[[LOOP]] ], [ [[START]], %[[LOOP_PREHEADER]] ]
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[END:%.*]] = load i8, ptr [[IV]], align 1
; CHECK-NEXT: [[END_EXT:%.*]] = zext i8 [[END]] to i32
; CHECK-NEXT: [[GEP]] = getelementptr inbounds i8, ptr [[IV]], i64 1
; CHECK-NEXT: [[EC:%.*]] = icmp sgt i32 [[BASE]], [[END_EXT]]
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
%pre.0 = icmp sgt i32 %base, 0
br i1 %pre.0, label %exit, label %ph
ph: ; preds = %entry
%n.ext = zext i8 %n to i32
%pre.1 = icmp sgt i32 %base, %n.ext
br i1 %pre.1, label %loop, label %exit
loop:
%iv = phi ptr [ %start, %ph ], [ %gep, %loop ]
call void @foo()
%end = load i8, ptr %iv, align 1
%end.ext = zext i8 %end to i32
%gep = getelementptr inbounds i8, ptr %iv, i64 1
%ec = icmp sgt i32 %base, %end.ext
br i1 %ec, label %loop, label %exit
exit:
ret void
}
; Same loop as @narrow_iv_precondition_order_1, with the two compares placed
; in the opposite blocks: entry branches to %exit when %base s> zext(%n), and
; ph enters the loop only when %base s> 0.
; In the expected output for this order the loop exit compare is narrowed to
; i8: %base is truncated in the loop preheader and compared with the loaded
; byte using an unsigned greater-than.
define void @narrow_iv_precondition_order_2(ptr %start, i32 %base, i8 %n) {
; CHECK-LABEL: define void @narrow_iv_precondition_order_2(
; CHECK-SAME: ptr [[START:%.*]], i32 [[BASE:%.*]], i8 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[N_EXT:%.*]] = zext i8 [[N]] to i32
; CHECK-NEXT: [[PRE_1:%.*]] = icmp sgt i32 [[BASE]], [[N_EXT]]
; CHECK-NEXT: br i1 [[PRE_1]], label %[[EXIT:.*]], label %[[PH:.*]]
; CHECK: [[PH]]:
; CHECK-NEXT: [[PRE_0:%.*]] = icmp sgt i32 [[BASE]], 0
; CHECK-NEXT: br i1 [[PRE_0]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]]
; CHECK: [[LOOP_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[BASE]] to i8
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[GEP:%.*]], %[[LOOP]] ], [ [[START]], %[[LOOP_PREHEADER]] ]
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[END:%.*]] = load i8, ptr [[IV]], align 1
; CHECK-NEXT: [[GEP]] = getelementptr inbounds i8, ptr [[IV]], i64 1
; CHECK-NEXT: [[EC:%.*]] = icmp ugt i8 [[TMP0]], [[END]]
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
%n.ext = zext i8 %n to i32
%pre.1 = icmp sgt i32 %base, %n.ext
br i1 %pre.1, label %exit, label %ph
ph: ; preds = %entry
%pre.0 = icmp sgt i32 %base, 0
br i1 %pre.0, label %loop, label %exit
loop:
%iv = phi ptr [ %start, %ph ], [ %gep, %loop ]
call void @foo()
%end = load i8, ptr %iv, align 1
%end.ext = zext i8 %end to i32
%gep = getelementptr inbounds i8, ptr %iv, i64 1
%ec = icmp sgt i32 %base, %end.ext
br i1 %ec, label %loop, label %exit
exit:
ret void
}

View File

@@ -0,0 +1,353 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s
; The loop stores 1 to %dst[0 .. zext(%n)] (it exits after the iteration in
; which %iv == zext(%n)) and is guarded by (%n != 0) & (%n <s 5), with %pre.0
; as the first operand of the AND. The expected output has the branch that
; selects between the scalar and vector loops folded to `false`, so the vector
; loop is always entered from %ph.
define i32 @loop_with_at_least_2_iterations_via_guards_order_1(ptr %dst, i32 %n) {
; CHECK-LABEL: define i32 @loop_with_at_least_2_iterations_via_guards_order_1(
; CHECK-SAME: ptr [[DST:%.*]], i32 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[PRE_0:%.*]] = icmp ne i32 [[N]], 0
; CHECK-NEXT: [[PRE_1:%.*]] = icmp slt i32 [[N]], 5
; CHECK-NEXT: [[AND_PRE:%.*]] = and i1 [[PRE_0]], [[PRE_1]]
; CHECK-NEXT: br i1 [[AND_PRE]], label %[[PH:.*]], label %[[EXIT:.*]]
; CHECK: [[PH]]:
; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[N_EXT]], 1
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[TMP2]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[PH]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N_EXT]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 0
;
entry:
%pre.0 = icmp ne i32 %n, 0
%pre.1 = icmp slt i32 %n, 5
; Both guards are combined with a bitwise AND; %pre.0 is the first operand.
%and.pre = and i1 %pre.0, %pre.1
br i1 %and.pre, label %ph, label %exit
ph: ; preds = %entry
%n.ext = zext i32 %n to i64
br label %loop
loop: ; preds = %loop, %ph
%iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
%gep = getelementptr inbounds i32, ptr %dst, i64 %iv
store i32 1, ptr %gep
%iv.next = add i64 %iv, 1
; The pre-increment %iv is compared, so the iteration with %iv == %n.ext still
; executes before the loop exits.
%ec = icmp eq i64 %iv, %n.ext
br i1 %ec, label %exit, label %loop
exit: ; preds = %loop, %entry
ret i32 0
}
; Same as loop_with_at_least_2_iterations_via_guards_order_1 but with operands
; of the AND swapped.
; TODO: Should be able to prove that %min.iters.check is false.
define i32 @loop_with_at_least_2_iterations_via_guards_order_2(ptr %dst, i32 %n) {
; CHECK-LABEL: define i32 @loop_with_at_least_2_iterations_via_guards_order_2(
; CHECK-SAME: ptr [[DST:%.*]], i32 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[PRE_0:%.*]] = icmp ne i32 [[N]], 0
; CHECK-NEXT: [[PRE_1:%.*]] = icmp slt i32 [[N]], 5
; CHECK-NEXT: [[AND_PRE:%.*]] = and i1 [[PRE_1]], [[PRE_0]]
; CHECK-NEXT: br i1 [[AND_PRE]], label %[[PH:.*]], label %[[EXIT:.*]]
; CHECK: [[PH]]:
; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[N_EXT]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[TMP2]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[PH]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N_EXT]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 0
;
; The expected output above still evaluates the minimum-iteration compare
; (icmp ult ..., 2) at run time instead of folding it to a constant.
entry:
%pre.0 = icmp ne i32 %n, 0
%pre.1 = icmp slt i32 %n, 5
; Same two guards as @loop_with_at_least_2_iterations_via_guards_order_1, but
; here %pre.1 is the first operand of the AND.
%and.pre = and i1 %pre.1, %pre.0
br i1 %and.pre, label %ph, label %exit
ph:
%n.ext = zext i32 %n to i64
br label %loop
loop:
%iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
%gep = getelementptr inbounds i32, ptr %dst, i64 %iv
store i32 1, ptr %gep
%iv.next = add i64 %iv, 1
; The pre-increment %iv is compared, so the iteration with %iv == %n.ext still
; executes before the loop exits.
%ec = icmp eq i64 %iv, %n.ext
br i1 %ec, label %exit, label %loop
exit:
ret i32 0
}
; TODO: Should be able to prove min.iters.check is true, same as
; @loop_never_executes_precondition_order_2_predicates_flipped.
define void @loop_never_executes_precondition_order_1(i64 %start, ptr %dst) {
; CHECK-LABEL: define void @loop_never_executes_precondition_order_1(
; CHECK-SAME: i64 [[START:%.*]], ptr [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[PRE_0:%.*]] = icmp ugt i64 [[START]], 1
; CHECK-NEXT: br i1 [[PRE_0]], label %[[EXIT:.*]], label %[[PH:.*]]
; CHECK: [[PH]]:
; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[START]], 1
; CHECK-NEXT: [[PRE_1:%.*]] = icmp slt i64 [[MUL]], [[START]]
; CHECK-NEXT: br i1 [[PRE_1]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]]
; CHECK: [[LOOP_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 1, [[START]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[START]], [[N_VEC]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[LOOP_PREHEADER]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
%pre.0 = icmp ugt i64 %start, 1
; Inverted guard: control reaches %ph only when %start <=u 1, i.e. %start is
; 0 or 1.
br i1 %pre.0, label %exit, label %ph
ph:
%mul = shl i64 %start, 1
; For %start in {0, 1}, %mul is 0 or 2, so %mul <s %start cannot hold: with
; both guards taken together the loop is never entered.
%pre.1 = icmp slt i64 %mul, %start
br i1 %pre.1, label %loop, label %exit
loop:
%iv = phi i64 [ %start, %ph ], [ %iv.next, %loop ]
%gep = getelementptr inbounds i64, ptr %dst, i64 %iv
store i64 %iv, ptr %gep
%iv.next = add i64 %iv, 1
; The pre-increment %iv is compared: exit after the iteration where %iv was 0.
%ec = icmp eq i64 %iv, 0
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; TODO: Should be able to prove min.iters.check is true, same as
; @loop_never_executes_precondition_order_2_predicates_flipped.
define void @loop_never_executes_precondition_order_1_predicates_flipped(i64 %start, ptr %dst) {
; CHECK-LABEL: define void @loop_never_executes_precondition_order_1_predicates_flipped(
; CHECK-SAME: i64 [[START:%.*]], ptr [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[PRE_0:%.*]] = icmp ule i64 [[START]], 1
; CHECK-NEXT: br i1 [[PRE_0]], label %[[PH:.*]], label %[[EXIT:.*]]
; CHECK: [[PH]]:
; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[START]], 1
; CHECK-NEXT: [[PRE_1:%.*]] = icmp slt i64 [[MUL]], [[START]]
; CHECK-NEXT: br i1 [[PRE_1]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]]
; CHECK: [[LOOP_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 1, [[START]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[START]], [[N_VEC]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[LOOP_PREHEADER]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
; First guard tests %start <=u 1 directly; the loop side is the true successor
; (the non-flipped variant uses ugt with swapped successors instead).
%pre.0 = icmp ule i64 %start, 1
br i1 %pre.0, label %ph, label %exit
ph:
%mul = shl i64 %start, 1
; For %start in {0, 1}, %mul is 0 or 2, so %mul <s %start cannot hold: with
; both guards taken together the loop is never entered.
%pre.1 = icmp slt i64 %mul, %start
br i1 %pre.1, label %loop, label %exit
loop:
%iv = phi i64 [ %start, %ph ], [ %iv.next, %loop ]
%gep = getelementptr inbounds i64, ptr %dst, i64 %iv
store i64 %iv, ptr %gep
%iv.next = add i64 %iv, 1
; The pre-increment %iv is compared: exit after the iteration where %iv was 0.
%ec = icmp eq i64 %iv, 0
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Same guards as @loop_never_executes_precondition_order_1_predicates_flipped,
; checked in the opposite order: %mul <s %start first, %start <=u 1 second.
; Here the expected output folds the branch that selects between the scalar and
; vector loops to `true`, so the vector loop is bypassed.
define void @loop_never_executes_precondition_order_2_predicates_flipped(i64 %start, ptr %dst) {
; CHECK-LABEL: define void @loop_never_executes_precondition_order_2_predicates_flipped(
; CHECK-SAME: i64 [[START:%.*]], ptr [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[START]], 1
; CHECK-NEXT: [[PRE_1:%.*]] = icmp slt i64 [[MUL]], [[START]]
; CHECK-NEXT: br i1 [[PRE_1]], label %[[PH:.*]], label %[[EXIT:.*]]
; CHECK: [[PH]]:
; CHECK-NEXT: [[PRE_0:%.*]] = icmp ule i64 [[START]], 1
; CHECK-NEXT: br i1 [[PRE_0]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]]
; CHECK: [[LOOP_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 1, [[START]]
; CHECK-NEXT: br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[START]], [[N_VEC]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[LOOP_PREHEADER]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
%mul = shl i64 %start, 1
; The signed comparison of %start * 2 against %start is checked first here.
%pre.1 = icmp slt i64 %mul, %start
br i1 %pre.1, label %ph, label %exit
ph:
; Second guard: %start <=u 1, i.e. %start is 0 or 1. Neither value satisfies
; the first guard (%mul would be 0 or 2), so the loop is never entered.
%pre.0 = icmp ule i64 %start, 1
br i1 %pre.0, label %loop, label %exit
loop:
%iv = phi i64 [ %start, %ph ], [ %iv.next, %loop ]
%gep = getelementptr inbounds i64, ptr %dst, i64 %iv
store i64 %iv, ptr %gep
%iv.next = add i64 %iv, 1
; The pre-increment %iv is compared: exit after the iteration where %iv was 0.
%ec = icmp eq i64 %iv, 0
br i1 %ec, label %exit, label %loop
exit:
ret void
}