From 0ba63b2f22bfb00848ec2d71fbc11cf52b93a290 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 1 Jun 2025 22:38:33 +0100 Subject: [PATCH] [SCEV] Add additional test coverage for loop-guards reasoning. Add additional tests showing missed opportunities when using loop guards for reasoning in SCEV, depending on the order the guards appear in the IR. --- .../no-dep-via-loop-guards.ll | 87 +++++ ...ge-taken-count-guard-info-operand-order.ll | 288 ++++++++++++++ ...en-count-guard-info-rewrite-expressions.ll | 54 +++ .../max-backedge-taken-count-guard-info.ll | 211 ----------- .../IndVarSimplify/AArch64/loop-guards.ll | 64 ++++ .../IndVarSimplify/loop-guard-order.ll | 98 +++++ .../min-trip-count-known-via-scev.ll | 353 ++++++++++++++++++ 7 files changed, 944 insertions(+), 211 deletions(-) create mode 100644 llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-operand-order.ll create mode 100644 llvm/test/Transforms/IndVarSimplify/AArch64/loop-guards.ll create mode 100644 llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll create mode 100644 llvm/test/Transforms/LoopVectorize/min-trip-count-known-via-scev.ll diff --git a/llvm/test/Analysis/LoopAccessAnalysis/no-dep-via-loop-guards.ll b/llvm/test/Analysis/LoopAccessAnalysis/no-dep-via-loop-guards.ll index 2cb98b1a8199..e00f91d9b2e8 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/no-dep-via-loop-guards.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/no-dep-via-loop-guards.ll @@ -349,3 +349,90 @@ loop: exit: ret void } + +; TODO Should be able to determine no-dep, same as @nodep_via_logical_and_2. +define void @nodep_via_logical_and_1(ptr %A, i32 %index, i32 %n) { +; CHECK-LABEL: 'nodep_via_logical_and_1' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. 
+; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %0 = load double, ptr %gep.load, align 8 -> +; CHECK-NEXT: store double %0, ptr %gep.store, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %pre.0 = icmp sgt i32 %index, 0 + %pre.1 = icmp slt i32 %index, %n + %and.pre = select i1 %pre.1, i1 %pre.0, i1 false + br i1 %and.pre, label %ph, label %exit + +ph: + %idx.1 = add i32 %index, 1 + %start = zext i32 %idx.1 to i64 + br label %loop + +loop: + %iv = phi i64 [ %start, %ph ], [ %iv.next, %loop ] + %gep.load = getelementptr double, ptr %A, i64 %iv + %1 = load double, ptr %gep.load, align 8 + %index.ext = zext i32 %index to i64 + %gep.store = getelementptr double, ptr %A, i64 %index.ext + store double %1, ptr %gep.store, align 8 + %iv.next = add i64 %iv, 1 + %t = trunc i64 %iv to i32 + %ec = icmp slt i32 %t, 1 + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + +; Same as nodep_via_logical_and_1 but with different operand order of the logical and. +define void @nodep_via_logical_and_2(ptr %A, i32 %index, i32 %n) { +; CHECK-LABEL: 'nodep_via_logical_and_2' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %pre.0 = icmp sgt i32 %index, 0 + %pre.1 = icmp slt i32 %index, %n + %and.pre = select i1 %pre.0, i1 %pre.1, i1 false + br i1 %and.pre, label %ph, label %exit + +ph: + %idx.1 = add i32 %index, 1 + %start = zext i32 %idx.1 to i64 + br label %loop + +loop: + %iv = phi i64 [ %start, %ph ], [ %iv.next, %loop ] + %gep.load = getelementptr double, ptr %A, i64 %iv + %1 = load double, ptr %gep.load, align 8 + %index.ext = zext i32 %index to i64 + %gep.store = getelementptr double, ptr %A, i64 %index.ext + store double %1, ptr %gep.store, align 8 + %iv.next = add i64 %iv, 1 + %t = trunc i64 %iv to i32 + %ec = icmp slt i32 %t, 1 + br i1 %ec, label %loop, label %exit + +exit: + ret void +} diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-operand-order.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-operand-order.ll new file mode 100644 index 000000000000..17a6b706685c --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-operand-order.ll @@ -0,0 +1,288 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes='print<scalar-evolution>' -disable-output %s 2>&1 | FileCheck %s + +define void @test_multiple_const_guards_order1(ptr nocapture %a, i64 %i) { +; CHECK-LABEL: 'test_multiple_const_guards_order1' +; CHECK-NEXT: Classifying expressions for: @test_multiple_const_guards_order1 +; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv +; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,11) S: [1,11) Exits: (1 + %i) 
LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_multiple_const_guards_order1 +; CHECK-NEXT: Loop %loop: backedge-taken count is %i +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %c.1 = icmp ult i64 %i, 16 + br i1 %c.1, label %guardbb, label %exit + +guardbb: + %c.2 = icmp ult i64 %i, 10 + br i1 %c.2, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] + %idx = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 1, ptr %idx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv, %i + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define void @test_multiple_const_guards_order2(ptr nocapture %a, i64 %i) { +; CHECK-LABEL: 'test_multiple_const_guards_order2' +; CHECK-NEXT: Classifying expressions for: @test_multiple_const_guards_order2 +; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv +; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,11) S: [1,11) Exits: (1 + %i) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_multiple_const_guards_order2 +; CHECK-NEXT: Loop %loop: backedge-taken count is %i +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %c.1 = icmp ult i64 %i, 10 + br i1 %c.1, label %guardbb, label %exit + +guardbb: + %c.2 = icmp ult i64 %i, 16 + br i1 
%c.2, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] + %idx = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 1, ptr %idx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv, %i + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define void @test_multiple_var_guards_order1(ptr nocapture %a, i64 %i, i64 %N) { +; CHECK-LABEL: 'test_multiple_var_guards_order1' +; CHECK-NEXT: Classifying expressions for: @test_multiple_var_guards_order1 +; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,11) S: [0,11) Exits: %i LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv +; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,12) S: [1,12) Exits: (1 + %i) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_multiple_var_guards_order1 +; CHECK-NEXT: Loop %loop: backedge-taken count is %i +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 10 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %c.1 = icmp ult i64 %N, 12 + br i1 %c.1, label %guardbb, label %exit + +guardbb: + %c.2 = icmp ult i64 %i, %N + br i1 %c.2, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] + %idx = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 1, ptr %idx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv, %i + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define void @test_multiple_var_guards_order2(ptr nocapture %a, i64 %i, i64 %N) { +; CHECK-LABEL: 'test_multiple_var_guards_order2' +; CHECK-NEXT: Classifying expressions for: 
@test_multiple_var_guards_order2 +; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,11) S: [0,11) Exits: %i LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv +; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,12) S: [1,12) Exits: (1 + %i) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_multiple_var_guards_order2 +; CHECK-NEXT: Loop %loop: backedge-taken count is %i +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 10 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %c.1 = icmp ult i64 %i, %N + br i1 %c.1, label %guardbb, label %exit + +guardbb: + %c.2 = icmp ult i64 %N, 12 + br i1 %c.2, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] + %idx = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 1, ptr %idx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv, %i + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define i32 @sle_sgt_ult_umax_to_smax(i32 %num) { +; CHECK-LABEL: 'sle_sgt_ult_umax_to_smax' +; CHECK-NEXT: Classifying expressions for: @sle_sgt_ult_umax_to_smax +; CHECK-NEXT: %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,25) S: [0,25) Exits: (4 * ((-4 + %num) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add nuw i32 %iv, 4 +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,29) S: [4,29) Exits: (4 + (4 * ((-4 + %num) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @sle_sgt_ult_umax_to_smax +; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + %num) 
/u 4) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 6 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + %num) /u 4) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +guard.1: + %cmp.1 = icmp sle i32 %num, 0 + br i1 %cmp.1, label %exit, label %guard.2 + +guard.2: + %cmp.2 = icmp sgt i32 %num, 28 + br i1 %cmp.2, label %exit, label %guard.3 + +guard.3: + %cmp.3 = icmp ult i32 %num, 4 + br i1 %cmp.3, label %exit, label %loop + +loop: + %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ] + %iv.next = add nuw i32 %iv, 4 + %ec = icmp eq i32 %iv.next, %num + br i1 %ec, label %exit, label %loop + +exit: + ret i32 0 +} + +; Similar to @sle_sgt_ult_umax_to_smax but with different predicate order. +define i32 @ult_sle_sgt_umax_to_smax(i32 %num) { +; CHECK-LABEL: 'ult_sle_sgt_umax_to_smax' +; CHECK-NEXT: Classifying expressions for: @ult_sle_sgt_umax_to_smax +; CHECK-NEXT: %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,-3) S: [-2147483648,2147483645) Exits: (4 * ((-4 + %num) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add nuw i32 %iv, 4 +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,-3) S: [-2147483648,2147483645) Exits: (4 + (4 * ((-4 + %num) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @ult_sle_sgt_umax_to_smax +; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + %num) /u 4) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 1073741823 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + %num) /u 4) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +guard.1: + %cmp.1 = icmp ult i32 %num, 4 + br i1 %cmp.1, label %exit, label %guard.2 + +guard.2: + %cmp.2 = icmp sgt i32 %num, 28 + br i1 %cmp.2, label %exit, label %guard.3 + +guard.3: + %cmp.3 = icmp sle i32 %num, 0 + br i1 %cmp.3, label %exit, label %loop + +loop: + %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ] + %iv.next 
= add nuw i32 %iv, 4 + %ec = icmp eq i32 %iv.next, %num + br i1 %ec, label %exit, label %loop + +exit: + ret i32 0 +} + +define void @const_max_btc_32_or_order_1(i64 %n) { +; CHECK-LABEL: 'const_max_btc_32_or_order_1' +; CHECK-NEXT: Classifying expressions for: @const_max_btc_32_or_order_1 +; CHECK-NEXT: %and.pre = and i1 %pre.1, %pre.0 +; CHECK-NEXT: --> (%pre.1 umin %pre.0) U: full-set S: full-set +; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ] +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: %n LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add i64 %iv, 1 +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,-9223372036854775807) S: [1,-9223372036854775807) Exits: (1 + %n) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @const_max_btc_32_or_order_1 +; CHECK-NEXT: Loop %loop: backedge-taken count is %n +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9223372036854775807 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %n +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %pre.0 = icmp slt i64 %n, 33 + %pre.1 = icmp ne i64 %n, 0 + %and.pre = and i1 %pre.1, %pre.0 + br i1 %and.pre, label %ph, label %exit + +ph: + %pre.2 = icmp sgt i64 %n, 0 + br i1 %pre.2, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ] + call void @foo() + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +; Same as @const_max_btc_32_or_order_1, but with the operands of the AND swapped. 
+define void @const_max_btc_32_or_order_2(i64 %n) { +; CHECK-LABEL: 'const_max_btc_32_or_order_2' +; CHECK-NEXT: Classifying expressions for: @const_max_btc_32_or_order_2 +; CHECK-NEXT: %and.pre = and i1 %pre.0, %pre.1 +; CHECK-NEXT: --> (%pre.0 umin %pre.1) U: full-set S: full-set +; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ] +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,33) S: [0,33) Exits: %n LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add i64 %iv, 1 +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,34) S: [1,34) Exits: (1 + %n) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @const_max_btc_32_or_order_2 +; CHECK-NEXT: Loop %loop: backedge-taken count is %n +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 32 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %n +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %pre.0 = icmp slt i64 %n, 33 + %pre.1 = icmp ne i64 %n, 0 + %and.pre = and i1 %pre.0, %pre.1 + br i1 %and.pre, label %ph, label %exit + +ph: + %pre.2 = icmp sgt i64 %n, 0 + br i1 %pre.2, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ] + call void @foo() + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +declare void @foo() diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll index 58044915ae87..8c77d704eac6 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll @@ -798,6 +798,60 @@ exit: ret i32 0 } +define void @rewrite_add_rec() { +; CHECK-LABEL: 'rewrite_add_rec' +; CHECK-NEXT: Classifying expressions for: @rewrite_add_rec +; CHECK-NEXT: %iv = phi 
i64 [ 0, %entry ], [ %iv.next, %outer.latch ] +; CHECK-NEXT: --> {0,+,1}<%outer.header> U: [0,10) S: [0,10) Exits: 9 LoopDispositions: { %outer.header: Computable, %inner: Invariant } +; CHECK-NEXT: %sub = sub i64 9, %iv +; CHECK-NEXT: --> {9,+,-1}<%outer.header> U: [0,10) S: [0,10) Exits: 0 LoopDispositions: { %outer.header: Computable, %inner: Invariant } +; CHECK-NEXT: %n.vec = and i64 %sub, -2 +; CHECK-NEXT: --> (2 * ({9,+,-1}<%outer.header> /u 2)) U: [0,9) S: [0,9) Exits: 0 LoopDispositions: { %outer.header: Computable, %inner: Invariant } +; CHECK-NEXT: %inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ] +; CHECK-NEXT: --> {0,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ((-2 + (2 * ({9,+,-1}<%outer.header> /u 2))) /u 2)) LoopDispositions: { %inner: Computable, %outer.header: Variant } +; CHECK-NEXT: %inner.iv.next = add i64 %inner.iv, 2 +; CHECK-NEXT: --> {2,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 + (2 * ((-2 + (2 * ({9,+,-1}<%outer.header> /u 2))) /u 2))) LoopDispositions: { %inner: Computable, %outer.header: Variant } +; CHECK-NEXT: %iv.next = add i64 %iv, 1 +; CHECK-NEXT: --> {1,+,1}<%outer.header> U: [1,11) S: [1,11) Exits: 10 LoopDispositions: { %outer.header: Computable, %inner: Invariant } +; CHECK-NEXT: Determining loop execution counts for: @rewrite_add_rec +; CHECK-NEXT: Loop %inner: backedge-taken count is ((-2 + (2 * ({9,+,-1}<%outer.header> /u 2))) /u 2) +; CHECK-NEXT: Loop %inner: constant max backedge-taken count is i64 9223372036854775807 +; CHECK-NEXT: Loop %inner: symbolic max backedge-taken count is ((-2 + (2 * ({9,+,-1}<%outer.header> /u 2))) /u 2) +; CHECK-NEXT: Loop %inner: Trip multiple is 1 +; CHECK-NEXT: Loop %outer.header: backedge-taken count is i64 9 +; CHECK-NEXT: Loop %outer.header: constant max backedge-taken count is i64 9 +; CHECK-NEXT: Loop %outer.header: symbolic max backedge-taken count is i64 9 +; CHECK-NEXT: Loop %outer.header: Trip 
multiple is 10 +; +entry: + br label %outer.header + +outer.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %outer.latch ] + %sub = sub i64 9, %iv + %min.iters.check = icmp ult i64 %sub, 2 + br i1 %min.iters.check, label %outer.latch, label %inner.ph + +inner.ph: + %n.vec = and i64 %sub, -2 + br label %inner + +inner: + %inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ] + %inner.iv.next = add i64 %inner.iv, 2 + call void @use(i64 %inner.iv) + %ec.inner = icmp eq i64 %inner.iv.next, %n.vec + br i1 %ec.inner, label %outer.latch, label %inner + +outer.latch: + %iv.next = add i64 %iv, 1 + %ec.outer = icmp eq i64 %iv.next, 10 + br i1 %ec.outer, label %exit, label %outer.header + +exit: + ret void +} + declare void @use(i64) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll index 30c3bbfb12e5..9bf2427eddb9 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -220,146 +220,6 @@ exit: ret void } -define void @test_multiple_const_guards_order1(ptr nocapture %a, i64 %i) { -; CHECK-LABEL: 'test_multiple_const_guards_order1' -; CHECK-NEXT: Classifying expressions for: @test_multiple_const_guards_order1 -; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable } -; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } -; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,11) S: [1,11) Exits: (1 + %i) LoopDispositions: { %loop: Computable } -; CHECK-NEXT: Determining loop execution counts for: @test_multiple_const_guards_order1 
-; CHECK-NEXT: Loop %loop: backedge-taken count is %i -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i -; CHECK-NEXT: Loop %loop: Trip multiple is 1 -; -entry: - %c.1 = icmp ult i64 %i, 16 - br i1 %c.1, label %guardbb, label %exit - -guardbb: - %c.2 = icmp ult i64 %i, 10 - br i1 %c.2, label %loop, label %exit - -loop: - %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] - %idx = getelementptr inbounds i32, ptr %a, i64 %iv - store i32 1, ptr %idx, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv, %i - br i1 %exitcond, label %exit, label %loop - -exit: - ret void -} - -define void @test_multiple_const_guards_order2(ptr nocapture %a, i64 %i) { -; CHECK-LABEL: 'test_multiple_const_guards_order2' -; CHECK-NEXT: Classifying expressions for: @test_multiple_const_guards_order2 -; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable } -; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } -; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,11) S: [1,11) Exits: (1 + %i) LoopDispositions: { %loop: Computable } -; CHECK-NEXT: Determining loop execution counts for: @test_multiple_const_guards_order2 -; CHECK-NEXT: Loop %loop: backedge-taken count is %i -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i -; CHECK-NEXT: Loop %loop: Trip multiple is 1 -; -entry: - %c.1 = icmp ult i64 %i, 10 - br i1 %c.1, label %guardbb, label %exit - -guardbb: - %c.2 = icmp ult i64 %i, 16 - br i1 %c.2, label %loop, label %exit - -loop: - %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] - %idx = getelementptr inbounds i32, 
ptr %a, i64 %iv - store i32 1, ptr %idx, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv, %i - br i1 %exitcond, label %exit, label %loop - -exit: - ret void -} - -define void @test_multiple_var_guards_order1(ptr nocapture %a, i64 %i, i64 %N) { -; CHECK-LABEL: 'test_multiple_var_guards_order1' -; CHECK-NEXT: Classifying expressions for: @test_multiple_var_guards_order1 -; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,11) S: [0,11) Exits: %i LoopDispositions: { %loop: Computable } -; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } -; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,12) S: [1,12) Exits: (1 + %i) LoopDispositions: { %loop: Computable } -; CHECK-NEXT: Determining loop execution counts for: @test_multiple_var_guards_order1 -; CHECK-NEXT: Loop %loop: backedge-taken count is %i -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 10 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i -; CHECK-NEXT: Loop %loop: Trip multiple is 1 -; -entry: - %c.1 = icmp ult i64 %N, 12 - br i1 %c.1, label %guardbb, label %exit - -guardbb: - %c.2 = icmp ult i64 %i, %N - br i1 %c.2, label %loop, label %exit - -loop: - %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] - %idx = getelementptr inbounds i32, ptr %a, i64 %iv - store i32 1, ptr %idx, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv, %i - br i1 %exitcond, label %exit, label %loop - -exit: - ret void -} - -define void @test_multiple_var_guards_order2(ptr nocapture %a, i64 %i, i64 %N) { -; CHECK-LABEL: 'test_multiple_var_guards_order2' -; CHECK-NEXT: Classifying expressions for: @test_multiple_var_guards_order2 -; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: 
[0,11) S: [0,11) Exits: %i LoopDispositions: { %loop: Computable } -; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable } -; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,12) S: [1,12) Exits: (1 + %i) LoopDispositions: { %loop: Computable } -; CHECK-NEXT: Determining loop execution counts for: @test_multiple_var_guards_order2 -; CHECK-NEXT: Loop %loop: backedge-taken count is %i -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 10 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i -; CHECK-NEXT: Loop %loop: Trip multiple is 1 -; -entry: - %c.1 = icmp ult i64 %i, %N - br i1 %c.1, label %guardbb, label %exit - -guardbb: - %c.2 = icmp ult i64 %N, 12 - br i1 %c.2, label %loop, label %exit - -loop: - %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ] - %idx = getelementptr inbounds i32, ptr %a, i64 %iv - store i32 1, ptr %idx, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv, %i - br i1 %exitcond, label %exit, label %loop - -exit: - ret void -} - ; The guards here reference each other in a cycle. 
define void @test_multiple_var_guards_cycle(ptr nocapture %a, i64 %i, i64 %N) { ; CHECK-LABEL: 'test_multiple_var_guards_cycle' @@ -1470,77 +1330,6 @@ exit: ret void } -define i32 @sle_sgt_ult_umax_to_smax(i32 %num) { -; CHECK-LABEL: 'sle_sgt_ult_umax_to_smax' -; CHECK-NEXT: Classifying expressions for: @sle_sgt_ult_umax_to_smax -; CHECK-NEXT: %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,25) S: [0,25) Exits: (4 * ((-4 + %num) /u 4)) LoopDispositions: { %loop: Computable } -; CHECK-NEXT: %iv.next = add nuw i32 %iv, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,29) S: [4,29) Exits: (4 + (4 * ((-4 + %num) /u 4))) LoopDispositions: { %loop: Computable } -; CHECK-NEXT: Determining loop execution counts for: @sle_sgt_ult_umax_to_smax -; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + %num) /u 4) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 6 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + %num) /u 4) -; CHECK-NEXT: Loop %loop: Trip multiple is 1 -; -guard.1: - %cmp.1 = icmp sle i32 %num, 0 - br i1 %cmp.1, label %exit, label %guard.2 - -guard.2: - %cmp.2 = icmp sgt i32 %num, 28 - br i1 %cmp.2, label %exit, label %guard.3 - -guard.3: - %cmp.3 = icmp ult i32 %num, 4 - br i1 %cmp.3, label %exit, label %loop - -loop: - %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ] - %iv.next = add nuw i32 %iv, 4 - %ec = icmp eq i32 %iv.next, %num - br i1 %ec, label %exit, label %loop - -exit: - ret i32 0 -} - -; Similar to @sle_sgt_ult_umax_to_smax but with different predicate order. 
-define i32 @ult_sle_sgt_umax_to_smax(i32 %num) { -; CHECK-LABEL: 'ult_sle_sgt_umax_to_smax' -; CHECK-NEXT: Classifying expressions for: @ult_sle_sgt_umax_to_smax -; CHECK-NEXT: %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,-3) S: [-2147483648,2147483645) Exits: (4 * ((-4 + %num) /u 4)) LoopDispositions: { %loop: Computable } -; CHECK-NEXT: %iv.next = add nuw i32 %iv, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,-3) S: [-2147483648,2147483645) Exits: (4 + (4 * ((-4 + %num) /u 4))) LoopDispositions: { %loop: Computable } -; CHECK-NEXT: Determining loop execution counts for: @ult_sle_sgt_umax_to_smax -; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + %num) /u 4) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 1073741823 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + %num) /u 4) -; CHECK-NEXT: Loop %loop: Trip multiple is 1 -; -guard.1: - %cmp.1 = icmp ult i32 %num, 4 - br i1 %cmp.1, label %exit, label %guard.2 - -guard.2: - %cmp.2 = icmp sgt i32 %num, 28 - br i1 %cmp.2, label %exit, label %guard.3 - -guard.3: - %cmp.3 = icmp sle i32 %num, 0 - br i1 %cmp.3, label %exit, label %loop - -loop: - %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ] - %iv.next = add nuw i32 %iv, 4 - %ec = icmp eq i32 %iv.next, %num - br i1 %ec, label %exit, label %loop - -exit: - ret i32 0 -} - define i32 @ptr_induction_ult_1(ptr %a, ptr %b) { ; CHECK-LABEL: 'ptr_induction_ult_1' ; CHECK-NEXT: Classifying expressions for: @ptr_induction_ult_1 diff --git a/llvm/test/Transforms/IndVarSimplify/AArch64/loop-guards.ll b/llvm/test/Transforms/IndVarSimplify/AArch64/loop-guards.ll new file mode 100644 index 000000000000..409622c255ea --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/AArch64/loop-guards.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p indvars -S %s | FileCheck %s + +target datalayout = 
"e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx15.0.0" + +define i32 @guards_applied_to_add_rec(ptr %dst) { +; CHECK-LABEL: define i32 @guards_applied_to_add_rec( +; CHECK-SAME: ptr [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: [[OUTER_IV_0:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[OUTER_IV_0_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] +; CHECK-NEXT: [[OUTER_IV_1:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[OUTER_IV_0]], %[[OUTER_LATCH]] ] +; CHECK-NEXT: [[SHR28:%.*]] = lshr i32 [[OUTER_IV_1]], 1 +; CHECK-NEXT: [[PRE:%.*]] = icmp samesign ult i32 [[OUTER_IV_1]], 2 +; CHECK-NEXT: br i1 [[PRE]], label %[[OUTER_LATCH]], label %[[INNER_PREHEADER:.*]] +; CHECK: [[INNER_PREHEADER]]: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SHR28]] to i64 +; CHECK-NEXT: br label %[[INNER:.*]] +; CHECK: [[INNER]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[INNER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[INNER]] ] +; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: store i32 [[TMP1]], ptr [[GEP_DST]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[CMP29:%.*]] = icmp samesign ult i64 [[INDVARS_IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP29]], label %[[INNER]], label %[[OUTER_LATCH_LOOPEXIT:.*]] +; CHECK: [[OUTER_LATCH_LOOPEXIT]]: +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: [[OUTER_IV_0_NEXT]] = add nuw i32 [[OUTER_IV_0]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[OUTER_IV_0_NEXT]], -2147483647 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[OUTER_HEADER]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i32 0 +; +entry: + br label %outer.header + +outer.header: + %outer.iv.0 = phi i32 [ 2, %entry ], [ %outer.iv.0.next, %outer.latch ] + 
%outer.iv.1 = phi i32 [ 1, %entry ], [ %outer.iv.0, %outer.latch ] + %shr28 = lshr i32 %outer.iv.1, 1 + %pre = icmp samesign ult i32 %outer.iv.1, 2 + br i1 %pre, label %outer.latch, label %inner + +inner: + %inner.iv = phi i32 [ 0, %outer.header ], [ %inc, %inner ] + %ext.iv = zext nneg i32 %inner.iv to i64 + %gep.dst = getelementptr i32, ptr %dst, i64 %ext.iv + store i32 %inner.iv, ptr %gep.dst, align 4 + %inc = add nuw nsw i32 %inner.iv, 1 + %cmp29 = icmp samesign ult i32 %inc, %shr28 + br i1 %cmp29, label %inner, label %outer.latch + +outer.latch: + %outer.iv.0.next = add i32 %outer.iv.0, 1 + %outer.ec = icmp sgt i32 %outer.iv.0, 0 + br i1 %outer.ec, label %outer.header, label %exit + +exit: + ret i32 0 +} diff --git a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll new file mode 100644 index 000000000000..b946bbf74708 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p indvars -S %s | FileCheck %s + +declare void @foo() + +define void @narrow_iv_precondition_order_1(ptr %start, i32 %base, i8 %n) { +; CHECK-LABEL: define void @narrow_iv_precondition_order_1( +; CHECK-SAME: ptr [[START:%.*]], i32 [[BASE:%.*]], i8 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PRE_0:%.*]] = icmp sgt i32 [[BASE]], 0 +; CHECK-NEXT: br i1 [[PRE_0]], label %[[EXIT:.*]], label %[[PH:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[N_EXT:%.*]] = zext i8 [[N]] to i32 +; CHECK-NEXT: [[PRE_1:%.*]] = icmp sgt i32 [[BASE]], [[N_EXT]] +; CHECK-NEXT: br i1 [[PRE_1]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[GEP:%.*]], %[[LOOP]] ], [ [[START]], %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[END:%.*]] = load i8, ptr [[IV]], 
align 1 +; CHECK-NEXT: [[END_EXT:%.*]] = zext i8 [[END]] to i32 +; CHECK-NEXT: [[GEP]] = getelementptr inbounds i8, ptr [[IV]], i64 1 +; CHECK-NEXT: [[EC:%.*]] = icmp sgt i32 [[BASE]], [[END_EXT]] +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %pre.0 = icmp sgt i32 %base, 0 + br i1 %pre.0, label %exit, label %ph + +ph: ; preds = %entry + %n.ext = zext i8 %n to i32 + %pre.1 = icmp sgt i32 %base, %n.ext + br i1 %pre.1, label %loop, label %exit + +loop: + %iv = phi ptr [ %start, %ph ], [ %gep, %loop ] + call void @foo() + %end = load i8, ptr %iv, align 1 + %end.ext = zext i8 %end to i32 + %gep = getelementptr inbounds i8, ptr %iv, i64 1 + %ec = icmp sgt i32 %base, %end.ext + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + +define void @narrow_iv_precondition_order_2(ptr %start, i32 %base, i8 %n) { +; CHECK-LABEL: define void @narrow_iv_precondition_order_2( +; CHECK-SAME: ptr [[START:%.*]], i32 [[BASE:%.*]], i8 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[N_EXT:%.*]] = zext i8 [[N]] to i32 +; CHECK-NEXT: [[PRE_1:%.*]] = icmp sgt i32 [[BASE]], [[N_EXT]] +; CHECK-NEXT: br i1 [[PRE_1]], label %[[EXIT:.*]], label %[[PH:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[PRE_0:%.*]] = icmp sgt i32 [[BASE]], 0 +; CHECK-NEXT: br i1 [[PRE_0]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[BASE]] to i8 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[GEP:%.*]], %[[LOOP]] ], [ [[START]], %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[END:%.*]] = load i8, ptr [[IV]], align 1 +; CHECK-NEXT: [[GEP]] = getelementptr inbounds i8, ptr [[IV]], i64 1 +; CHECK-NEXT: [[EC:%.*]] = icmp ugt i8 [[TMP0]], [[END]] +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]] +; 
CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %n.ext = zext i8 %n to i32 + %pre.1 = icmp sgt i32 %base, %n.ext + br i1 %pre.1, label %exit, label %ph + +ph: ; preds = %entry + %pre.0 = icmp sgt i32 %base, 0 + br i1 %pre.0, label %loop, label %exit + +loop: + %iv = phi ptr [ %start, %ph ], [ %gep, %loop ] + call void @foo() + %end = load i8, ptr %iv, align 1 + %end.ext = zext i8 %end to i32 + %gep = getelementptr inbounds i8, ptr %iv, i64 1 + %ec = icmp sgt i32 %base, %end.ext + br i1 %ec, label %loop, label %exit + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/min-trip-count-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/min-trip-count-known-via-scev.ll new file mode 100644 index 000000000000..6ced1f13f7e2 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/min-trip-count-known-via-scev.ll @@ -0,0 +1,353 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s + +define i32 @loop_with_at_least_2_iterations_via_guards_order_1(ptr %dst, i32 %n) { +; CHECK-LABEL: define i32 @loop_with_at_least_2_iterations_via_guards_order_1( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PRE_0:%.*]] = icmp ne i32 [[N]], 0 +; CHECK-NEXT: [[PRE_1:%.*]] = icmp slt i32 [[N]], 5 +; CHECK-NEXT: [[AND_PRE:%.*]] = and i1 [[PRE_0]], [[PRE_1]] +; CHECK-NEXT: br i1 [[AND_PRE]], label %[[PH:.*]], label %[[EXIT:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[N_EXT]], 1 +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label 
%[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[PH]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N_EXT]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i32 0 +; +entry: + %pre.0 = icmp ne i32 %n, 0 + %pre.1 = icmp slt i32 %n, 5 + %and.pre = and i1 %pre.0, %pre.1 + br i1 %and.pre, label %ph, label %exit + +ph: ; preds = %entry + %n.ext = zext i32 %n to i64 + br label %loop + +loop: ; preds = %loop, %ph + %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i32, ptr %dst, i64 %iv + store i32 1, ptr %gep + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, %n.ext + br i1 %ec, label %exit, label %loop + +exit: 
; preds = %loop, %entry + ret i32 0 +} + +; Same as loop_with_at_least_2_iterations_via_guards_order_1 but with operands +; of the AND swapped. +; TODO: Should be able to prove that %min.iters.check is false. +define i32 @loop_with_at_least_2_iterations_via_guards_order_2(ptr %dst, i32 %n) { +; CHECK-LABEL: define i32 @loop_with_at_least_2_iterations_via_guards_order_2( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PRE_0:%.*]] = icmp ne i32 [[N]], 0 +; CHECK-NEXT: [[PRE_1:%.*]] = icmp slt i32 [[N]], 5 +; CHECK-NEXT: [[AND_PRE:%.*]] = and i1 [[PRE_1]], [[PRE_0]] +; CHECK-NEXT: br i1 [[AND_PRE]], label %[[PH:.*]], label %[[EXIT:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[N_EXT]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi 
i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[PH]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N_EXT]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i32 0 +; +entry: + %pre.0 = icmp ne i32 %n, 0 + %pre.1 = icmp slt i32 %n, 5 + %and.pre = and i1 %pre.1, %pre.0 + br i1 %and.pre, label %ph, label %exit + +ph: + %n.ext = zext i32 %n to i64 + br label %loop + +loop: + %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i32, ptr %dst, i64 %iv + store i32 1, ptr %gep + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, %n.ext + br i1 %ec, label %exit, label %loop + +exit: + ret i32 0 +} + +; TODO: Should be able to prove min.iters.check is true, same as +; @loop_never_executes_precondition_order_2_predicates_flipped. 
+define void @loop_never_executes_precondition_order_1(i64 %start, ptr %dst) { +; CHECK-LABEL: define void @loop_never_executes_precondition_order_1( +; CHECK-SAME: i64 [[START:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PRE_0:%.*]] = icmp ugt i64 [[START]], 1 +; CHECK-NEXT: br i1 [[PRE_0]], label %[[EXIT:.*]], label %[[PH:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[START]], 1 +; CHECK-NEXT: [[PRE_1:%.*]] = icmp slt i64 [[MUL]], [[START]] +; CHECK-NEXT: br i1 [[PRE_1]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 1, [[START]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[START]], [[N_VEC]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1> +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 +; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP3]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], 
splat (i64 2) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %pre.0 = icmp ugt i64 %start, 1 + br i1 %pre.0, label %exit, label %ph + +ph: + %mul = shl i64 %start, 1 + %pre.1 = icmp slt i64 %mul, %start + br i1 %pre.1, label %loop, label %exit + +loop: + %iv = phi i64 [ %start, %ph ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i64, ptr %dst, i64 %iv + store i64 %iv, ptr %gep + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 0 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +; TODO: Should be able to prove min.iters.check is true, same as +; @loop_never_executes_precondition_order_2_predicates_flipped. 
+define void @loop_never_executes_precondition_order_1_predicates_flipped(i64 %start, ptr %dst) { +; CHECK-LABEL: define void @loop_never_executes_precondition_order_1_predicates_flipped( +; CHECK-SAME: i64 [[START:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PRE_0:%.*]] = icmp ule i64 [[START]], 1 +; CHECK-NEXT: br i1 [[PRE_0]], label %[[PH:.*]], label %[[EXIT:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[START]], 1 +; CHECK-NEXT: [[PRE_1:%.*]] = icmp slt i64 [[MUL]], [[START]] +; CHECK-NEXT: br i1 [[PRE_1]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 1, [[START]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[START]], [[N_VEC]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1> +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 +; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP3]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: 
[[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %pre.0 = icmp ule i64 %start, 1 + br i1 %pre.0, label %ph, label %exit + +ph: + %mul = shl i64 %start, 1 + %pre.1 = icmp slt i64 %mul, %start + br i1 %pre.1, label %loop, label %exit + +loop: + %iv = phi i64 [ %start, %ph ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i64, ptr %dst, i64 %iv + store i64 %iv, ptr %gep + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 0 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @loop_never_executes_precondition_order_2_predicates_flipped(i64 %start, ptr %dst) { +; CHECK-LABEL: define void @loop_never_executes_precondition_order_2_predicates_flipped( +; CHECK-SAME: i64 [[START:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[START]], 1 +; CHECK-NEXT: [[PRE_1:%.*]] = icmp slt i64 [[MUL]], [[START]] +; 
CHECK-NEXT: br i1 [[PRE_1]], label %[[PH:.*]], label %[[EXIT:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[PRE_0:%.*]] = icmp ule i64 [[START]], 1 +; CHECK-NEXT: br i1 [[PRE_0]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 1, [[START]] +; CHECK-NEXT: br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[START]], [[N_VEC]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1> +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 +; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP3]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %mul = shl i64 %start, 1 + %pre.1 = icmp slt i64 %mul, %start + br i1 %pre.1, label %ph, label %exit + +ph: + %pre.0 = icmp ule i64 %start, 1 + br i1 %pre.0, label %loop, label %exit + +loop: + %iv = phi i64 [ %start, %ph ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i64, ptr %dst, i64 %iv + store i64 %iv, ptr %gep + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 0 + br i1 %ec, label %exit, label %loop + +exit: + ret void +}