clang-p2996/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
Luke Lau 9dd1c66e8f [VPlan] Expand VPWidenIntOrFpInductionRecipe into separate recipes (#118638)
The motivation for this PR is to make #115274 easier to implement; it
should allow us to add EVL support by just passing EVL to the VF
operand.

The current difficulty with widening IVs with EVL is that
VPWidenIntOrFpInductionRecipe generates its own backedge value. Since
it's a VPHeaderPHIRecipe, the VF operand must be defined in the
preheader, which means we can't use EVL, since it's defined in the loop
body.

The gist of this PR is to take the approach from #114305 and expand
VPWidenIntOrFpInductionRecipe into several recipes for the initial
value, phi, and backedge value just before execution. I.e. this example:

```
  vector.ph:
  Successor(s): vector loop

  <x1> vector loop: {
    vector.body:
      WIDEN-INDUCTION %i = phi %start, %step, %vf
      ...
      EMIT branch-on-count ...
    No successors
  }
```

gets expanded to:

```
vector.ph:
  ...
  vp<%induction.start> = ...
  vp<%induction.increment> = ...

Successor(s): vector loop

<x1> vector loop: {
  vector.body:
    ir<%i> = WIDEN-PHI vp<%induction.start>, vp<%vec.ind.next>
    ...
    vp<%vec.ind.next> = add ir<%i>, vp<%induction.increment>
    EMIT branch-on-count ...
  No successors
}
```

This allows us to use a value defined in the loop as the backedge
value, and also means we can just reuse the existing backedge fixups in
VPlan::execute without having to handle it specially ourselves.

After this, #115274 should just become a matter of setting the VF
operand to EVL (and building the increment step in the loop body rather
than the preheader).
2025-06-17 18:24:07 +01:00
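
Conceptually, the expansion works as in the standalone sketch below. All
types and names here (`Recipe`, `Block`, `expandWidenInduction`) are
illustrative placeholders, not the real VPlan classes, and the actual
implementation in LLVM's VPlanTransforms differs in detail; this only
models the shape of the rewrite: start and increment computed in the
preheader, a plain phi in the loop, and an explicit add for the backedge.

```
#include <memory>
#include <string>
#include <vector>

// Toy stand-ins for VPlan recipes and basic blocks (NOT the real API).
struct Recipe {
  std::string Name;               // e.g. "induction.start"
  std::string Opcode;             // e.g. "phi", "add", "mul"
  std::vector<Recipe *> Operands; // use-def edges
};

struct Block {
  std::vector<std::unique_ptr<Recipe>> Recipes;
  Recipe *emit(std::string Name, std::string Opcode,
               std::vector<Recipe *> Ops) {
    Recipes.push_back(std::make_unique<Recipe>(
        Recipe{std::move(Name), std::move(Opcode), std::move(Ops)}));
    return Recipes.back().get();
  }
};

// Expand one widened IV (start, step, VF) into separate recipes.
// Returns the phi that replaces the original widen-induction node.
Recipe *expandWidenInduction(Block &Preheader, Block &LoopBody,
                             Recipe *Start, Recipe *Step, Recipe *VF) {
  // Preheader: the initial vector <start, start+step, ...> and the
  // per-iteration increment step*VF.
  Recipe *Init =
      Preheader.emit("induction.start", "build-vector", {Start, Step});
  Recipe *Inc = Preheader.emit("induction.increment", "mul", {Step, VF});

  // Loop body: a plain phi plus an explicit backedge add. Since the
  // increment is now an ordinary recipe, its operands need not live in
  // the preheader -- which is what passing EVL as the VF would rely on.
  Recipe *Phi = LoopBody.emit("vec.ind", "phi", {Init});
  Recipe *Next = LoopBody.emit("vec.ind.next", "add", {Phi, Inc});
  Phi->Operands.push_back(Next); // wire up the backedge value
  return Phi;
}

int main() {
  Block Preheader, LoopBody;
  Recipe Start{"start", "live-in", {}}, Step{"step", "live-in", {}},
      VF{"vf", "live-in", {}};
  expandWidenInduction(Preheader, LoopBody, &Start, &Step, &VF);
}
```

The point of the sketch is that once the backedge value is an ordinary
add recipe rather than something generated internally by a header phi,
the existing backedge fixups apply to it unchanged.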


; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck --check-prefixes=CHECK,DEFAULT %s
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -lv-strided-pointer-ivs=true -S | FileCheck --check-prefixes=CHECK,STRIDED %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
; Note: Most scalar pointer induction GEPs could be sunk into the conditional
; blocks.
; Function Attrs: nofree norecurse nounwind
define void @a(ptr readnone %b) {
; CHECK-LABEL: @a(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B1:%.*]] = ptrtoint ptr [[B:%.*]] to i64
; CHECK-NEXT: [[CMP_NOT4:%.*]] = icmp eq ptr [[B]], null
; CHECK-NEXT: br i1 [[CMP_NOT4]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[B1]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[N_VEC]], -1
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP1]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -2
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -3
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]]
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <4 x i8> [[REVERSE]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
; CHECK-NEXT: store i8 95, ptr [[TMP9]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1
; CHECK-NEXT: store i8 95, ptr [[TMP12]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1
; CHECK-NEXT: store i8 95, ptr [[TMP15]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]]
; CHECK: pred.store.if9:
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP4]], i64 -1
; CHECK-NEXT: store i8 95, ptr [[TMP18]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
; CHECK: pred.store.continue10:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[C_05:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[C_05]], i64 -1
; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP20]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: store i8 95, ptr [[INCDEC_PTR]], align 1
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[B]]
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
;
entry:
%cmp.not4 = icmp eq ptr %b, null
br i1 %cmp.not4, label %for.cond.cleanup, label %for.body.preheader
for.body.preheader: ; preds = %entry
br label %for.body
for.cond.cleanup.loopexit: ; preds = %if.end
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret void
for.body: ; preds = %for.body.preheader, %if.end
%c.05 = phi ptr [ %incdec.ptr, %if.end ], [ null, %for.body.preheader ]
%incdec.ptr = getelementptr inbounds i8, ptr %c.05, i64 -1
%0 = load i8, ptr %incdec.ptr, align 1
%tobool.not = icmp eq i8 %0, 0
br i1 %tobool.not, label %if.end, label %if.then
if.then: ; preds = %for.body
store i8 95, ptr %incdec.ptr, align 1
br label %if.end
if.end: ; preds = %for.body, %if.then
%cmp.not = icmp eq ptr %incdec.ptr, %b
br i1 %cmp.not, label %for.cond.cleanup.loopexit, label %for.body
}
; In the test below the pointer phi %ptr.iv.2 is used in two ways:
; 1. As a uniform address for the load, and
; 2. As a non-uniform operand of the getelementptr which is stored. This
;    requires the vector value.
define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias %start.2, i64 %N) {
; CHECK-LABEL: @pointer_induction_used_as_vector(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[N_VEC]], 8
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[START_2:%.*]], i64 [[N_VEC]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x ptr> [[VECTOR_GEP]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: store <4 x ptr> [[TMP2]], ptr [[TMP3]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[WIDE_LOAD]], splat (i8 1)
; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP5]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[START_2]], [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP_BODY:%.*]]
; CHECK: loop.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_BODY]] ]
; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_IV_1_NEXT:%.*]], [[LOOP_BODY]] ]
; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], [[LOOP_BODY]] ]
; CHECK-NEXT: [[PTR_IV_1_NEXT]] = getelementptr inbounds ptr, ptr [[PTR_IV_1]], i64 1
; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV_2]], i64 1
; CHECK-NEXT: store ptr [[PTR_IV_2_NEXT]], ptr [[PTR_IV_1]], align 8
; CHECK-NEXT: [[LV:%.*]] = load i8, ptr [[PTR_IV_2]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LV]], 1
; CHECK-NEXT: store i8 [[ADD]], ptr [[PTR_IV_2]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[C]], label [[LOOP_BODY]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop.body
loop.body: ; preds = %loop.body, %entry
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
%ptr.iv.1 = phi ptr [ %start.1, %entry ], [ %ptr.iv.1.next, %loop.body ]
%ptr.iv.2 = phi ptr [ %start.2, %entry ], [ %ptr.iv.2.next, %loop.body ]
%ptr.iv.1.next = getelementptr inbounds ptr, ptr %ptr.iv.1, i64 1
%ptr.iv.2.next = getelementptr inbounds i8, ptr %ptr.iv.2, i64 1
store ptr %ptr.iv.2.next, ptr %ptr.iv.1, align 8
%lv = load i8, ptr %ptr.iv.2, align 1
%add = add i8 %lv, 1
store i8 %add, ptr %ptr.iv.2, align 1
%iv.next = add nuw i64 %iv, 1
%c = icmp ne i64 %iv.next, %N
br i1 %c, label %loop.body, label %exit
exit: ; preds = %loop.body
ret void
}
; Test the vector expansion of a non-constant stride pointer IV
define void @non_constant_vector_expansion(i32 %0, ptr %call) {
; DEFAULT-LABEL: @non_constant_vector_expansion(
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1
; DEFAULT-NEXT: br label [[FOR_COND:%.*]]
; DEFAULT: for.cond:
; DEFAULT-NEXT: [[TMP1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; DEFAULT-NEXT: [[P_0:%.*]] = phi ptr [ null, [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_COND]] ]
; DEFAULT-NEXT: [[ADD_PTR]] = getelementptr i8, ptr [[P_0]], i32 [[MUL]]
; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP1]]
; DEFAULT-NEXT: store ptr [[P_0]], ptr [[ARRAYIDX]], align 4
; DEFAULT-NEXT: [[INC]] = add i32 [[TMP1]], 1
; DEFAULT-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 100
; DEFAULT-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_END:%.*]], label [[FOR_COND]]
; DEFAULT: for.end:
; DEFAULT-NEXT: ret void
;
; STRIDED-LABEL: @non_constant_vector_expansion(
; STRIDED-NEXT: entry:
; STRIDED-NEXT: [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1
; STRIDED-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
; STRIDED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; STRIDED: vector.ph:
; STRIDED-NEXT: [[TMP2:%.*]] = mul i64 100, [[TMP1]]
; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]]
; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; STRIDED: vector.body:
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], 4
; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0
; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP4:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]]
; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[TMP4]]
; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[OFFSET_IDX]]
; STRIDED-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 0
; STRIDED-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP7]], align 4
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]]
; STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; STRIDED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; STRIDED: middle.block:
; STRIDED-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; STRIDED: scalar.ph:
; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; STRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[ENTRY]] ]
; STRIDED-NEXT: br label [[FOR_COND:%.*]]
; STRIDED: for.cond:
; STRIDED-NEXT: [[TMP9:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; STRIDED-NEXT: [[P_0:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_COND]] ]
; STRIDED-NEXT: [[ADD_PTR]] = getelementptr i8, ptr [[P_0]], i32 [[MUL]]
; STRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP9]]
; STRIDED-NEXT: store ptr [[P_0]], ptr [[ARRAYIDX]], align 4
; STRIDED-NEXT: [[INC]] = add i32 [[TMP9]], 1
; STRIDED-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP9]], 100
; STRIDED-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
; STRIDED: for.end:
; STRIDED-NEXT: ret void
;
entry:
%mul = shl i32 %0, 1
br label %for.cond
for.cond: ; preds = %for.cond, %entry
%1 = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
%p.0 = phi ptr [ null, %entry ], [ %add.ptr, %for.cond ]
%add.ptr = getelementptr i8, ptr %p.0, i32 %mul
%arrayidx = getelementptr ptr, ptr %call, i32 %1
store ptr %p.0, ptr %arrayidx, align 4
%inc = add i32 %1, 1
%tobool.not = icmp eq i32 %1, 100
br i1 %tobool.not, label %for.end, label %for.cond
for.end: ; preds = %for.cond
ret void
}
; Test that when WidenPointerInductionRecipes are ordered before other
; WidenIntOrFpInductionRecipes, their PHIs are emitted in the right place.
define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) {
; DEFAULT-LABEL: @outside_lattice(
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64
; DEFAULT-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; DEFAULT: vector.scevcheck:
; DEFAULT-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
; DEFAULT-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1
; DEFAULT-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
; DEFAULT-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; DEFAULT: vector.ph:
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]]
; DEFAULT-NEXT: [[IND_END2:%.*]] = trunc i64 [[N_VEC]] to i32
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
; DEFAULT: vector.body:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[OFFSET_IDX]]
; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
; DEFAULT-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 8
; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[OFFSET_IDX]]
; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; DEFAULT-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; DEFAULT-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
; DEFAULT-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; DEFAULT: middle.block:
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; DEFAULT: scalar.ph:
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ], [ [[P]], [[VECTOR_SCEVCHECK]] ]
; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
; DEFAULT: for.body:
; DEFAULT-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ]
; DEFAULT-NEXT: [[IV_INT:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[IV_INT_NEXT:%.*]], [[FOR_BODY]] ]
; DEFAULT-NEXT: [[P_GEP:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[IV_INT]]
; DEFAULT-NEXT: store ptr [[IV_PTR]], ptr [[P_GEP]], align 8
; DEFAULT-NEXT: [[Q_GEP:%.*]] = getelementptr inbounds i32, ptr [[Q]], i32 [[IV_INT]]
; DEFAULT-NEXT: store i32 [[IV_INT]], ptr [[Q_GEP]], align 4
; DEFAULT-NEXT: [[IV_INT_NEXT]] = add i32 [[IV_INT]], 1
; DEFAULT-NEXT: [[IV_PTR_NEXT]] = getelementptr inbounds i32, ptr [[IV_PTR]], i32 1
; DEFAULT-NEXT: [[DONE:%.*]] = icmp ult i32 [[IV_INT_NEXT]], [[N]]
; DEFAULT-NEXT: br i1 [[DONE]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP7:![0-9]+]]
; DEFAULT: for.end:
; DEFAULT-NEXT: ret void
;
; STRIDED-LABEL: @outside_lattice(
; STRIDED-NEXT: entry:
; STRIDED-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64
; STRIDED-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
; STRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4
; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; STRIDED: vector.scevcheck:
; STRIDED-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
; STRIDED-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1
; STRIDED-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
; STRIDED-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; STRIDED: vector.ph:
; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4
; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
; STRIDED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]]
; STRIDED-NEXT: [[IND_END2:%.*]] = trunc i64 [[N_VEC]] to i32
; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; STRIDED: vector.body:
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
; STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[OFFSET_IDX]]
; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
; STRIDED-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 8
; STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[OFFSET_IDX]]
; STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; STRIDED-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
; STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; STRIDED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; STRIDED: middle.block:
; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
; STRIDED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; STRIDED: scalar.ph:
; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ], [ [[P]], [[VECTOR_SCEVCHECK]] ]
; STRIDED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; STRIDED-NEXT: br label [[FOR_BODY:%.*]]
; STRIDED: for.body:
; STRIDED-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ]
; STRIDED-NEXT: [[IV_INT:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[IV_INT_NEXT:%.*]], [[FOR_BODY]] ]
; STRIDED-NEXT: [[P_GEP:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[IV_INT]]
; STRIDED-NEXT: store ptr [[IV_PTR]], ptr [[P_GEP]], align 8
; STRIDED-NEXT: [[Q_GEP:%.*]] = getelementptr inbounds i32, ptr [[Q]], i32 [[IV_INT]]
; STRIDED-NEXT: store i32 [[IV_INT]], ptr [[Q_GEP]], align 4
; STRIDED-NEXT: [[IV_INT_NEXT]] = add i32 [[IV_INT]], 1
; STRIDED-NEXT: [[IV_PTR_NEXT]] = getelementptr inbounds i32, ptr [[IV_PTR]], i32 1
; STRIDED-NEXT: [[DONE:%.*]] = icmp ult i32 [[IV_INT_NEXT]], [[N]]
; STRIDED-NEXT: br i1 [[DONE]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP9:![0-9]+]]
; STRIDED: for.end:
; STRIDED-NEXT: ret void
;
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%iv.ptr = phi ptr [ %p, %entry ], [ %iv.ptr.next, %for.body ]
%iv.int = phi i32 [ 0, %entry ], [ %iv.int.next, %for.body ]
%p.gep = getelementptr inbounds ptr, ptr %p, i32 %iv.int
store ptr %iv.ptr, ptr %p.gep
%q.gep = getelementptr inbounds i32, ptr %q, i32 %iv.int
store i32 %iv.int, ptr %q.gep
%iv.int.next = add i32 %iv.int, 1
%iv.ptr.next = getelementptr inbounds i32, ptr %iv.ptr, i32 1
%done = icmp ult i32 %iv.int.next, %n
br i1 %done, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}