; Commit context (appears to be page residue rather than part of the test):
; Follow-up as discussed when using VPInstruction::ResumePhi for all resume
; values (#112147). This patch explicitly adds incoming values for each
; predecessor in VPlan. This simplifies codegen and allows transformations that
; adjust the predecessors of blocks; the change is NFC modulo the incoming-block
; order in phis.
; Page metadata: 814 lines, 42 KiB, LLVM.
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name unnamed --version 5
; RUN: opt -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s 2>&1 | FileCheck %s

; The RUN line forces a vector width of 2 and an interleave count of 1, which
; is why the expected vector code in the assertions uses <2 x ...> types.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"

; Module-level globals. In the portion of the file reviewed, the test functions
; only load from @b; the remaining globals are kept unchanged.
@f = common global i32 0, align 4
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
@c = common global i32 0, align 4
@a = common global i32 0, align 4
@b = common global i32 0, align 4
@e = common global i32 0, align 4

; test1: the loop defines %tmp17, a phi in the non-header block %bb16, whose
; value is used outside of the loop through %.lcssa. %tmp17 is not a recognized
; reduction variable. However, %tmp17 is a non-header phi, which is an allowed
; exit value, so the assertions below expect a <2 x i32> vector body whose last
; lane is extracted in the middle block and fed to the exit phi.

define i32 @test1() {
; CHECK-LABEL: define i32 @test1() {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
; CHECK: [[_LR_PH_I:.*:]]
; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ]
; CHECK-NEXT: [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
bb:
  %b.promoted = load i32, ptr @b, align 4
  br label %.lr.ph.i

.lr.ph.i:
  ; %tmp8 is the header phi: it starts at the loaded value and steps by 1.
  %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
  %tmp2 = icmp sgt i32 %tmp8, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  ; Non-header phi selecting a constant per incoming path; it is the value
  ; consumed by the exit phi below.
  %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
  %tmp18 = add nsw i32 %tmp8, 1
  %tmp19 = icmp slt i32 %tmp18, 4
  br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit

f1.exit.loopexit:
  %.lcssa = phi i32 [ %tmp17, %bb16 ]
  ret i32 %.lcssa
}

; test2: the non-header phi %tmp17 depends on the header phi %tmp8 (one of its
; incoming values is %tmp8 itself) and is used outside of the loop. The
; assertions below expect vectorization, with the select taking the widened
; induction vector as its false operand.

define i32 @test2() {
; CHECK-LABEL: define i32 @test2() {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> [[VEC_IND]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
; CHECK: [[_LR_PH_I:.*:]]
; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ [[UNNAMEDTMP8]], %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ]
; CHECK-NEXT: [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
bb:
  %b.promoted = load i32, ptr @b, align 4
  br label %.lr.ph.i

.lr.ph.i:
  %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
  %tmp2 = icmp sgt i32 %tmp8, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  ; Unlike test1, the %bb10 incoming value is the header phi %tmp8.
  %tmp17 = phi i32 [ %tmp8, %bb10 ], [ 1, %.lr.ph.i ]
  %tmp18 = add nsw i32 %tmp8, 1
  %tmp19 = icmp slt i32 %tmp18, 4
  br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit

f1.exit.loopexit:
  %.lcssa = phi i32 [ %tmp17, %bb16 ]
  ret i32 %.lcssa
}

; test3: the phi %tmp17, which is used outside of the loop, has more than two
; incoming values (from %bb10, %.lr.ph.i and %bb12). The assertions below
; expect vectorization, with the three-way phi lowered to two chained selects.
define i32 @test3(i32 %N) {
; CHECK-LABEL: define i32 @test3(
; CHECK-SAME: i32 [[N:%.*]]) {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP5]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> splat (i32 2), <2 x i32> [[PREDPHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[PREDPHI1]], i32 1
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
; CHECK: [[_LR_PH_I:.*:]]
; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[UNNAMEDBB12:.*]], label %[[BB16]]
; CHECK: [[UNNAMEDBB12]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ], [ 2, %[[UNNAMEDBB12]] ]
; CHECK-NEXT: [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
bb:
  %b.promoted = load i32, ptr @b, align 4
  br label %.lr.ph.i

.lr.ph.i:
  %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
  %tmp2 = icmp sgt i32 %tmp8, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  ; Second condition, compared against the function argument %N.
  %cmp = icmp sgt i32 %tmp8, %N
  br i1 %cmp, label %bb12, label %bb16

bb12:
  br label %bb16

bb16:
  ; Three-way non-header phi; this is the value consumed by the exit phi.
  %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ], [ 2, %bb12 ]
  %tmp18 = add nsw i32 %tmp8, 1
  %tmp19 = icmp slt i32 %tmp18, 4
  br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit

f1.exit.loopexit:
  %.lcssa = phi i32 [ %tmp17, %bb16 ]
  ret i32 %.lcssa
}

; test4: the outside user %.lcssa has more than one incoming value: %tmp17 from
; the loop and the constant 2 from the entry block %bb, which can bypass the
; loop. The assertions below expect vectorization, with a dedicated loop-exit
; block holding the phi that merges the scalar and vector results.
define i32 @test4(i32 %N) {
; CHECK-LABEL: define i32 @test4(
; CHECK-SAME: i32 [[N:%.*]]) {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: [[ICMP:%.*]] = icmp slt i32 [[B_PROMOTED]], [[N]]
; CHECK-NEXT: br i1 [[ICMP]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[DOTLR_PH_I_PREHEADER:.*]]
; CHECK: [[_LR_PH_I_PREHEADER:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[DOTLR_PH_I_PREHEADER]] ]
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
; CHECK: [[_LR_PH_I:.*:]]
; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ]
; CHECK-NEXT: [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[F1_EXIT_LOOPEXIT_LOOPEXIT]]:
; CHECK-NEXT: [[TMP17_LCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label %[[F1_EXIT_LOOPEXIT]]
; CHECK: [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ 2, %[[BB]] ], [ [[TMP17_LCSSA]], %[[F1_EXIT_LOOPEXIT_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
bb:
  %b.promoted = load i32, ptr @b, align 4
  ; Guard that can skip the loop entirely and jump straight to the exit block.
  %icmp = icmp slt i32 %b.promoted, %N
  br i1 %icmp, label %f1.exit.loopexit, label %.lr.ph.i

.lr.ph.i:
  %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
  %tmp2 = icmp sgt i32 %tmp8, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
  %tmp18 = add nsw i32 %tmp8, 1
  %tmp19 = icmp slt i32 %tmp18, 4
  br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit

f1.exit.loopexit:
  ; Two incoming values: the loop result and the constant from the bypass edge.
  %.lcssa = phi i32 [ %tmp17, %bb16 ], [ 2, %bb ]
  ret i32 %.lcssa
}

; reduction_sum: the non-header phi %tmp17 depends on the reduction phi %sum.02
; and is used outside of the loop (by %nonhdr.lcssa). Reduction phis are only
; allowed to have bump or reduction operations as their inside users, so this
; loop should not be vectorized; the assertions below match the scalar loop
; only.
define i32 @reduction_sum(i32 %n, ptr noalias nocapture %A, ptr noalias nocapture %B) nounwind uwtable readonly noinline ssp {
; CHECK-LABEL: define i32 @reduction_sum(
; CHECK-SAME: i32 [[N:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[C1]], label %[[HEADER_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]]
; CHECK: [[HEADER_PREHEADER]]:
; CHECK-NEXT: br label %[[HEADER:.*]]
; CHECK: [[HEADER]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BB16:.*]] ], [ 0, %[[HEADER_PREHEADER]] ]
; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[C9:%.*]], %[[BB16]] ], [ 0, %[[HEADER_PREHEADER]] ]
; CHECK-NEXT: [[C2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[C3:%.*]] = load i32, ptr [[C2]], align 4
; CHECK-NEXT: [[C4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[C5:%.*]] = load i32, ptr [[C4]], align 4
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[SUM_02]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ [[SUM_02]], %[[UNNAMEDBB10]] ], [ 1, %[[HEADER]] ]
; CHECK-NEXT: [[C6:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[C7:%.*]] = add i32 [[SUM_02]], [[C6]]
; CHECK-NEXT: [[C8:%.*]] = add i32 [[C7]], [[C3]]
; CHECK-NEXT: [[C9]] = add i32 [[C8]], [[C5]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]], label %[[HEADER]]
; CHECK: [[__CRIT_EDGE_LOOPEXIT:.*:]]
; CHECK-NEXT: [[TMP17_LCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ]
; CHECK-NEXT: [[C9_LCSSA:%.*]] = phi i32 [ [[C9]], %[[BB16]] ]
; CHECK-NEXT: br [[DOT_CRIT_EDGE]]
; CHECK: [[__CRIT_EDGE:.*:]]
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[C9_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT: [[NONHDR_LCSSA:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[TMP17_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
entry:
  %c1 = icmp sgt i32 %n, 0
  br i1 %c1, label %header, label %._crit_edge

header:                                           ; preds = %bb16, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %bb16 ], [ 0, %entry ]
  ; %sum.02 is the loop-carried sum; it is also read by %tmp2 and %tmp17 below.
  %sum.02 = phi i32 [ %c9, %bb16 ], [ 0, %entry ]
  %c2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %c3 = load i32, ptr %c2, align 4
  %c4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %c5 = load i32, ptr %c4, align 4
  %tmp2 = icmp sgt i32 %sum.02, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  ; Non-header phi that reads the sum phi and is used outside of the loop.
  %tmp17 = phi i32 [ %sum.02, %bb10 ], [ 1, %header ]
  %c6 = trunc i64 %indvars.iv to i32
  %c7 = add i32 %sum.02, %c6
  %c8 = add i32 %c7, %c3
  %c9 = add i32 %c8, %c5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %header

._crit_edge:                                      ; preds = %bb16, %entry
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %c9, %bb16 ]
  %nonhdr.lcssa = phi i32 [ 1, %entry ], [ %tmp17, %bb16 ]
  ret i32 %sum.0.lcssa
}

; cyclic_dep_with_indvar: the non-header phi %tmp17 reads the header phi %iv,
; and %ivnext is computed from %tmp17 instead of from %iv. This invalid cyclic
; dependency prevents %iv from being recognized as an induction variable, so
; the loop cannot be vectorized; the assertions below match the scalar loop
; only.
define i32 @cyclic_dep_with_indvar() {
; CHECK-LABEL: define i32 @cyclic_dep_with_indvar() {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
; CHECK: [[_LR_PH_I:.*:]]
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[BB16:.*]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[IV]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ [[IV]], %[[DOTLR_PH_I]] ]
; CHECK-NEXT: [[IVNEXT]] = add nsw i32 [[UNNAMEDTMP17]], 1
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[IVNEXT]], 4
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT:.*]]
; CHECK: [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ]
; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
bb:
  %b.promoted = load i32, ptr @b, align 4
  br label %.lr.ph.i

.lr.ph.i:
  %iv = phi i32 [ %ivnext, %bb16 ], [ %b.promoted, %bb ]
  %tmp2 = icmp sgt i32 %iv, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  %tmp17 = phi i32 [ 0, %bb10 ], [ %iv, %.lr.ph.i ]
  ; The step is applied to %tmp17, not to %iv, which closes the cycle.
  %ivnext = add nsw i32 %tmp17, 1
  %tmp19 = icmp slt i32 %ivnext, 4
  br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit

f1.exit.loopexit:
  %.lcssa = phi i32 [ %tmp17, %bb16 ]
  ret i32 %.lcssa
}

; not_valid_reduction: the non-reduction phi %tmp17, which is used outside of
; the loop, has a cyclic dependence with the header phi %x.05 (%x.05 takes
; %tmp17 on the back edge, and %tmp17 takes %sub, which is computed from
; %x.05). The loop cannot be vectorized; the assertions below match the scalar
; loop only.
define i32 @not_valid_reduction(i32 %n, ptr noalias nocapture %A) nounwind uwtable readonly {
; CHECK-LABEL: define i32 @not_valid_reduction(
; CHECK-SAME: i32 [[N:%.*]], ptr noalias captures(none) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP4]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
; CHECK: [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LATCH:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[UNNAMEDTMP17:%.*]], %[[LATCH]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[UNNAMEDTMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i64 [[INDVARS_IV]], 10
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X_05]], [[UNNAMEDTMP0]]
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[UNNAMEDBB16:.*]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[UNNAMEDBB16]]
; CHECK: [[UNNAMEDBB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17]] = phi i32 [ 1, %[[UNNAMEDBB10]] ], [ [[SUB]], %[[FOR_BODY]] ]
; CHECK-NEXT: br label %[[LATCH]]
; CHECK: [[LATCH]]:
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT: [[TMP17_LCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[LATCH]] ]
; CHECK-NEXT: br label %[[FOR_END]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP17_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[X_0_LCSSA]]
;
entry:
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %latch, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
  %x.05 = phi i32 [ %tmp17, %latch ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %tmp0 = load i32, ptr %arrayidx, align 4
  %tmp2 = icmp sgt i64 %indvars.iv, 10
  %sub = sub nsw i32 %x.05, %tmp0
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  ; Either the constant 1 or the running subtraction result feeds the next
  ; iteration's %x.05.
  %tmp17 = phi i32 [ 1, %bb10 ], [ %sub, %for.body ]
  br label %latch

latch:
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %latch, %entry
  %x.0.lcssa = phi i32 [ 0, %entry ], [ %tmp17, %latch ]
  ret i32 %x.0.lcssa
}

; outside_user_non_phi: the value used outside of the loop is %tmp17.trunc, a
; trunc of the non-header phi %tmp17, rather than the phi itself. The
; assertions below expect vectorization, with the trunc widened to
; <2 x i32> -> <2 x i8> and its last lane extracted in the middle block.
define i8 @outside_user_non_phi() {
; CHECK-LABEL: define i8 @outside_user_non_phi() {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[PREDPHI]] to <2 x i8>
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP4]], i32 1
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
; CHECK: [[_LR_PH_I:.*:]]
; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK: [[UNNAMEDBB10]]:
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ]
; CHECK-NEXT: [[TMP17_TRUNC:%.*]] = trunc i32 [[UNNAMEDTMP17]] to i8
; CHECK-NEXT: [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i8 [ [[TMP17_TRUNC]], %[[BB16]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i8 [[DOTLCSSA]]
;
bb:
  %b.promoted = load i32, ptr @b, align 4
  br label %.lr.ph.i

.lr.ph.i:
  %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
  %tmp2 = icmp sgt i32 %tmp8, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
  ; The non-phi instruction whose result is live out of the loop.
  %tmp17.trunc = trunc i32 %tmp17 to i8
  %tmp18 = add nsw i32 %tmp8, 1
  %tmp19 = icmp slt i32 %tmp18, 4
  br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit

f1.exit.loopexit:
  %.lcssa = phi i8 [ %tmp17.trunc, %bb16 ]
  ret i8 %.lcssa
}

; no_vectorize_reduction_with_outside_use: the loop carries %result.08 / %or as
; a loop-carried 'or' chain, while the value live out of the loop is the load
; %1. The assertions below match the scalar loop only, i.e. no vector code is
; expected.
; NOTE(review): the function name mentions a reduction with an outside use, but
; the exit phi consumes %1 rather than %or -- confirm this is the intended
; shape of the test.
define i32 @no_vectorize_reduction_with_outside_use(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable readonly {
; CHECK-LABEL: define i32 @no_vectorize_reduction_with_outside_use(
; CHECK-SAME: i32 [[N:%.*]], ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP7]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
; CHECK: [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[OR:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[OR]] = or i32 [[ADD]], [[RESULT_08]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP1]], %[[FOR_BODY]] ]
; CHECK-NEXT: br label %[[FOR_END]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTLCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %or = or i32 %add, %result.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ; The live-out value is the load %1 from the final iteration.
  %result.0.lcssa = phi i32 [ 0, %entry ], [ %1, %for.body ]
  ret i32 %result.0.lcssa
}

; Vectorize the c[i] = a[i] + b[i] loop where the sum stored to c[i] is also
|
|
; used outside the loop (it is the function's return value).
|
|
; Element-wise sum C[i] = B[i] + A[i], with i starting at the value loaded from
; @b. The body runs at least once and repeats while i + 1 < %N (signed); the
; last %sum computed is returned through a phi in the exit block.
;
; What the expected output below shows:
;  - the loop is vectorized with <2 x i32> loads, add and store;
;  - a vector.memcheck block subtracts the integer values of the pointers
;    (C - B and C - A) and falls back to the scalar loop if either difference
;    is below 8 (unsigned compare);
;  - the middle block extracts lane 1 of the vector sum, which becomes the
;    exit phi's incoming value from the middle block next to the scalar %sum;
;  - the scalar preheader's resume phi has one incoming value per predecessor:
;    the advanced start (b.promoted + n.vec) from the middle block, and the
;    original start from both the entry block and vector.memcheck.
define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) {
|
|
; CHECK-LABEL: define i32 @sum_arrays_outside_use(
|
|
; CHECK-SAME: ptr [[B:%.*]], ptr [[A:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) {
|
|
; CHECK-NEXT: [[BB:.*]]:
|
|
; CHECK-NEXT: [[A3:%.*]] = ptrtoint ptr [[A]] to i32
|
|
; CHECK-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i32
|
|
; CHECK-NEXT: [[C1:%.*]] = ptrtoint ptr [[C]] to i32
|
|
; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[B_PROMOTED]], 1
|
|
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]])
|
|
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
|
|
; CHECK: [[VECTOR_MEMCHECK]]:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[C1]], [[B2]]
|
|
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8
|
|
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[C1]], [[A3]]
|
|
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP3]], 8
|
|
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
|
|
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[B_PROMOTED]], [[INDEX]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 0
|
|
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP6]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
|
|
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
|
|
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4
|
|
; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], [[WIDE_LOAD5]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP6]]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
|
|
; CHECK-NEXT: store <2 x i32> [[TMP11]], ptr [[TMP13]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
|
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ], [ [[B_PROMOTED]], %[[VECTOR_MEMCHECK]] ]
|
|
; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
|
|
; CHECK: [[_LR_PH_I:.*:]]
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = sext i32 [[IV]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[BLOAD:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[ALOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[SUM:%.*]] = add nsw i32 [[BLOAD]], [[ALOAD]]
|
|
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: store i32 [[SUM]], ptr [[ARRAYIDX3]], align 4
|
|
; CHECK-NEXT: [[IVNEXT]] = add nsw i32 [[IV]], 1
|
|
; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[IVNEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP13:![0-9]+]]
|
|
; CHECK: [[F1_EXIT_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[SUM]], %[[DOTLR_PH_I]] ], [ [[TMP15]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: ret i32 [[DOTLCSSA]]
|
|
;
|
|
; The loop's start index is read from the global @b before entering the loop.
bb:
|
|
%b.promoted = load i32, ptr @b, align 4
|
|
br label %.lr.ph.i
|
|
|
|
.lr.ph.i:
|
|
%iv = phi i32 [ %ivnext, %.lr.ph.i ], [ %b.promoted, %bb ]
|
|
; The i32 counter is sign-extended to i64 before indexing the three arrays.
%indvars.iv = sext i32 %iv to i64
|
|
%arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
|
|
%Bload = load i32, ptr %arrayidx2, align 4
|
|
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
|
|
%Aload = load i32, ptr %arrayidx, align 4
|
|
; %sum is both stored to C[i] and read by the exit-block phi below.
%sum = add nsw i32 %Bload, %Aload
|
|
%arrayidx3 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
|
|
store i32 %sum, ptr %arrayidx3, align 4
|
|
%ivnext = add nsw i32 %iv, 1
|
|
%tmp19 = icmp slt i32 %ivnext, %N
|
|
br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
|
|
|
|
f1.exit.loopexit:
|
|
; Outside user of the in-loop value: the last %sum is the return value.
%.lcssa = phi i32 [ %sum, %.lr.ph.i ]
|
|
ret i32 %.lcssa
|
|
}
|
|
|
|
; Zero-initialized 32-byte table indexed by @non_uniform_live_out.
@tab = common global [32 x i8] zeroinitializer, align 1
|
|
|
|
; Increments tab[i + 7] for each i starting at 0, then, after the loop, stores
; 42 to tab[%i.09] using the last in-loop value of %i.09 = %i.08 + 7.
; %i.09 is derived from the induction variable, so in the vectorized form it
; holds a different value in each lane (the vector induction plus splat 7).
;
; What the expected output below shows:
;  - the entry and middle blocks both branch on the constant false, so the
;    vector loop is always entered and control always continues into the
;    scalar loop afterwards;
;  - the vector loop exits once its index reaches 20000, and the scalar loop
;    resumes from 20000;
;  - the middle block extracts lane 1 of the widened add, which is the exit
;    phi's incoming value from the middle block next to the scalar %i.09.
define i32 @non_uniform_live_out() {
|
|
; CHECK-LABEL: define i32 @non_uniform_live_out() {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 7)
|
|
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP1]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i8> [[WIDE_LOAD]], splat (i8 1)
|
|
; CHECK-NEXT: store <2 x i8> [[TMP4]], ptr [[TMP3]], align 1
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000
|
|
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
|
|
; CHECK-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 20000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_09:%.*]] = add i32 [[I_08]], 7
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_09]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
|
|
; CHECK-NEXT: [[BUMP:%.*]] = add i8 [[TMP7]], 1
|
|
; CHECK-NEXT: store i8 [[BUMP]], ptr [[ARRAYIDX]], align 1
|
|
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 20000
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[I_09]], %[[FOR_BODY]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX_OUT:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[LCSSA]]
|
|
; CHECK-NEXT: store i8 42, ptr [[ARRAYIDX_OUT]], align 1
|
|
; CHECK-NEXT: ret i32 0
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
; %i.09 (counter + 7) is the table index and also the value that leaves the loop.
%i.09 = add i32 %i.08, 7
|
|
%arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.09
|
|
%0 = load i8, ptr %arrayidx, align 1
|
|
%bump = add i8 %0, 1
|
|
store i8 %bump, ptr %arrayidx, align 1
|
|
%inc = add nsw i32 %i.08, 1
|
|
; The exit test reads the pre-increment %i.08, so the body also runs for 20000.
%exitcond = icmp eq i32 %i.08, 20000
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
; Outside user of the in-loop value: the final %i.09 indexes the store of 42.
%lcssa = phi i32 [%i.09, %for.body]
|
|
%arrayidx.out = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %lcssa
|
|
store i8 42, ptr %arrayidx.out, align 1
|
|
ret i32 0
|
|
}
|
|
;.
|
|
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
|
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
|
|
; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
|
|
;.
|