At the moment, the full cost of all interleave group members is assigned to the instruction at the group's insert position, even if the decision was to not form an interleave group. This can lead to inaccurate cost estimates, e.g. if the instruction at the insert position is dead. If the decision is to not vectorize but scalarize or scatter/gather, then the cost will be the total cost for all members. In those cases, assign the cost individually per member, to more closely reflect the choice per instruction. This fixes a divergence between the legacy and the VPlan-based cost model. Fixes https://github.com/llvm/llvm-project/issues/108098.
520 lines
30 KiB
LLVM
520 lines
30 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -p loop-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S %s | FileCheck %s
|
|
|
|
; Module data layout: little-endian ("e"), 64-bit pointers with 64-bit
; alignment ("p:64:64"), native integer widths of 32 and 64 bits ("n32:64").
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
|
|
|
|
; Test with a dead load in the loop (%l is loaded but never used), from
|
|
; https://github.com/llvm/llvm-project/issues/99701
; The expected vector body below contains only a masked scatter for the store;
; no vector load is emitted for %l.
|
|
define void @dead_load(ptr %p, i16 %start) {
|
|
; CHECK-LABEL: define void @dead_load(
|
|
; CHECK-SAME: ptr [[P:%.*]], i16 [[START:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[START_EXT:%.*]] = sext i16 [[START]] to i64
|
|
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START_EXT]], i64 111)
|
|
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[SMAX]], [[START_EXT]]
|
|
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 1)
|
|
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[SMAX]], [[UMIN]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[START_EXT]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[UMIN]], [[TMP3]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 1
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP5]], [[TMP7]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP5]], [[TMP9]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
|
|
; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP5]], [[TMP11]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 3
|
|
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START_EXT]], [[TMP12]]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[START_EXT]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
|
|
; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 8 x i64> [[TMP15]], zeroinitializer
|
|
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 8 x i64> [[TMP16]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 3, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP17]]
|
|
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP14]]
|
|
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP20]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT1]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[P]], <vscale x 8 x i64> [[VEC_IND]]
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP21]], i32 2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT2]]
|
|
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: br label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START_EXT]], %[[ENTRY]] ]
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[P]], i64 [[IV]]
|
|
; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2
|
|
; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
|
|
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 3
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV]], 111
|
|
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
; The induction variable starts at the sign-extended %start.
%start.ext = sext i16 %start to i64
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ %start.ext, %entry ], [ %iv.next, %loop ]
|
|
%gep = getelementptr i16, ptr %p, i64 %iv
|
|
store i16 0, ptr %gep, align 2
|
|
; Dead load: %l is not used by any other instruction in this function.
%l = load i16, ptr %gep, align 2
|
|
; %iv advances by 3 i16 elements per iteration, so the accesses through %gep
; are strided rather than consecutive.
%iv.next = add i64 %iv, 3
|
|
; The exit test uses the pre-increment %iv, not %iv.next.
%cmp = icmp slt i64 %iv, 111
|
|
br i1 %cmp, label %loop, label %exit
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Test case for https://github.com/llvm/llvm-project/issues/100464.
|
|
; Loop with a live-out %l and scalar epilogue required due to an interleave
|
|
; group. As the scalar epilogue is required the live-out is fed from the scalar
|
|
; epilogue and dead in the vector loop.
; In the expected output the exit phi [[R]] has a single incoming value, from
; the scalar loop, and the vector body contains only the scatter for the store.
|
|
define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) {
|
|
; CHECK-LABEL: define i8 @dead_live_out_due_to_scalar_epilogue_required(
|
|
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 8, i32 [[TMP1]])
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 252, [[TMP2]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
|
|
; CHECK: [[VECTOR_MEMCHECK]]:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 1005
|
|
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 1005
|
|
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
|
|
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
|
|
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
|
|
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], 4
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 252, [[TMP4]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
|
|
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 252, [[TMP6]]
|
|
; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[N_VEC]], 4
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 4
|
|
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
|
|
; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[TMP9]], zeroinitializer
|
|
; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i32> [[TMP10]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 4, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP11]]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = mul i32 4, [[TMP8]]
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP14]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP15:%.*]] = sext <vscale x 4 x i32> [[VEC_IND]] to <vscale x 4 x i64>
|
|
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], <vscale x 4 x i64> [[TMP15]]
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x ptr> [[TMP16]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP8]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: br label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IV]] to i64
|
|
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
|
|
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: store i8 0, ptr [[GEP_DST]], align 1
|
|
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 4
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV]], 1001
|
|
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[R:%.*]] = phi i8 [ [[L]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret i8 [[R]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
|
|
; The same sign-extended index addresses both %src and %dst.
%idxprom = sext i32 %iv to i64
|
|
%gep.src = getelementptr i8, ptr %src, i64 %idxprom
|
|
; %l is only used by the phi %r in %exit, i.e. it is a live-out of the loop.
%l = load i8, ptr %gep.src, align 1
|
|
%gep.dst = getelementptr i8, ptr %dst, i64 %idxprom
|
|
store i8 0, ptr %gep.dst, align 1
|
|
; %iv advances by 4 bytes per iteration, so both accesses are strided.
%iv.next = add i32 %iv, 4
|
|
; Unsigned exit test on the pre-increment %iv.
%cmp = icmp ult i32 %iv, 1001
|
|
br i1 %cmp, label %loop, label %exit
|
|
|
|
exit:
|
|
; Returns the value loaded in the last executed loop iteration.
%r = phi i8 [ %l, %loop ]
|
|
ret i8 %r
|
|
}
|
|
|
|
; Unsigned-max intrinsic on i16, called in @cost_of_exit_branch_and_cond_insts.
declare i16 @llvm.umax.i16(i16, i16)
|
|
|
|
; Test case for https://github.com/llvm/llvm-project/issues/106780.
; The loop guards two stores with the loop-invariant condition %c and computes
; its exit condition in %loop.exiting from %x and constants. The expected
; output uses a fixed vector width of 8, with per-lane predicated scalar stores
; to %a and a masked vector store to %b.
|
|
define i32 @cost_of_exit_branch_and_cond_insts(ptr %a, ptr %b, i1 %c, i16 %x) #0 {
|
|
; CHECK-LABEL: define i32 @cost_of_exit_branch_and_cond_insts(
|
|
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i1 [[C:%.*]], i16 [[X:%.*]]) #[[ATTR2:[0-9]+]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[X]] to i32
|
|
; CHECK-NEXT: [[UMAX3:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP0]], i32 111)
|
|
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 770, [[UMAX3]]
|
|
; CHECK-NEXT: [[SMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 0)
|
|
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[SMAX4]], 1
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 24
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
|
|
; CHECK: [[VECTOR_MEMCHECK]]:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[X]] to i32
|
|
; CHECK-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP3]], i32 111)
|
|
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 770, [[UMAX1]]
|
|
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP4]], i32 0)
|
|
; CHECK-NEXT: [[TMP5:%.*]] = zext nneg i32 [[SMAX]] to i64
|
|
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2
|
|
; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], 4
|
|
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
|
|
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP2]]
|
|
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]]
|
|
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
|
|
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 8
|
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
|
|
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 8, i32 [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP9]]
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i1> poison, i1 [[C]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT]], <8 x i1> poison, <8 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE18:.*]] ]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 0
|
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[B]], i32 [[TMP10]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 0
|
|
; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
|
; CHECK: [[PRED_STORE_IF]]:
|
|
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11:![0-9]+]], !noalias [[META14:![0-9]+]]
|
|
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
|
; CHECK: [[PRED_STORE_CONTINUE]]:
|
|
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 1
|
|
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
|
|
; CHECK: [[PRED_STORE_IF5]]:
|
|
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
|
|
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
|
|
; CHECK: [[PRED_STORE_CONTINUE6]]:
|
|
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 2
|
|
; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
|
|
; CHECK: [[PRED_STORE_IF7]]:
|
|
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
|
|
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
|
|
; CHECK: [[PRED_STORE_CONTINUE8]]:
|
|
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 3
|
|
; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
|
|
; CHECK: [[PRED_STORE_IF9]]:
|
|
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
|
|
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]]
|
|
; CHECK: [[PRED_STORE_CONTINUE10]]:
|
|
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 4
|
|
; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
|
|
; CHECK: [[PRED_STORE_IF11]]:
|
|
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
|
|
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE12]]
|
|
; CHECK: [[PRED_STORE_CONTINUE12]]:
|
|
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 5
|
|
; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
|
|
; CHECK: [[PRED_STORE_IF13]]:
|
|
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
|
|
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE14]]
|
|
; CHECK: [[PRED_STORE_CONTINUE14]]:
|
|
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 6
|
|
; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
|
|
; CHECK: [[PRED_STORE_IF15]]:
|
|
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
|
|
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE16]]
|
|
; CHECK: [[PRED_STORE_CONTINUE16]]:
|
|
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 7
|
|
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18]]
|
|
; CHECK: [[PRED_STORE_IF17]]:
|
|
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
|
|
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE18]]
|
|
; CHECK: [[PRED_STORE_CONTINUE18]]:
|
|
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0
|
|
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP20]], i32 4, <8 x i1> [[BROADCAST_SPLAT]]), !alias.scope [[META14]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
|
|
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: br label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
|
|
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
|
|
; CHECK: [[LOOP_HEADER]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
|
|
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_EXITING:.*]]
|
|
; CHECK: [[THEN]]:
|
|
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV]]
|
|
; CHECK-NEXT: store i1 false, ptr [[A]], align 1
|
|
; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
|
|
; CHECK-NEXT: br label %[[LOOP_EXITING]]
|
|
; CHECK: [[LOOP_EXITING]]:
|
|
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
|
|
; CHECK-NEXT: [[UMAX:%.*]] = tail call i16 @llvm.umax.i16(i16 [[X]], i16 111)
|
|
; CHECK-NEXT: [[UMAX_EXT:%.*]] = zext i16 [[UMAX]] to i32
|
|
; CHECK-NEXT: [[SUB:%.*]] = sub i32 770, [[UMAX_EXT]]
|
|
; CHECK-NEXT: [[EC:%.*]] = icmp slt i32 [[IV]], [[SUB]]
|
|
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_LATCH]], label %[[EXIT:.*]]
|
|
; CHECK: [[LOOP_LATCH]]:
|
|
; CHECK-NEXT: br label %[[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: br label %[[RETURN:.*]]
|
|
; CHECK: [[RETURN]]:
|
|
; CHECK-NEXT: ret i32 0
|
|
;
|
|
entry:
|
|
br label %loop.header
|
|
|
|
loop.header:
|
|
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
|
|
; %c is a function argument, so this branch is the same in every iteration.
br i1 %c, label %then, label %loop.exiting
|
|
|
|
then:
|
|
%gep = getelementptr inbounds i32, ptr %b, i32 %iv
|
|
; Store to the fixed address %a, which does not depend on %iv.
store i1 false, ptr %a, align 1
|
|
; Store to consecutive i32 elements of %b, indexed by %iv.
store i32 0, ptr %gep, align 4
|
|
br label %loop.exiting
|
|
|
|
loop.exiting:
|
|
%iv.next = add i32 %iv, 1
|
|
; %umax, %umax.ext and %sub depend only on %x and constants, not on %iv.
%umax = tail call i16 @llvm.umax.i16(i16 %x, i16 111)
|
|
%umax.ext = zext i16 %umax to i32
|
|
%sub = sub i32 770, %umax.ext
|
|
; Signed exit test on the pre-increment %iv.
%ec = icmp slt i32 %iv, %sub
|
|
br i1 %ec, label %loop.latch, label %exit
|
|
|
|
loop.latch:
|
|
br label %loop.header
|
|
|
|
exit:
|
|
br label %return
|
|
|
|
return:
|
|
ret i32 0
|
|
}
|
|
|
|
; Test case for https://github.com/llvm/llvm-project/issues/107473.
; The latch phi %p merges %not.a with 0, but the header branch condition is the
; constant false, so %then is always executed. In the expected vector body the
; xor of the broadcast %a is scattered directly, with no select or phi for %p.
|
|
define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) {
|
|
; CHECK-LABEL: define void @test_phi_in_latch_redundant(
|
|
; CHECK-SAME: ptr [[DST:%.*]], i32 [[A:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 37, [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 37, [[TMP3]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 37, [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 9
|
|
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
|
|
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[TMP6]], zeroinitializer
|
|
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 9, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 9, [[TMP5]]
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[A]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = xor <vscale x 2 x i32> [[BROADCAST_SPLAT]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 -1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 2 x i64> [[VEC_IND]]
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> [[TMP10]], <vscale x 2 x ptr> [[TMP11]], i32 4, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 37, [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
|
|
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
|
|
; CHECK: [[LOOP_HEADER]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
|
|
; CHECK-NEXT: br i1 false, label %[[LOOP_LATCH]], label %[[THEN:.*]]
|
|
; CHECK: [[THEN]]:
|
|
; CHECK-NEXT: [[NOT_A:%.*]] = xor i32 [[A]], -1
|
|
; CHECK-NEXT: br label %[[LOOP_LATCH]]
|
|
; CHECK: [[LOOP_LATCH]]:
|
|
; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[NOT_A]], %[[THEN]] ], [ 0, %[[LOOP_HEADER]] ]
|
|
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV]]
|
|
; CHECK-NEXT: store i32 [[P]], ptr [[GEP]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 9
|
|
; CHECK-NEXT: [[EC:%.*]] = icmp slt i64 [[IV]], 322
|
|
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP19:![0-9]+]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %loop.header
|
|
|
|
loop.header:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
|
|
; The condition is the constant false, so the false successor %then is always
; taken and the direct edge to %loop.latch is never followed.
br i1 false, label %loop.latch, label %then
|
|
|
|
then:
|
|
; Bitwise NOT of %a; does not depend on %iv.
%not.a = xor i32 %a, -1
|
|
br label %loop.latch
|
|
|
|
loop.latch:
|
|
; Merges %not.a (from %then) with 0 (from %loop.header); given the constant
; branch above, only the %then incoming value can be selected at run time.
%p = phi i32 [ %not.a, %then ], [ 0, %loop.header ]
|
|
%gep = getelementptr i32, ptr %dst, i64 %iv
|
|
store i32 %p, ptr %gep, align 4
|
|
; %iv advances by 9 i32 elements per iteration, so the store is strided.
%iv.next = add i64 %iv, 9
|
|
; Signed exit test on the pre-increment %iv.
%ec = icmp slt i64 %iv, 322
|
|
br i1 %ec, label %loop.header, label %exit
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Test for https://github.com/llvm/llvm-project/issues/108098.
; Each iteration loads two adjacent bytes from %src, at %iv and %iv + 1, with
; %iv stepping by 4. The first load (%l.dead) has no users; only the second is
; zero-extended and stored to %dst. The expected vector body reads %src with
; wide <32 x i8> loads plus shufflevectors and writes %dst with masked scatters.
|
|
define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %src, ptr noalias %dst) #0 {
|
|
; CHECK-LABEL: define void @gather_interleave_group_with_dead_insert_pos(
|
|
; CHECK-SAME: i64 [[N:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR2]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[SMAX]], 3
|
|
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 16
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
|
|
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
|
|
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 16, i64 [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]]
|
|
; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 4, i64 8, i64 12, i64 16, i64 20, i64 24, i64 28>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
|
|
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP6]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
|
|
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, ptr [[TMP9]], align 1
|
|
; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <32 x i8>, ptr [[TMP10]], align 1
|
|
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
|
|
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
|
|
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
|
|
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
|
|
; CHECK-NEXT: [[TMP11:%.*]] = zext <8 x i8> [[STRIDED_VEC4]] to <8 x i32>
|
|
; CHECK-NEXT: [[TMP12:%.*]] = zext <8 x i8> [[STRIDED_VEC5]] to <8 x i32>
|
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST]], <8 x i64> [[VEC_IND]]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST]], <8 x i64> [[STEP_ADD]]
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP11]], <8 x ptr> [[TMP13]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP12]], <8 x ptr> [[TMP14]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
|
|
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: br label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
|
|
; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]]
|
|
; CHECK-NEXT: [[L_DEAD:%.*]] = load i8, ptr [[GEP_SRC_0]], align 1
|
|
; CHECK-NEXT: [[IV_1:%.*]] = add i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_1]]
|
|
; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1
|
|
; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[L_1]] to i32
|
|
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV]]
|
|
; CHECK-NEXT: store i32 [[EXT]], ptr [[GEP_DST]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[EC:%.*]] = icmp slt i64 [[IV]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP21:![0-9]+]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
|
|
%gep.src.0 = getelementptr i8, ptr %src, i64 %iv
|
|
; Dead load: %l.dead is not used by any other instruction in this function.
; NOTE(review): per the function name this is presumably the interleave
; group's insert position - confirm against the vectorizer debug output.
%l.dead = load i8, ptr %gep.src.0, align 1
|
|
%iv.1 = add i64 %iv, 1
|
|
%gep.src.1 = getelementptr i8, ptr %src, i64 %iv.1
|
|
; Live load of the byte directly after the one read by %l.dead.
%l.1 = load i8, ptr %gep.src.1, align 1
|
|
%ext = zext i8 %l.1 to i32
|
|
; %dst is indexed by %iv in i32 elements, so with the step of 4 below the
; store is strided.
%gep.dst = getelementptr i32, ptr %dst, i64 %iv
|
|
store i32 %ext, ptr %gep.dst, align 4
|
|
%iv.next = add nsw i64 %iv, 4
|
|
; Signed exit test on the pre-increment %iv against the argument %N.
%ec = icmp slt i64 %iv, %N
|
|
br i1 %ec, label %loop, label %exit
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0 is referenced by @cost_of_exit_branch_and_cond_insts and
; @gather_interleave_group_with_dead_insert_pos; it sets the per-function
; "target-features" string for those two functions.
attributes #0 = { "target-features"="+64bit,+v" }
|
|
|
|
;.
|
|
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
|
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
|
|
; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
|
|
; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]}
|
|
; CHECK: [[META6]] = distinct !{[[META6]], !"LVerDomain"}
|
|
; CHECK: [[META7]] = !{[[META8:![0-9]+]]}
|
|
; CHECK: [[META8]] = distinct !{[[META8]], [[META6]]}
|
|
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
|
|
; CHECK: [[META11]] = !{[[META12:![0-9]+]]}
|
|
; CHECK: [[META12]] = distinct !{[[META12]], [[META13:![0-9]+]]}
|
|
; CHECK: [[META13]] = distinct !{[[META13]], !"LVerDomain"}
|
|
; CHECK: [[META14]] = !{[[META15:![0-9]+]]}
|
|
; CHECK: [[META15]] = distinct !{[[META15]], [[META13]]}
|
|
; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]]}
|
|
; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META2]], [[META1]]}
|
|
;.
|