Create an IR basic block directly for the middle.block, instead of creating the IR BB during skeleton creation and then replacing the middle VPBB with a VPIRBB. This moves another part of skeleton creation into VPlan and simplifies the code slightly by removing the code that disconnects the middle block from the vector preheader, along with the corresponding DT update. NFC modulo block creation order, which changes the IR names of the generated blocks.
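For readers less familiar with what "creating the IR BB directly" means in practice, here is a small standalone C++ sketch of the general pattern using only public LLVM IR APIs. It is illustrative only: it is not the vectorizer's or VPlan's actual code, and the helper name buildSkeletonIllustration and the exact block layout are made up for this example.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Hypothetical helper, not part of LLVM: build a tiny skeleton CFG in which
// every block, including "middle.block", is created up front with its final
// name. Because no block is created elsewhere and swapped in later, there is
// nothing to disconnect and no dominator-tree update to patch up afterwards.
static void buildSkeletonIllustration(Module &M) {
  LLVMContext &Ctx = M.getContext();
  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "skeleton", M);

  // Create each block directly with its final name.
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  BasicBlock *VectorPH = BasicBlock::Create(Ctx, "vector.ph", F);
  BasicBlock *MiddleBB = BasicBlock::Create(Ctx, "middle.block", F);
  BasicBlock *ScalarPH = BasicBlock::Create(Ctx, "scalar.ph", F);

  // Wire the blocks together once; no later replacement is needed.
  IRBuilder<> B(Entry);
  B.CreateBr(VectorPH);
  B.SetInsertPoint(VectorPH);
  B.CreateBr(MiddleBB);
  B.SetInsertPoint(MiddleBB);
  B.CreateBr(ScalarPH);
  B.SetInsertPoint(ScalarPH);
  B.CreateRetVoid();
}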
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -force-vector-width=4 | FileCheck %s

declare void @init_mem(ptr, i64);

define i64 @same_exit_block_phi_of_consts() {
; CHECK-LABEL: define i64 @same_exit_block_phi_of_consts() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[LOOP_INC]] ], [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  %p1 = alloca [1024 x i8]
  %p2 = alloca [1024 x i8]
  call void @init_mem(ptr %p1, i64 1024)
  call void @init_mem(ptr %p2, i64 1024)
  br label %loop

loop:
  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
  %ld1 = load i8, ptr %arrayidx, align 1
  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
  %ld2 = load i8, ptr %arrayidx1, align 1
  %cmp3 = icmp eq i8 %ld1, %ld2
  br i1 %cmp3, label %loop.inc, label %loop.end

loop.inc:
  %index.next = add i64 %index, 1
  %exitcond = icmp ne i64 %index.next, 67
  br i1 %exitcond, label %loop, label %loop.end

loop.end:
  %retval = phi i64 [ 0, %loop ], [ 1, %loop.inc ]
  ret i64 %retval
}

define i64 @diff_exit_block_phi_of_consts() {
; CHECK-LABEL: define i64 @diff_exit_block_phi_of_consts() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]]
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: loop.early.exit:
; CHECK-NEXT: ret i64 0
; CHECK: loop.end:
; CHECK-NEXT: ret i64 1
;
entry:
  %p1 = alloca [1024 x i8]
  %p2 = alloca [1024 x i8]
  call void @init_mem(ptr %p1, i64 1024)
  call void @init_mem(ptr %p2, i64 1024)
  br label %loop

loop:
  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
  %ld1 = load i8, ptr %arrayidx, align 1
  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
  %ld2 = load i8, ptr %arrayidx1, align 1
  %cmp3 = icmp eq i8 %ld1, %ld2
  br i1 %cmp3, label %loop.inc, label %loop.early.exit

loop.inc:
  %index.next = add i64 %index, 1
  %exitcond = icmp ne i64 %index.next, 67
  br i1 %exitcond, label %loop, label %loop.end

loop.early.exit:
  ret i64 0

loop.end:
  ret i64 1
}

; The form of the induction variables requires SCEV predicates.
define i32 @diff_exit_block_needs_scev_check(i32 %end) {
; CHECK-LABEL: define i32 @diff_exit_block_needs_scev_check(
; CHECK-SAME: i32 [[END:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END]], 1023
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[END]] to i10
; CHECK-NEXT: [[TMP1:%.*]] = zext i10 [[TMP0]] to i64
; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1)
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[N_VEC]] to i8
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]])
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP15]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: br label [[FOUND:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]]
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]]
; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[CMP_EARLY:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]]
; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND]], label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32
; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: found:
; CHECK-NEXT: ret i32 1
; CHECK: exit:
; CHECK-NEXT: ret i32 0
;
entry:
  %p1 = alloca [1024 x i32]
  %p2 = alloca [1024 x i32]
  call void @init_mem(ptr %p1, i64 1024)
  call void @init_mem(ptr %p2, i64 1024)
  %end.clamped = and i32 %end, 1023
  br label %for.body

for.body:
  %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ]
  %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ]
  %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %gep.ind
  %0 = load i32, ptr %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind
  %1 = load i32, ptr %arrayidx2, align 4
  %cmp.early = icmp eq i32 %0, %1
  br i1 %cmp.early, label %found, label %for.inc

for.inc:
  %ind.next = add i8 %ind, 1
  %conv = zext i8 %ind.next to i32
  %gep.ind.next = add i64 %gep.ind, 1
  %cmp = icmp ult i32 %conv, %end.clamped
  br i1 %cmp, label %for.body, label %exit

found:
  ret i32 1

exit:
  ret i32 0
}

declare void @abort()

; This is a variant of an early exit loop where the condition for leaving
; early is loop invariant.
define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) {
; CHECK-LABEL: define i32 @diff_blocks_invariant_early_exit_cond(
; CHECK-SAME: ptr [[S:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SVAL:%.*]] = load i32, ptr [[S]], align 4
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[SVAL]], 0
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[COND]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]])
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 276
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[TMP1]], [[TMP2]]
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP1]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: br label [[EARLY_EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 266, [[MIDDLE_BLOCK]] ], [ -10, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_INC]], label [[EARLY_EXIT]]
; CHECK: for.inc:
; CHECK-NEXT: [[IND_NEXT]] = add nsw i32 [[IND]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IND_NEXT]], 266
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: early.exit:
; CHECK-NEXT: tail call void @abort()
; CHECK-NEXT: unreachable
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
entry:
  %sval = load i32, ptr %s, align 4
  %cond = icmp eq i32 %sval, 0
  br label %for.body

for.body:
  %ind = phi i32 [ -10, %entry ], [ %ind.next, %for.inc ]
  br i1 %cond, label %for.inc, label %early.exit

for.inc:
  %ind.next = add nsw i32 %ind, 1
  %exitcond.not = icmp eq i32 %ind.next, 266
  br i1 %exitcond.not, label %for.end, label %for.body

early.exit:
  tail call void @abort()
  unreachable

for.end:
  ret i32 0
}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
;.