Files
clang-p2996/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll
Roman Lebedev b291597112 Revert rest of IRBuilderBase's short-circuiting folds
Upon further investigation and discussion,
this is actually the opposite direction from what we should be taking,
and this direction wouldn't solve the motivational problem anyway.

Additionally, some more (polly) tests have escaped being updated.
So, let's just take a step back here.

This reverts commit f3190dedee.
This reverts commit 749581d21f.
This reverts commit f3df87d57e.
This reverts commit ab1dbcecd6.
2021-10-28 02:15:14 +03:00

348 lines
17 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-vectorize -force-vector-width=2 -S %s | FileCheck %s
; Tests where the indices of some accesses are clamped to a small range.
; FIXME: At the moment, the runtime checks require that the indices do not wrap
; and runtime checks are emitted to ensure that. The clamped indices do
; wrap, so the vector loops are dead at the moment. But it is still
; possible to compute the bounds of the accesses and generate proper
; runtime checks.
; The relevant bounds for %gep.A are [%A, %A+4).
define void @load_clamped_index(i32* %A, i32* %B, i32 %N) {
; CHECK-LABEL: @load_clamped_index(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[TMP11]], 1
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP12]]
; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP12]]
; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP45]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[A3]], [[SCEVGEP2]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP14:%.*]] = urem i32 [[TMP13]], 4
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <2 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP17]], align 4, !alias.scope !0
; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i32> [[WIDE_LOAD]], <i32 10, i32 10>
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP13]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <2 x i32>*
; CHECK-NEXT: store <2 x i32> [[TMP18]], <2 x i32>* [[TMP21]], align 4, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[CLAMPED_INDEX:%.*]] = urem i32 [[IV]], 4
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[CLAMPED_INDEX]]
; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[GEP_A]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LV]], 10
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[IV]]
; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP_B]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%clamped.index = urem i32 %iv, 4
%gep.A = getelementptr inbounds i32, i32* %A, i32 %clamped.index
%lv = load i32, i32* %gep.A
%add = add i32 %lv, 10
%gep.B = getelementptr inbounds i32, i32* %B, i32 %iv
store i32 %add, i32* %gep.B
%iv.next = add nuw nsw i32 %iv, 1
%cond = icmp eq i32 %iv.next, %N
br i1 %cond, label %exit, label %loop
exit:
ret void
}
; The relevant bounds for %gep.A are [%A, %A+4).
define void @store_clamped_index(i32* %A, i32* %B, i32 %N) {
; CHECK-LABEL: @store_clamped_index(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[TMP11]], 1
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP12]]
; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP12]]
; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP45]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[A3]], [[SCEVGEP2]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP14:%.*]] = urem i32 [[TMP13]], 4
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <2 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP17]], align 4, !alias.scope !8, !noalias !11
; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i32> [[WIDE_LOAD]], <i32 10, i32 10>
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP14]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <2 x i32>*
; CHECK-NEXT: store <2 x i32> [[TMP18]], <2 x i32>* [[TMP21]], align 4, !alias.scope !11
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[CLAMPED_INDEX:%.*]] = urem i32 [[IV]], 4
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[IV]]
; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[GEP_B]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LV]], 10
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[CLAMPED_INDEX]]
; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP_A]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%clamped.index = urem i32 %iv, 4
%gep.B = getelementptr inbounds i32, i32* %B, i32 %iv
%lv = load i32, i32* %gep.B
%add = add i32 %lv, 10
%gep.A = getelementptr inbounds i32, i32* %A, i32 %clamped.index
store i32 %add, i32* %gep.A
%iv.next = add nuw nsw i32 %iv, 1
%cond = icmp eq i32 %iv.next, %N
br i1 %cond, label %exit, label %loop
exit:
ret void
}
define void @clamped_index_dependence_non_clamped(i32* %A, i32* %B, i32 %N) {
; CHECK-LABEL: @clamped_index_dependence_non_clamped(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[IV]]
; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[GEP_B]], align 4
; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[IV]]
; CHECK-NEXT: [[LV_A:%.*]] = load i32, i32* [[GEP_A_1]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LV]], [[LV_A]]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[CLAMPED_INDEX:%.*]] = urem i32 [[IV_NEXT]], 4
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[CLAMPED_INDEX]]
; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP_A]], align 4
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%gep.B = getelementptr inbounds i32, i32* %B, i32 %iv
%lv = load i32, i32* %gep.B
%gep.A.1 = getelementptr inbounds i32, i32* %A, i32 %iv
%lv.A = load i32, i32* %gep.A.1
%add = add i32 %lv, %lv.A
%iv.next = add nuw nsw i32 %iv, 1
%clamped.index = urem i32 %iv.next, 4
%gep.A = getelementptr inbounds i32, i32* %A, i32 %clamped.index
store i32 %add, i32* %gep.A
%cond = icmp eq i32 %iv.next, %N
br i1 %cond, label %exit, label %loop
exit:
ret void
}
define void @clamped_index_dependence_clamped_index(i32* %A, i32* %B, i32 %N) {
; CHECK-LABEL: @clamped_index_dependence_clamped_index(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[CLAMPED_INDEX_1:%.*]] = urem i32 [[IV]], 4
; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[CLAMPED_INDEX_1]]
; CHECK-NEXT: [[LV_A:%.*]] = load i32, i32* [[GEP_A_1]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LV_A]], 10
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[CLAMPED_INDEX:%.*]] = urem i32 [[IV_NEXT]], 4
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[CLAMPED_INDEX]]
; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP_A]], align 4
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N:%.*]]
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%clamped.index.1 = urem i32 %iv, 4
%gep.A.1 = getelementptr inbounds i32, i32* %A, i32 %clamped.index.1
%lv.A = load i32, i32* %gep.A.1
%add = add i32 %lv.A, 10
%iv.next = add nuw nsw i32 %iv, 1
%clamped.index = urem i32 %iv.next, 4
%gep.A = getelementptr inbounds i32, i32* %A, i32 %clamped.index
store i32 %add, i32* %gep.A
%cond = icmp eq i32 %iv.next, %N
br i1 %cond, label %exit, label %loop
exit:
ret void
}
define void @clamped_index_equal_dependence(i32* %A, i32* %B, i32 %N) {
; CHECK-LABEL: @clamped_index_equal_dependence(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = or i1 false, [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP11:%.*]] = urem i32 [[TMP10]], 4
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <2 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP14]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i32> [[WIDE_LOAD]], <i32 10, i32 10>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP13]] to <2 x i32>*
; CHECK-NEXT: store <2 x i32> [[TMP15]], <2 x i32>* [[TMP16]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[CLAMPED_INDEX:%.*]] = urem i32 [[IV]], 4
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[CLAMPED_INDEX]]
; CHECK-NEXT: [[LV_A:%.*]] = load i32, i32* [[GEP_A]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LV_A]], 10
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP_A]], align 4
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%clamped.index = urem i32 %iv, 4
%gep.A = getelementptr inbounds i32, i32* %A, i32 %clamped.index
%lv.A = load i32, i32* %gep.A
%add = add i32 %lv.A, 10
%iv.next = add nuw nsw i32 %iv, 1
store i32 %add, i32* %gep.A
%cond = icmp eq i32 %iv.next, %N
br i1 %cond, label %exit, label %loop
exit:
ret void
}