Previously the branch from the middle block to the scalar preheader & exit was being set-up at the end of skeleton creation in completeLoopSkeleton. Inserting SCEV or runtime checks may result in LCSSA phis being created, if they are required. Adjusting branches afterwards may break those PHIs. To avoid this, we can instead create the branch from the middle block to the exit after we created the middle block, so we have the final CFG before potentially adjusting/creating PHIs. This fixes a crash for the included test case. For the non-crashing case, this is almost a NFC with respect to the generated code. The only change is the order of the predecessors of the involved branch targets. Note an assertion was moved from LoopVersioning() to LoopVersioning::versionLoop. Adjusting the branches means loop-simplify form may be broken before constructing LoopVersioning. But LV only uses LoopVersioning to annotate the loop instructions with !noalias metadata, which does not require loop-simplify form. This is a fix for an existing issue uncovered by D93317.
138 lines
7.2 KiB
LLVM
138 lines
7.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s
|
|
|
|
; Make sure LV does not crash when creating runtime checks involving values from
|
|
; other loops.
|
|
define i16 @test(i16** %arg, i64 %N) {
|
|
; CHECK-LABEL: @test(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[OUTER:%.*]]
|
|
; CHECK: outer:
|
|
; CHECK-NEXT: [[L_1:%.*]] = load i16*, i16** [[ARG:%.*]], align 8
|
|
; CHECK-NEXT: [[L_2:%.*]] = load i16*, i16** [[ARG]], align 8
|
|
; CHECK-NEXT: [[C_1:%.*]] = call i1 @cond()
|
|
; CHECK-NEXT: br i1 [[C_1]], label [[OUTER_BACKEDGE:%.*]], label [[INNER_PREHEADER:%.*]]
|
|
; CHECK: outer.backedge:
|
|
; CHECK-NEXT: br label [[OUTER]]
|
|
; CHECK: inner.preheader:
|
|
; CHECK-NEXT: br label [[INNER:%.*]]
|
|
; CHECK: inner:
|
|
; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond()
|
|
; CHECK-NEXT: br i1 [[C_2]], label [[OUTER_LATCH:%.*]], label [[INNER_BB:%.*]]
|
|
; CHECK: inner.bb:
|
|
; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond()
|
|
; CHECK-NEXT: br i1 [[C_3]], label [[LOOP3_PREHEADER:%.*]], label [[INNER_LATCH:%.*]]
|
|
; CHECK: loop3.preheader:
|
|
; CHECK-NEXT: [[L_1_LCSSA11:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ]
|
|
; CHECK-NEXT: [[L_1_LCSSA:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ]
|
|
; CHECK-NEXT: [[L_2_LCSSA:%.*]] = phi i16* [ [[L_2]], [[INNER_BB]] ]
|
|
; CHECK-NEXT: [[L_2_LCSSA4:%.*]] = bitcast i16* [[L_2_LCSSA]] to i8*
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
|
|
; CHECK: vector.memcheck:
|
|
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[L_2_LCSSA4]], i64 1
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[L_1_LCSSA]], i64 1
|
|
; CHECK-NEXT: [[SCEVGEP9:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[N]], 2
|
|
; CHECK-NEXT: [[SCEVGEP10:%.*]] = getelementptr i16, i16* [[L_1_LCSSA11]], i64 [[TMP1]]
|
|
; CHECK-NEXT: [[SCEVGEP1013:%.*]] = bitcast i16* [[SCEVGEP10]] to i8*
|
|
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[L_2_LCSSA4]], [[SCEVGEP1013]]
|
|
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP9]], [[UGLYGEP]]
|
|
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
|
|
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true
|
|
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
|
|
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
|
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[L_1]], i64 [[TMP3]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0
|
|
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>*
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, <2 x i16>* [[TMP6]], align 2, !alias.scope !0
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[L_2]], i64 0
|
|
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 0
|
|
; CHECK-NEXT: store i16 [[TMP8]], i16* [[TMP7]], align 2, !alias.scope !3, !noalias !0
|
|
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i32 1
|
|
; CHECK-NEXT: store i16 [[TMP9]], i16* [[TMP7]], align 2, !alias.scope !3, !noalias !0
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[L_18:%.*]] = phi i16* [ [[L_1_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[L_1_LCSSA]], [[LOOP3_PREHEADER]] ], [ [[L_1_LCSSA]], [[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: [[L_23:%.*]] = phi i16* [ [[L_2_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[L_2_LCSSA]], [[LOOP3_PREHEADER]] ], [ [[L_2_LCSSA]], [[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP3_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
|
|
; CHECK-NEXT: br label [[LOOP3:%.*]]
|
|
; CHECK: inner.latch:
|
|
; CHECK-NEXT: [[C_4:%.*]] = call i1 @cond()
|
|
; CHECK-NEXT: br i1 [[C_4]], label [[EXIT_LOOPEXIT1:%.*]], label [[INNER]]
|
|
; CHECK: outer.latch:
|
|
; CHECK-NEXT: br label [[OUTER_BACKEDGE]]
|
|
; CHECK: loop3:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP3]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[C_5:%.*]] = icmp ult i64 [[IV]], [[N]]
|
|
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[L_18]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[LOOP_L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
|
|
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[L_23]], i64 0
|
|
; CHECK-NEXT: store i16 [[LOOP_L_1]], i16* [[GEP_2]], align 2
|
|
; CHECK-NEXT: br i1 [[C_5]], label [[LOOP3]], label [[EXIT_LOOPEXIT]], [[LOOP7:!llvm.loop !.*]]
|
|
; CHECK: exit.loopexit:
|
|
; CHECK-NEXT: [[L_17:%.*]] = phi i16* [ [[L_1_LCSSA]], [[MIDDLE_BLOCK]] ], [ [[L_18]], [[LOOP3]] ]
|
|
; CHECK-NEXT: br label [[EXIT:%.*]]
|
|
; CHECK: exit.loopexit1:
|
|
; CHECK-NEXT: [[L_1_LCSSA5:%.*]] = phi i16* [ [[L_1]], [[INNER_LATCH]] ]
|
|
; CHECK-NEXT: br label [[EXIT]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: [[L_16:%.*]] = phi i16* [ [[L_1_LCSSA5]], [[EXIT_LOOPEXIT1]] ], [ [[L_17]], [[EXIT_LOOPEXIT]] ]
|
|
; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[L_16]], align 2
|
|
; CHECK-NEXT: ret i16 [[L_3]]
|
|
;
|
|
entry:
|
|
br label %outer
|
|
|
|
outer:
|
|
%l.1 = load i16*, i16** %arg, align 8
|
|
%l.2 = load i16*, i16** %arg, align 8
|
|
%c.1 = call i1 @cond()
|
|
br i1 %c.1, label %outer, label %inner
|
|
|
|
inner: ; preds = %bb15, %bb1
|
|
%c.2 = call i1 @cond()
|
|
br i1 %c.2, label %outer.latch, label %inner.bb
|
|
|
|
inner.bb: ; preds = %bb3
|
|
%c.3 = call i1 @cond()
|
|
br i1 %c.3, label %loop3, label %inner.latch
|
|
|
|
inner.latch: ; preds = %bb4
|
|
%c.4 = call i1 @cond()
|
|
br i1 %c.4, label %exit, label %inner
|
|
|
|
outer.latch: ; preds = %bb3
|
|
br label %outer
|
|
|
|
loop3: ; preds = %bb9, %bb4
|
|
%iv = phi i64 [ %iv.next, %loop3 ], [ 0, %inner.bb ]
|
|
%iv.next = add nsw nuw i64 %iv, 1
|
|
%c.5 = icmp ult i64 %iv, %N
|
|
%gep.1 = getelementptr inbounds i16, i16* %l.1, i64 %iv.next
|
|
%loop.l.1 = load i16, i16* %gep.1, align 2
|
|
%gep.2 = getelementptr inbounds i16, i16* %l.2, i64 0
|
|
store i16 %loop.l.1, i16* %gep.2 , align 2
|
|
br i1 %c.5, label %loop3, label %exit
|
|
|
|
exit: ; preds = %bb15, %bb5
|
|
%l.3 = load i16, i16* %l.1, align 2
|
|
ret i16 %l.3
|
|
}
|
|
|
|
declare i1 @cond()
|