Motivating example: https://godbolt.org/z/eb97zrxhx

Here we have 2 induction variables in the loop: one corresponds to the variable i (add rdx, 4), the other to res (add rax, 2). The second induction variable can be removed by the rewriteLoopExitValues() method (the final value of res at loop exit is unroll_iter * -2); however, this doesn't happen because we have duplicated LCSSA phi nodes at the loop exit:

```
; Preheader:
for.body.preheader.new: ; preds = %for.body.preheader
  %unroll_iter = and i64 %N, -4
  br label %for.body

; Loop:
for.body: ; preds = %for.body, %for.body.preheader.new
  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 0, %for.body.preheader.new ]
  %i.07 = phi i64 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ]
  %inc.3 = add nuw i64 %i.07, 4
  %lsr.iv.next = add nsw i64 %lsr.iv, -2
  %niter.ncmp.3.not = icmp eq i64 %unroll_iter, %inc.3
  br i1 %niter.ncmp.3.not, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !llvm.loop !7

; Exit blocks
for.end.loopexit.unr-lcssa.loopexit: ; preds = %for.body
  %inc.3.lcssa = phi i64 [ %inc.3, %for.body ]
  %lsr.iv.next.lcssa11 = phi i64 [ %lsr.iv.next, %for.body ]
  %lsr.iv.next.lcssa = phi i64 [ %lsr.iv.next, %for.body ]
  br label %for.end.loopexit.unr-lcssa
```

rewriteLoopExitValues requires the %lsr.iv.next value to have only 2 uses: one in the LCSSA phi node, the other in the induction phi node. Here we have 3 uses of this value because of the duplicated LCSSA nodes, so the transform doesn't apply, which leaves an extra add operation inside the loop.

The proposed solution is to accumulate the inserted instructions that will require an LCSSA form update into a SetVector and then call formLCSSAForInstructions for this SetVector once, so the same instructions are not processed twice.

Reland fixes the issue with the preserve-lcssa.ll test: it fails when the x86_64-unknown-linux-gnu target is unavailable in opt.
The changes are moved into a separate duplicated-phis.ll test with an explicit x86 target requirement to fix bots which are not building this target.
203 lines
11 KiB
LLVM
203 lines
11 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
|
|
; PR41445: This test checks the case when LSR splits a critical edge
|
|
; and the phi node has other pending fixup operands
|
|
|
|
; RUN: opt -S -loop-reduce < %s | FileCheck %s
|
|
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; We have the %indvars.iv.lcssa phi node, 4 of whose input operands
|
|
; need to be rewritten: %tmp1, %tmp2, %tmp3, %tmp4.
|
|
; When we try to rewrite %tmp1, we first split the critical edge.
|
|
; All the other PHI inputs besides %tmp1 go to a new phi node.
|
|
; This test checks that LSR is still able to rewrite %tmp2, %tmp3, %tmp4.
|
|
define i32 @foo(ptr %A, i32 %t) {
|
|
; CHECK-LABEL: define i32 @foo(
|
|
; CHECK-SAME: ptr [[A:%.*]], i32 [[T:%.*]]) {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[LOOP_32:%.*]]
|
|
; CHECK: loop.exit.loopexitsplitsplitsplit:
|
|
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV1:%.*]], [[IFMERGE_34:%.*]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV]], -1
|
|
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT:%.*]]
|
|
; CHECK: ifmerge.38.loop.exit.loopexitsplitsplit_crit_edge:
|
|
; CHECK-NEXT: [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_38:%.*]] ]
|
|
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT]]
|
|
; CHECK: loop.exit.loopexitsplitsplit:
|
|
; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH_PH:%.*]] = phi i64 [ [[LSR_IV_LCSSA10]], [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT:%.*]] ]
|
|
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT:%.*]]
|
|
; CHECK: ifmerge.42.loop.exit.loopexitsplit_crit_edge:
|
|
; CHECK-NEXT: [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_42:%.*]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_LCSSA11]], 1
|
|
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT]]
|
|
; CHECK: loop.exit.loopexitsplit:
|
|
; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH:%.*]] = phi i64 [ [[TMP1]], [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE:%.*]] ], [ [[INDVARS_IV_LCSSA_PH_PH_PH]], [[LOOP_EXIT_LOOPEXITSPLITSPLIT]] ]
|
|
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]]
|
|
; CHECK: then.34.loop.exit.loopexit_crit_edge:
|
|
; CHECK-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV1]], [[THEN_34:%.*]] ]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[LSR_IV_LCSSA]], -2
|
|
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT]]
|
|
; CHECK: loop.exit.loopexit:
|
|
; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH:%.*]] = phi i64 [ [[TMP2]], [[THEN_34_LOOP_EXIT_LOOPEXIT_CRIT_EDGE:%.*]] ], [ [[INDVARS_IV_LCSSA_PH_PH]], [[LOOP_EXIT_LOOPEXITSPLIT]] ]
|
|
; CHECK-NEXT: br label [[LOOP_EXIT:%.*]]
|
|
; CHECK: loop.exit:
|
|
; CHECK-NEXT: [[INDVARS_IV_LCSSA:%.*]] = phi i64 [ 48, [[THEN_8:%.*]] ], [ 49, [[THEN_8_1:%.*]] ], [ [[INDVARS_IV_LCSSA_PH]], [[LOOP_EXIT_LOOPEXIT]] ]
|
|
; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV_LCSSA]] to i32
|
|
; CHECK-NEXT: br label [[FOR_END:%.*]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP]], [[LOOP_EXIT]] ], [ 50, [[THEN_8_1]] ], [ 50, [[IFMERGE_8:%.*]] ]
|
|
; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
|
|
; CHECK: loop.32:
|
|
; CHECK-NEXT: [[LSR_IV1]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I1_I64_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTIVLOOP_32:%.*]], [[IFMERGE_46]] ]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
|
|
; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
|
|
; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SCEVGEP7]], i64 -4
|
|
; CHECK-NEXT: [[GEPLOAD:%.*]] = load i32, ptr [[SCEVGEP8]], align 4
|
|
; CHECK-NEXT: [[CMP_34:%.*]] = icmp sgt i32 [[GEPLOAD]], [[T]]
|
|
; CHECK-NEXT: br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34]]
|
|
; CHECK: then.34:
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
|
|
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
|
|
; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 -8
|
|
; CHECK-NEXT: [[GEPLOAD18:%.*]] = load i32, ptr [[SCEVGEP6]], align 4
|
|
; CHECK-NEXT: [[CMP_35:%.*]] = icmp slt i32 [[GEPLOAD18]], [[T]]
|
|
; CHECK-NEXT: br i1 [[CMP_35]], label [[THEN_34_LOOP_EXIT_LOOPEXIT_CRIT_EDGE]], label [[IFMERGE_34]]
|
|
; CHECK: ifmerge.34:
|
|
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
|
|
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
|
|
; CHECK-NEXT: [[GEPLOAD20:%.*]] = load i32, ptr [[SCEVGEP4]], align 4
|
|
; CHECK-NEXT: [[CMP_38:%.*]] = icmp sgt i32 [[GEPLOAD20]], [[T]]
|
|
; CHECK-NEXT: [[CMP_39:%.*]] = icmp slt i32 [[GEPLOAD]], [[T]]
|
|
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_38]], [[CMP_39]]
|
|
; CHECK-NEXT: br i1 [[OR_COND]], label [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT]], label [[IFMERGE_38]]
|
|
; CHECK: ifmerge.38:
|
|
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
|
|
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
|
|
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 4
|
|
; CHECK-NEXT: [[GEPLOAD24:%.*]] = load i32, ptr [[SCEVGEP3]], align 4
|
|
; CHECK-NEXT: [[CMP_42:%.*]] = icmp sgt i32 [[GEPLOAD24]], [[T]]
|
|
; CHECK-NEXT: [[CMP_43:%.*]] = icmp slt i32 [[GEPLOAD20]], [[T]]
|
|
; CHECK-NEXT: [[OR_COND55:%.*]] = and i1 [[CMP_42]], [[CMP_43]]
|
|
; CHECK-NEXT: br i1 [[OR_COND55]], label [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE]], label [[IFMERGE_42]]
|
|
; CHECK: ifmerge.42:
|
|
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP7]]
|
|
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 8
|
|
; CHECK-NEXT: [[GEPLOAD28:%.*]] = load i32, ptr [[SCEVGEP1]], align 4
|
|
; CHECK-NEXT: [[CMP_46:%.*]] = icmp sgt i32 [[GEPLOAD28]], [[T]]
|
|
; CHECK-NEXT: [[CMP_47:%.*]] = icmp slt i32 [[GEPLOAD24]], [[T]]
|
|
; CHECK-NEXT: [[OR_COND56:%.*]] = and i1 [[CMP_46]], [[CMP_47]]
|
|
; CHECK-NEXT: br i1 [[OR_COND56]], label [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE]], label [[IFMERGE_46]]
|
|
; CHECK: ifmerge.46:
|
|
; CHECK-NEXT: [[NEXTIVLOOP_32]] = add nuw nsw i64 [[I1_I64_0]], 1
|
|
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV1]], 4
|
|
; CHECK-NEXT: [[CONDLOOP_32:%.*]] = icmp ult i64 [[NEXTIVLOOP_32]], 12
|
|
; CHECK-NEXT: br i1 [[CONDLOOP_32]], label [[LOOP_32]], label [[LOOP_25:%.*]]
|
|
; CHECK: loop.25:
|
|
; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 49
|
|
; CHECK-NEXT: [[GEPLOAD32:%.*]] = load i32, ptr [[ARRAYIDX31]], align 4
|
|
; CHECK-NEXT: [[CMP_8:%.*]] = icmp sgt i32 [[GEPLOAD32]], [[T]]
|
|
; CHECK-NEXT: br i1 [[CMP_8]], label [[THEN_8]], label [[IFMERGE_8]]
|
|
; CHECK: then.8:
|
|
; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 48
|
|
; CHECK-NEXT: [[GEPLOAD34:%.*]] = load i32, ptr [[ARRAYIDX33]], align 4
|
|
; CHECK-NEXT: [[CMP_15:%.*]] = icmp slt i32 [[GEPLOAD34]], [[T]]
|
|
; CHECK-NEXT: br i1 [[CMP_15]], label [[LOOP_EXIT]], label [[IFMERGE_8]]
|
|
; CHECK: ifmerge.8:
|
|
; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 50
|
|
; CHECK-NEXT: [[GEPLOAD32_1:%.*]] = load i32, ptr [[ARRAYIDX31_1]], align 4
|
|
; CHECK-NEXT: [[CMP_8_1:%.*]] = icmp sgt i32 [[GEPLOAD32_1]], [[T]]
|
|
; CHECK-NEXT: br i1 [[CMP_8_1]], label [[THEN_8_1]], label [[FOR_END]]
|
|
; CHECK: then.8.1:
|
|
; CHECK-NEXT: [[ARRAYIDX33_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 49
|
|
; CHECK-NEXT: [[GEPLOAD34_1:%.*]] = load i32, ptr [[ARRAYIDX33_1]], align 4
|
|
; CHECK-NEXT: [[CMP_15_1:%.*]] = icmp slt i32 [[GEPLOAD34_1]], [[T]]
|
|
; CHECK-NEXT: br i1 [[CMP_15_1]], label [[LOOP_EXIT]], label [[FOR_END]]
|
|
;
|
|
; Input IR starts here. The function has one loop (loop.32 .. ifmerge.46) with
; four early exits into loop.exit, followed by a loop-free tail
; (loop.25 .. then.8.1) that also branches into loop.exit or for.end.
entry:
|
|
br label %loop.32
|
|
|
|
; Shared exit block. The phi merges the constants 48 and 49 coming from the
; post-loop tail with %tmp4, %tmp3, %tmp2 and %tmp1 coming from the four
; exiting blocks of the loop.
loop.exit: ; preds = %then.8.1, %then.8, %ifmerge.42, %ifmerge.38, %ifmerge.34, %then.34
|
|
%indvars.iv.lcssa = phi i64 [ 48, %then.8 ], [ 49, %then.8.1 ], [ %tmp4, %ifmerge.42 ], [ %tmp3, %ifmerge.38 ], [ %tmp2, %ifmerge.34 ], [ %tmp1, %then.34 ]
|
|
%tmp = trunc i64 %indvars.iv.lcssa to i32
|
|
br label %for.end
|
|
|
|
; Single return point: yields the truncated loop.exit value, or the constant 50
; when reached from then.8.1 or ifmerge.8.
for.end: ; preds = %then.8.1, %ifmerge.8, %loop.exit
|
|
%i.0.lcssa = phi i32 [ %tmp, %loop.exit ], [ 50, %then.8.1 ], [ 50, %ifmerge.8 ]
|
|
ret i32 %i.0.lcssa
|
|
|
|
; The shl instruction (%tmp1) will be dead-code eliminated once all of its uses have been rewritten.
|
|
; Loop header. %i1.i64.0 starts at 0 and is incremented by 1 in ifmerge.46;
; %tmp1 = %i1.i64.0 << 2 is the base index that %tmp2..%tmp5 are derived from.
loop.32: ; preds = %ifmerge.46, %entry
|
|
%i1.i64.0 = phi i64 [ 0, %entry ], [ %nextivloop.32, %ifmerge.46 ]
|
|
%tmp1 = shl i64 %i1.i64.0, 2
|
|
%tmp2 = or disjoint i64 %tmp1, 1
|
|
%arrayIdx = getelementptr inbounds i32, ptr %A, i64 %tmp2
|
|
%gepload = load i32, ptr %arrayIdx, align 4
|
|
%cmp.34 = icmp sgt i32 %gepload, %t
|
|
br i1 %cmp.34, label %then.34, label %ifmerge.34
|
|
|
|
; First loop exit: leaves with %tmp1 when A[%tmp1] < %t.
then.34: ; preds = %loop.32
|
|
%arrayIdx17 = getelementptr inbounds i32, ptr %A, i64 %tmp1
|
|
%gepload18 = load i32, ptr %arrayIdx17, align 4
|
|
%cmp.35 = icmp slt i32 %gepload18, %t
|
|
br i1 %cmp.35, label %loop.exit, label %ifmerge.34
|
|
|
|
; Second loop exit: leaves with %tmp2 when A[%tmp3] > %t and A[%tmp2] < %t.
ifmerge.34: ; preds = %then.34, %loop.32
|
|
%tmp3 = or disjoint i64 %tmp1, 2
|
|
%arrayIdx19 = getelementptr inbounds i32, ptr %A, i64 %tmp3
|
|
%gepload20 = load i32, ptr %arrayIdx19, align 4
|
|
%cmp.38 = icmp sgt i32 %gepload20, %t
|
|
%cmp.39 = icmp slt i32 %gepload, %t
|
|
%or.cond = and i1 %cmp.38, %cmp.39
|
|
br i1 %or.cond, label %loop.exit, label %ifmerge.38
|
|
|
|
; Third loop exit: leaves with %tmp3 when A[%tmp4] > %t and A[%tmp3] < %t.
ifmerge.38: ; preds = %ifmerge.34
|
|
%tmp4 = or disjoint i64 %tmp1, 3
|
|
%arrayIdx23 = getelementptr inbounds i32, ptr %A, i64 %tmp4
|
|
%gepload24 = load i32, ptr %arrayIdx23, align 4
|
|
%cmp.42 = icmp sgt i32 %gepload24, %t
|
|
%cmp.43 = icmp slt i32 %gepload20, %t
|
|
%or.cond55 = and i1 %cmp.42, %cmp.43
|
|
br i1 %or.cond55, label %loop.exit, label %ifmerge.42
|
|
|
|
; Fourth loop exit: leaves with %tmp4 when A[%tmp5] > %t and A[%tmp4] < %t.
ifmerge.42: ; preds = %ifmerge.38
|
|
%tmp5 = add i64 %tmp1, 4
|
|
%arrayIdx27 = getelementptr inbounds i32, ptr %A, i64 %tmp5
|
|
%gepload28 = load i32, ptr %arrayIdx27, align 4
|
|
%cmp.46 = icmp sgt i32 %gepload28, %t
|
|
%cmp.47 = icmp slt i32 %gepload24, %t
|
|
%or.cond56 = and i1 %cmp.46, %cmp.47
|
|
br i1 %or.cond56, label %loop.exit, label %ifmerge.46
|
|
|
|
; Loop latch: bumps the counter and keeps looping while it is (unsigned) < 12.
ifmerge.46: ; preds = %ifmerge.42
|
|
%nextivloop.32 = add nuw nsw i64 %i1.i64.0, 1
|
|
%condloop.32 = icmp ult i64 %nextivloop.32, 12
|
|
br i1 %condloop.32, label %loop.32, label %loop.25
|
|
|
|
; Post-loop tail: the remaining blocks compare A[49], A[48], A[50] and A[49]
; (constant indices) against %t and feed 48, 49 or 50 into the exit phis.
loop.25: ; preds = %ifmerge.46
|
|
%arrayIdx31 = getelementptr inbounds i32, ptr %A, i64 49
|
|
%gepload32 = load i32, ptr %arrayIdx31, align 4
|
|
%cmp.8 = icmp sgt i32 %gepload32, %t
|
|
br i1 %cmp.8, label %then.8, label %ifmerge.8
|
|
|
|
then.8: ; preds = %loop.25
|
|
%arrayIdx33 = getelementptr inbounds i32, ptr %A, i64 48
|
|
%gepload34 = load i32, ptr %arrayIdx33, align 4
|
|
%cmp.15 = icmp slt i32 %gepload34, %t
|
|
br i1 %cmp.15, label %loop.exit, label %ifmerge.8
|
|
|
|
ifmerge.8: ; preds = %then.8, %loop.25
|
|
%arrayIdx31.1 = getelementptr inbounds i32, ptr %A, i64 50
|
|
%gepload32.1 = load i32, ptr %arrayIdx31.1, align 4
|
|
%cmp.8.1 = icmp sgt i32 %gepload32.1, %t
|
|
br i1 %cmp.8.1, label %then.8.1, label %for.end
|
|
|
|
then.8.1: ; preds = %ifmerge.8
|
|
%arrayIdx33.1 = getelementptr inbounds i32, ptr %A, i64 49
|
|
%gepload34.1 = load i32, ptr %arrayIdx33.1, align 4
|
|
%cmp.15.1 = icmp slt i32 %gepload34.1, %t
|
|
br i1 %cmp.15.1, label %loop.exit, label %for.end
|
|
}
|