Files
clang-p2996/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
Nikita Popov 81c648a3d9 [LoopUnroll] Freeze tripcount rather than condition
This is a followup to D125754. We introduce two branches, one
before the unrolled loop and one before the epilogue (and similar
for the prologue case). The previous patch only froze the
condition on the first branch.

Rather than independently freezing the second condition, this patch
instead freezes TripCount and bases BECount on it. These are the
two quantities involved in the conditions, and this ensures that
both work on a consistent, non-poisonous trip count.

Differential Revision: https://reviews.llvm.org/D125896
2022-05-24 09:42:39 +02:00

177 lines
11 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-other-exit-predictable=true -verify-loop-lcssa -verify-dom-info -verify-loop-info -S | FileCheck %s --check-prefix=ENABLED
; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-other-exit-predictable=false -verify-loop-lcssa -verify-dom-info -verify-loop-info -S | FileCheck %s --check-prefix=DISABLED
define i32 @test(i32* nocapture %a, i64 %n) {
; ENABLED-LABEL: @test(
; ENABLED-NEXT: entry:
; ENABLED-NEXT: [[TMP0:%.*]] = freeze i64 [[N:%.*]]
; ENABLED-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -1
; ENABLED-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
; ENABLED-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
; ENABLED-NEXT: br i1 [[TMP2]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; ENABLED: entry.new:
; ENABLED-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
; ENABLED-NEXT: br label [[HEADER:%.*]]
; ENABLED: header:
; ENABLED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY_7:%.*]] ]
; ENABLED-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[FOR_BODY_7]] ]
; ENABLED-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY_7]] ]
; ENABLED-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42
; ENABLED-NEXT: br i1 [[CMP]], label [[FOR_EXIT2_LOOPEXIT:%.*]], label [[FOR_BODY:%.*]]
; ENABLED: for.body:
; ENABLED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
; ENABLED-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; ENABLED-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
; ENABLED-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; ENABLED-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i64 [[NITER]], 1
; ENABLED-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
; ENABLED-NEXT: br i1 [[CMP_1]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_1:%.*]]
; ENABLED: for.body.1:
; ENABLED-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
; ENABLED-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
; ENABLED-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
; ENABLED-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
; ENABLED-NEXT: [[NITER_NEXT_1:%.*]] = add nuw nsw i64 [[NITER_NEXT]], 1
; ENABLED-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
; ENABLED-NEXT: br i1 [[CMP_2]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_2:%.*]]
; ENABLED: for.body.2:
; ENABLED-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]]
; ENABLED-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
; ENABLED-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
; ENABLED-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
; ENABLED-NEXT: [[NITER_NEXT_2:%.*]] = add nuw nsw i64 [[NITER_NEXT_1]], 1
; ENABLED-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
; ENABLED-NEXT: br i1 [[CMP_3]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_3:%.*]]
; ENABLED: for.body.3:
; ENABLED-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]]
; ENABLED-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
; ENABLED-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
; ENABLED-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
; ENABLED-NEXT: [[NITER_NEXT_3:%.*]] = add nuw nsw i64 [[NITER_NEXT_2]], 1
; ENABLED-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
; ENABLED-NEXT: br i1 [[CMP_4]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_4:%.*]]
; ENABLED: for.body.4:
; ENABLED-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]]
; ENABLED-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
; ENABLED-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
; ENABLED-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
; ENABLED-NEXT: [[NITER_NEXT_4:%.*]] = add nuw nsw i64 [[NITER_NEXT_3]], 1
; ENABLED-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
; ENABLED-NEXT: br i1 [[CMP_5]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_5:%.*]]
; ENABLED: for.body.5:
; ENABLED-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]]
; ENABLED-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
; ENABLED-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
; ENABLED-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
; ENABLED-NEXT: [[NITER_NEXT_5:%.*]] = add nuw nsw i64 [[NITER_NEXT_4]], 1
; ENABLED-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
; ENABLED-NEXT: br i1 [[CMP_6]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_6:%.*]]
; ENABLED: for.body.6:
; ENABLED-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]]
; ENABLED-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
; ENABLED-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
; ENABLED-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
; ENABLED-NEXT: [[NITER_NEXT_6:%.*]] = add nuw nsw i64 [[NITER_NEXT_5]], 1
; ENABLED-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
; ENABLED-NEXT: br i1 [[CMP_7]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_7]]
; ENABLED: for.body.7:
; ENABLED-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]]
; ENABLED-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
; ENABLED-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
; ENABLED-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV_NEXT_6]], 1
; ENABLED-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER_NEXT_6]], 1
; ENABLED-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
; ENABLED-NEXT: br i1 [[NITER_NCMP_7]], label [[FOR_END_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
; ENABLED: for.end.unr-lcssa.loopexit:
; ENABLED-NEXT: [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[FOR_BODY_7]] ]
; ENABLED-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[FOR_BODY_7]] ]
; ENABLED-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[FOR_BODY_7]] ]
; ENABLED-NEXT: br label [[FOR_END_UNR_LCSSA]]
; ENABLED: for.end.unr-lcssa:
; ENABLED-NEXT: [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[SUM_0_LCSSA_PH_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
; ENABLED-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
; ENABLED-NEXT: [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
; ENABLED-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; ENABLED-NEXT: br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[FOR_END:%.*]]
; ENABLED: header.epil.preheader:
; ENABLED-NEXT: br label [[HEADER_EPIL:%.*]]
; ENABLED: header.epil:
; ENABLED-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
; ENABLED-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
; ENABLED-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, [[HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ]
; ENABLED-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
; ENABLED-NEXT: br i1 [[CMP_EPIL]], label [[FOR_EXIT2_LOOPEXIT2:%.*]], label [[FOR_BODY_EPIL]]
; ENABLED: for.body.epil:
; ENABLED-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_EPIL]]
; ENABLED-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
; ENABLED-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
; ENABLED-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
; ENABLED-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
; ENABLED-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
; ENABLED-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
; ENABLED-NEXT: br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[FOR_END_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; ENABLED: for.end.epilog-lcssa:
; ENABLED-NEXT: [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ]
; ENABLED-NEXT: br label [[FOR_END]]
; ENABLED: for.end:
; ENABLED-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[FOR_END_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH1]], [[FOR_END_EPILOG_LCSSA]] ]
; ENABLED-NEXT: ret i32 [[SUM_0_LCSSA]]
; ENABLED: for.exit2.loopexit:
; ENABLED-NEXT: [[RETVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[HEADER]] ], [ [[ADD]], [[FOR_BODY]] ], [ [[ADD_1]], [[FOR_BODY_1]] ], [ [[ADD_2]], [[FOR_BODY_2]] ], [ [[ADD_3]], [[FOR_BODY_3]] ], [ [[ADD_4]], [[FOR_BODY_4]] ], [ [[ADD_5]], [[FOR_BODY_5]] ], [ [[ADD_6]], [[FOR_BODY_6]] ]
; ENABLED-NEXT: br label [[FOR_EXIT2:%.*]]
; ENABLED: for.exit2.loopexit2:
; ENABLED-NEXT: [[RETVAL_PH3:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[HEADER_EPIL]] ]
; ENABLED-NEXT: br label [[FOR_EXIT2]]
; ENABLED: for.exit2:
; ENABLED-NEXT: [[RETVAL:%.*]] = phi i32 [ [[RETVAL_PH]], [[FOR_EXIT2_LOOPEXIT]] ], [ [[RETVAL_PH3]], [[FOR_EXIT2_LOOPEXIT2]] ]
; ENABLED-NEXT: ret i32 [[RETVAL]]
;
; DISABLED-LABEL: @test(
; DISABLED-NEXT: entry:
; DISABLED-NEXT: br label [[HEADER:%.*]]
; DISABLED: header:
; DISABLED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; DISABLED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
; DISABLED-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
; DISABLED-NEXT: br i1 [[CMP]], label [[FOR_EXIT2:%.*]], label [[FOR_BODY]]
; DISABLED: for.body:
; DISABLED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
; DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; DISABLED-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
; DISABLED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; DISABLED-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; DISABLED-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[HEADER]]
; DISABLED: for.end:
; DISABLED-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
; DISABLED-NEXT: ret i32 [[SUM_0_LCSSA]]
; DISABLED: for.exit2:
; DISABLED-NEXT: [[RETVAL:%.*]] = phi i32 [ [[SUM_02]], [[HEADER]] ]
; DISABLED-NEXT: ret i32 [[RETVAL]]
;
entry:
br label %header
header:
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%cmp = icmp eq i64 %n, 42
br i1 %cmp, label %for.exit2, label %for.body
for.body:
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %sum.02
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.end, label %header
for.end:
%sum.0.lcssa = phi i32 [ %add, %for.body ]
ret i32 %sum.0.lcssa
for.exit2:
%retval = phi i32 [ %sum.02, %header ]
ret i32 %retval
}