Files
clang-p2996/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
Philip Reames 8d85e945b2 [SCEV] Canonicalize X - urem X, Y patterns
There are multiple possible ways to represent the X - urem X, Y pattern. SCEV was not canonicalizing, and thus, depending on which you analyzed, you could get different results. The sub representation appears to produce strictly inferior results in practice, so I decided to canonicalize to the Y * X/Y version.

The motivation here is that runtime unroll produces the sub X - (and X, Y-1) pattern when Y is a power of two. SCEV is thus unable to recognize that an unrolled loop exits because we don't figure out that the new unrolled step evenly divides the trip count of the unrolled loop. After instcombine runs, we convert the the andn form which SCEV recognizes, so essentially, this is just fixing a nasty pass ordering dependency.

The ARM loop hardware interaction in the test diff is opague to me, but the comments in the review from others knowledge of the infrastructure appear to indicate these are improvements in loop recognition, not regressions.

Differential Revision: https://reviews.llvm.org/D114018
2021-11-16 11:59:21 -08:00

157 lines
11 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll' -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll' -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
; Given that the trip-count of this loop is a 3-bit value, we cannot
; safely unroll it with a count of anything more than 8.
define i3 @test(i3* %a, i3 %n) {
; UNROLL-16-LABEL: @test(
; UNROLL-16-NEXT: entry:
; UNROLL-16-NEXT: [[CMP1:%.*]] = icmp eq i3 [[N:%.*]], 0
; UNROLL-16-NEXT: br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; UNROLL-16: for.body.preheader:
; UNROLL-16-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-16: for.body:
; UNROLL-16-NEXT: [[TMP0:%.*]] = load i3, i3* [[A:%.*]], align 1
; UNROLL-16-NEXT: [[EXITCOND:%.*]] = icmp eq i3 1, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_1:%.*]]
; UNROLL-16: for.body.1:
; UNROLL-16-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 1
; UNROLL-16-NEXT: [[TMP1:%.*]] = load i3, i3* [[ARRAYIDX_1]], align 1
; UNROLL-16-NEXT: [[ADD_1:%.*]] = add nsw i3 [[TMP1]], [[TMP0]]
; UNROLL-16-NEXT: [[EXITCOND_1:%.*]] = icmp eq i3 2, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_1]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_2:%.*]]
; UNROLL-16: for.body.2:
; UNROLL-16-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 2
; UNROLL-16-NEXT: [[TMP2:%.*]] = load i3, i3* [[ARRAYIDX_2]], align 1
; UNROLL-16-NEXT: [[ADD_2:%.*]] = add nsw i3 [[TMP2]], [[ADD_1]]
; UNROLL-16-NEXT: [[EXITCOND_2:%.*]] = icmp eq i3 3, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_2]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_3:%.*]]
; UNROLL-16: for.body.3:
; UNROLL-16-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 3
; UNROLL-16-NEXT: [[TMP3:%.*]] = load i3, i3* [[ARRAYIDX_3]], align 1
; UNROLL-16-NEXT: [[ADD_3:%.*]] = add nsw i3 [[TMP3]], [[ADD_2]]
; UNROLL-16-NEXT: [[EXITCOND_3:%.*]] = icmp eq i3 -4, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_3]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_4:%.*]]
; UNROLL-16: for.body.4:
; UNROLL-16-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 4
; UNROLL-16-NEXT: [[TMP4:%.*]] = load i3, i3* [[ARRAYIDX_4]], align 1
; UNROLL-16-NEXT: [[ADD_4:%.*]] = add nsw i3 [[TMP4]], [[ADD_3]]
; UNROLL-16-NEXT: [[EXITCOND_4:%.*]] = icmp eq i3 -3, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_4]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_5:%.*]]
; UNROLL-16: for.body.5:
; UNROLL-16-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 5
; UNROLL-16-NEXT: [[TMP5:%.*]] = load i3, i3* [[ARRAYIDX_5]], align 1
; UNROLL-16-NEXT: [[ADD_5:%.*]] = add nsw i3 [[TMP5]], [[ADD_4]]
; UNROLL-16-NEXT: [[EXITCOND_5:%.*]] = icmp eq i3 -2, [[N]]
; UNROLL-16-NEXT: br i1 [[EXITCOND_5]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_6:%.*]]
; UNROLL-16: for.body.6:
; UNROLL-16-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 6
; UNROLL-16-NEXT: [[TMP6:%.*]] = load i3, i3* [[ARRAYIDX_6]], align 1
; UNROLL-16-NEXT: [[ADD_6:%.*]] = add nsw i3 [[TMP6]], [[ADD_5]]
; UNROLL-16-NEXT: br label [[FOR_END_LOOPEXIT]]
; UNROLL-16: for.end.loopexit:
; UNROLL-16-NEXT: [[ADD_LCSSA:%.*]] = phi i3 [ [[TMP0]], [[FOR_BODY]] ], [ [[ADD_1]], [[FOR_BODY_1]] ], [ [[ADD_2]], [[FOR_BODY_2]] ], [ [[ADD_3]], [[FOR_BODY_3]] ], [ [[ADD_4]], [[FOR_BODY_4]] ], [ [[ADD_5]], [[FOR_BODY_5]] ], [ [[ADD_6]], [[FOR_BODY_6]] ]
; UNROLL-16-NEXT: br label [[FOR_END]]
; UNROLL-16: for.end:
; UNROLL-16-NEXT: [[SUM_0_LCSSA:%.*]] = phi i3 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; UNROLL-16-NEXT: ret i3 [[SUM_0_LCSSA]]
;
; UNROLL-4-LABEL: @test(
; UNROLL-4-NEXT: entry:
; UNROLL-4-NEXT: [[CMP1:%.*]] = icmp eq i3 [[N:%.*]], 0
; UNROLL-4-NEXT: br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; UNROLL-4: for.body.preheader:
; UNROLL-4-NEXT: [[TMP0:%.*]] = add i3 [[N]], -1
; UNROLL-4-NEXT: [[XTRAITER:%.*]] = and i3 [[N]], 3
; UNROLL-4-NEXT: [[TMP1:%.*]] = icmp ult i3 [[TMP0]], 3
; UNROLL-4-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
; UNROLL-4: for.body.preheader.new:
; UNROLL-4-NEXT: [[UNROLL_ITER:%.*]] = sub i3 [[N]], [[XTRAITER]]
; UNROLL-4-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-4: for.body:
; UNROLL-4-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ]
; UNROLL-4-NEXT: [[SUM_02:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD_3:%.*]], [[FOR_BODY]] ]
; UNROLL-4-NEXT: [[NITER:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY]] ]
; UNROLL-4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i3, i3* [[A:%.*]], i64 [[INDVARS_IV]]
; UNROLL-4-NEXT: [[TMP2:%.*]] = load i3, i3* [[ARRAYIDX]], align 1
; UNROLL-4-NEXT: [[ADD:%.*]] = add nsw i3 [[TMP2]], [[SUM_02]]
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-4-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i3 [[NITER]], 1
; UNROLL-4-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-4-NEXT: [[TMP3:%.*]] = load i3, i3* [[ARRAYIDX_1]], align 1
; UNROLL-4-NEXT: [[ADD_1:%.*]] = add nsw i3 [[TMP3]], [[ADD]]
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
; UNROLL-4-NEXT: [[NITER_NEXT_1:%.*]] = add nuw nsw i3 [[NITER_NEXT]], 1
; UNROLL-4-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_1]]
; UNROLL-4-NEXT: [[TMP4:%.*]] = load i3, i3* [[ARRAYIDX_2]], align 1
; UNROLL-4-NEXT: [[ADD_2:%.*]] = add nsw i3 [[TMP4]], [[ADD_1]]
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
; UNROLL-4-NEXT: [[NITER_NEXT_2:%.*]] = add nuw nsw i3 [[NITER_NEXT_1]], 1
; UNROLL-4-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_2]]
; UNROLL-4-NEXT: [[TMP5:%.*]] = load i3, i3* [[ARRAYIDX_3]], align 1
; UNROLL-4-NEXT: [[ADD_3]] = add nsw i3 [[TMP5]], [[ADD_2]]
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
; UNROLL-4-NEXT: [[NITER_NEXT_3]] = add i3 [[NITER_NEXT_2]], 1
; UNROLL-4-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i3 [[NITER_NEXT_3]], [[UNROLL_ITER]]
; UNROLL-4-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; UNROLL-4: for.end.loopexit.unr-lcssa.loopexit:
; UNROLL-4-NEXT: [[ADD_LCSSA_PH_PH:%.*]] = phi i3 [ [[ADD_3]], [[FOR_BODY]] ]
; UNROLL-4-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY]] ]
; UNROLL-4-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i3 [ [[ADD_3]], [[FOR_BODY]] ]
; UNROLL-4-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
; UNROLL-4: for.end.loopexit.unr-lcssa:
; UNROLL-4-NEXT: [[ADD_LCSSA_PH:%.*]] = phi i3 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[ADD_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; UNROLL-4-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; UNROLL-4-NEXT: [[SUM_02_UNR:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[SUM_02_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; UNROLL-4-NEXT: [[LCMP_MOD:%.*]] = icmp ne i3 [[XTRAITER]], 0
; UNROLL-4-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]]
; UNROLL-4: for.body.epil.preheader:
; UNROLL-4-NEXT: br label [[FOR_BODY_EPIL:%.*]]
; UNROLL-4: for.body.epil:
; UNROLL-4-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[INDVARS_IV_UNR]], [[FOR_BODY_EPIL_PREHEADER]] ]
; UNROLL-4-NEXT: [[SUM_02_EPIL:%.*]] = phi i3 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[SUM_02_UNR]], [[FOR_BODY_EPIL_PREHEADER]] ]
; UNROLL-4-NEXT: [[EPIL_ITER:%.*]] = phi i3 [ 0, [[FOR_BODY_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ]
; UNROLL-4-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_EPIL]]
; UNROLL-4-NEXT: [[TMP6:%.*]] = load i3, i3* [[ARRAYIDX_EPIL]], align 1
; UNROLL-4-NEXT: [[ADD_EPIL]] = add nsw i3 [[TMP6]], [[SUM_02_EPIL]]
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
; UNROLL-4-NEXT: [[LFTR_WIDEIV_EPIL:%.*]] = trunc i64 [[INDVARS_IV_NEXT_EPIL]] to i3
; UNROLL-4-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i3 [[LFTR_WIDEIV_EPIL]], [[N]]
; UNROLL-4-NEXT: [[EPIL_ITER_NEXT]] = add i3 [[EPIL_ITER]], 1
; UNROLL-4-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i3 [[EPIL_ITER_NEXT]], [[XTRAITER]]
; UNROLL-4-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY_EPIL]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
; UNROLL-4: for.end.loopexit.epilog-lcssa:
; UNROLL-4-NEXT: [[ADD_LCSSA_PH1:%.*]] = phi i3 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ]
; UNROLL-4-NEXT: br label [[FOR_END_LOOPEXIT]]
; UNROLL-4: for.end.loopexit:
; UNROLL-4-NEXT: [[ADD_LCSSA:%.*]] = phi i3 [ [[ADD_LCSSA_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_LCSSA_PH1]], [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] ]
; UNROLL-4-NEXT: br label [[FOR_END]]
; UNROLL-4: for.end:
; UNROLL-4-NEXT: [[SUM_0_LCSSA:%.*]] = phi i3 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; UNROLL-4-NEXT: ret i3 [[SUM_0_LCSSA]]
;
entry:
%cmp1 = icmp eq i3 %n, 0
br i1 %cmp1, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%sum.02 = phi i3 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i3, i3* %a, i64 %indvars.iv
%0 = load i3, i3* %arrayidx
%add = add nsw i3 %0, %sum.02
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i3
%exitcond = icmp eq i3 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%sum.0.lcssa = phi i3 [ 0, %entry ], [ %add, %for.body ]
ret i3 %sum.0.lcssa
}