Files
clang-p2996/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll
Philip Reames de2fed6152 [unroll] Keep unrolled iterations with initial iteration
The unrolling code was previously inserting new cloned blocks at the end of the function.  The result of this with typical loop structures is that the new iterations are placed far from the initial iteration.

With unrolling, the general assumption is that the a) the loop is reasonable hot, and b) the first Count-1 copies of the loop are rarely (if ever) loop exiting.  As such, placing Count-1 copies out of line is a fairly poor code placement choice.  We'd much rather fall through into the hot (non-exiting) path.  For code with branch profiles, later layout would fix this, but this may have a positive impact on non-PGO compiled code.

However, the real motivation for this change isn't performance.  Its readability and human understanding.  Having to jump around long distances in an IR file to trace an unrolled loop structure is error prone and tedious.
2021-11-12 11:40:50 -08:00

161 lines
7.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -loop-unroll -unroll-runtime %s -o - | FileCheck %s
; RUN: opt -S -loop-unroll -unroll-runtime -unroll-max-upperbound=6 %s -o - | FileCheck %s --check-prefix=UPPER
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
@global = dso_local local_unnamed_addr global i32 0, align 4
@global.1 = dso_local local_unnamed_addr global i8* null, align 4
; Check that loop in hoge_3, with a runtime upperbound of 3, is not unrolled.
define dso_local void @hoge_3(i8 %arg) {
; CHECK-LABEL: @hoge_3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X:%.*]] = load i32, i32* @global, align 4
; CHECK-NEXT: [[Y:%.*]] = load i8*, i8** @global.1, align 4
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 17
; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: loop.preheader:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[PTR:%.*]] = phi i8* [ [[PTR_NEXT:%.*]], [[LOOP]] ], [ [[Y]], [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 8
; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, i8* [[PTR]], i32 1
; CHECK-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[IV_NEXT]], 17
; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
; UPPER-LABEL: @hoge_3(
; UPPER-NEXT: entry:
; UPPER-NEXT: [[X:%.*]] = load i32, i32* @global, align 4
; UPPER-NEXT: [[Y:%.*]] = load i8*, i8** @global.1, align 4
; UPPER-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 17
; UPPER-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
; UPPER: loop.preheader:
; UPPER-NEXT: br label [[LOOP:%.*]]
; UPPER: loop:
; UPPER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ]
; UPPER-NEXT: [[PTR:%.*]] = phi i8* [ [[PTR_NEXT:%.*]], [[LOOP]] ], [ [[Y]], [[LOOP_PREHEADER]] ]
; UPPER-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 8
; UPPER-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, i8* [[PTR]], i32 1
; UPPER-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT]], align 1
; UPPER-NEXT: [[TMP1:%.*]] = icmp ult i32 [[IV_NEXT]], 17
; UPPER-NEXT: br i1 [[TMP1]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; UPPER: exit.loopexit:
; UPPER-NEXT: br label [[EXIT]]
; UPPER: exit:
; UPPER-NEXT: ret void
;
entry:
%x = load i32, i32* @global, align 4
%y = load i8*, i8** @global.1, align 4
%0 = icmp ult i32 %x, 17
br i1 %0, label %loop, label %exit
loop:
%iv = phi i32 [ %x, %entry ], [ %iv.next, %loop ]
%ptr = phi i8* [ %y, %entry ], [ %ptr.next, %loop ]
%iv.next = add nuw i32 %iv, 8
%ptr.next = getelementptr inbounds i8, i8* %ptr, i32 1
store i8 %arg, i8* %ptr.next, align 1
%1 = icmp ult i32 %iv.next, 17
br i1 %1, label %loop, label %exit
exit:
ret void
}
; Check that loop in hoge_5, with a runtime upperbound of 5, is unrolled when -unroll-max-upperbound=4
define dso_local void @hoge_5(i8 %arg) {
; CHECK-LABEL: @hoge_5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X:%.*]] = load i32, i32* @global, align 4
; CHECK-NEXT: [[Y:%.*]] = load i8*, i8** @global.1, align 4
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 17
; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: loop.preheader:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[PTR:%.*]] = phi i8* [ [[PTR_NEXT:%.*]], [[LOOP]] ], [ [[Y]], [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 4
; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, i8* [[PTR]], i32 1
; CHECK-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[IV_NEXT]], 17
; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
; UPPER-LABEL: @hoge_5(
; UPPER-NEXT: entry:
; UPPER-NEXT: [[X:%.*]] = load i32, i32* @global, align 4
; UPPER-NEXT: [[Y:%.*]] = load i8*, i8** @global.1, align 4
; UPPER-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 17
; UPPER-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
; UPPER: loop.preheader:
; UPPER-NEXT: br label [[LOOP:%.*]]
; UPPER: loop:
; UPPER-NEXT: [[IV_NEXT:%.*]] = add nuw i32 [[X]], 4
; UPPER-NEXT: [[PTR_NEXT:%.*]] = getelementptr inbounds i8, i8* [[Y]], i32 1
; UPPER-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT]], align 1
; UPPER-NEXT: [[TMP1:%.*]] = icmp ult i32 [[IV_NEXT]], 17
; UPPER-NEXT: br i1 [[TMP1]], label [[LOOP_1:%.*]], label [[EXIT_LOOPEXIT:%.*]]
; UPPER: loop.1:
; UPPER-NEXT: [[IV_NEXT_1:%.*]] = add nuw i32 [[IV_NEXT]], 4
; UPPER-NEXT: [[PTR_NEXT_1:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_1]], align 1
; UPPER-NEXT: [[TMP2:%.*]] = icmp ult i32 [[IV_NEXT_1]], 17
; UPPER-NEXT: br i1 [[TMP2]], label [[LOOP_2:%.*]], label [[EXIT_LOOPEXIT]]
; UPPER: loop.2:
; UPPER-NEXT: [[IV_NEXT_2:%.*]] = add nuw i32 [[IV_NEXT_1]], 4
; UPPER-NEXT: [[PTR_NEXT_2:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_1]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_2]], align 1
; UPPER-NEXT: [[TMP3:%.*]] = icmp ult i32 [[IV_NEXT_2]], 17
; UPPER-NEXT: br i1 [[TMP3]], label [[LOOP_3:%.*]], label [[EXIT_LOOPEXIT]]
; UPPER: loop.3:
; UPPER-NEXT: [[IV_NEXT_3:%.*]] = add nuw i32 [[IV_NEXT_2]], 4
; UPPER-NEXT: [[PTR_NEXT_3:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_2]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_3]], align 1
; UPPER-NEXT: [[TMP4:%.*]] = icmp ult i32 [[IV_NEXT_3]], 17
; UPPER-NEXT: br i1 [[TMP4]], label [[LOOP_4:%.*]], label [[EXIT_LOOPEXIT]]
; UPPER: loop.4:
; UPPER-NEXT: [[IV_NEXT_4:%.*]] = add nuw i32 [[IV_NEXT_3]], 4
; UPPER-NEXT: [[PTR_NEXT_4:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_3]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_4]], align 1
; UPPER-NEXT: [[TMP5:%.*]] = icmp ult i32 [[IV_NEXT_4]], 17
; UPPER-NEXT: br i1 [[TMP5]], label [[LOOP_5:%.*]], label [[EXIT_LOOPEXIT]]
; UPPER: loop.5:
; UPPER-NEXT: [[PTR_NEXT_5:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_4]], i32 1
; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_5]], align 1
; UPPER-NEXT: br label [[EXIT_LOOPEXIT]]
; UPPER: exit.loopexit:
; UPPER-NEXT: br label [[EXIT]]
; UPPER: exit:
; UPPER-NEXT: ret void
;
entry:
%x = load i32, i32* @global, align 4
%y = load i8*, i8** @global.1, align 4
%0 = icmp ult i32 %x, 17
br i1 %0, label %loop, label %exit
loop:
%iv = phi i32 [ %x, %entry ], [ %iv.next, %loop ]
%ptr = phi i8* [ %y, %entry ], [ %ptr.next, %loop ]
%iv.next = add nuw i32 %iv, 4
%ptr.next = getelementptr inbounds i8, i8* %ptr, i32 1
store i8 %arg, i8* %ptr.next, align 1
%1 = icmp ult i32 %iv.next, 17
br i1 %1, label %loop, label %exit
exit:
ret void
}