; - There is no restriction on a loop with controlled convergent operations
;   when the relevant tokens are defined and used within the loop.
; - When a token defined outside a loop is used inside (also called a loop
;   convergence heart), unrolling is allowed only in the absence of remainder
;   or runtime checks.
; - When a token defined inside a loop is used outside, such a loop is said to
;   be "extended". This loop can only be unrolled by also duplicating the
;   extended part lying outside the loop. Such unrolling is disabled for now.
; - Clean up loop hearts: When unrolling a loop with a heart, duplicating the
;   heart will introduce multiple static uses of a convergence control token in
;   a cycle that does not contain its definition. This violates the static
;   rules for tokens, and needs to be cleaned up into a single occurrence of
;   the intrinsic.
; - Spell out the initializer for UnrollLoopOptions to improve readability.
;
; Original implementation [D85605] by Nicolai Haehnle <nicolai.haehnle@amd.com>.
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-unroll -unroll-runtime -unroll-allow-partial -S | FileCheck %s

; Convergent callee. Every call to it in the functions below carries a
; "convergencectrl" operand bundle naming the token that controls it.
declare void @f() convergent

; Non-convergent callee. None of the functions visible in this file call it.
declare void @g()

; Although this loop contains a convergent instruction, it should be
; fully unrolled.
;
; The loop heart in %l3 consumes the %anchor token defined in the entry block,
; and the exit test compares %inc against the constant 3. The expected output
; keeps exactly one llvm.experimental.convergence.loop call and has three
; copies of the call to @f (blocks a, a.1, a.2), all using that one token.
define i32 @full_unroll() {
; CHECK-LABEL: @full_unroll(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    br label [[L3:%.*]]
; CHECK:       l3:
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    br label [[A:%.*]]
; CHECK:       a:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_1:%.*]]
; CHECK:       a.1:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_2:%.*]]
; CHECK:       a.2:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    ret i32 0
;
entry:
  %anchor = call token @llvm.experimental.convergence.anchor()
  br label %l3

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %a ]
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, 3
  br label %a

a:
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  br i1 %exitcond, label %exit, label %l3

exit:
  ret i32 0
}

; This loop contains a convergent instruction, but it should be partially
; unrolled. The unroll count is the largest power of 2 that divides the
; multiple -- 4, in this case.
;
; The exit bound %loop_ctl is %n * 12. The expected output has four copies of
; the call to @f (blocks a, a.1, a.2, a.3) sharing the single loop heart in
; %l3, an induction step of 4, and no remainder loop.
define i32 @runtime_unroll(i32 %n) {
; CHECK-LABEL: @runtime_unroll(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    [[LOOP_CTL:%.*]] = mul nsw i32 [[N:%.*]], 12
; CHECK-NEXT:    br label [[L3:%.*]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_3:%.*]], [[A_3:%.*]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    br label [[A:%.*]]
; CHECK:       a:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_1:%.*]]
; CHECK:       a.1:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_2:%.*]]
; CHECK:       a.2:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_3]]
; CHECK:       a.3:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    [[INC_3]] = add nsw i32 [[X_0]], 4
; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[LOOP_CTL]]
; CHECK-NEXT:    br i1 [[EXITCOND_3]], label [[EXIT:%.*]], label [[L3]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 0
;
entry:
  %anchor = call token @llvm.experimental.convergence.anchor()
  %loop_ctl = mul nsw i32 %n, 12
  br label %l3

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %a ]
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  br label %a

a:
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, %loop_ctl
  br i1 %exitcond, label %exit, label %l3

exit:
  ret i32 0
}

; This loop contains a convergent instruction, so its partial unroll
; count must divide its trip multiple. This overrides its unroll
; pragma -- we unroll exactly 8 times, even though 16 is requested.
;
; The exit bound %loop_ctl is %n * 24 and the loop carries metadata !0
; (llvm.loop.unroll.count 16). The expected output has eight copies of the
; call to @f (blocks a through a.7) sharing the single loop heart in %l3, and
; an induction step of 8.
define i32 @pragma_unroll(i32 %n) {
; CHECK-LABEL: @pragma_unroll(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    [[LOOP_CTL:%.*]] = mul nsw i32 [[N:%.*]], 24
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_7:%.*]], [[A_7:%.*]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    br label [[A:%.*]]
; CHECK:       a:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_1:%.*]]
; CHECK:       a.1:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_2:%.*]]
; CHECK:       a.2:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_3:%.*]]
; CHECK:       a.3:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_4:%.*]]
; CHECK:       a.4:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_5:%.*]]
; CHECK:       a.5:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_6:%.*]]
; CHECK:       a.6:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    br label [[A_7]]
; CHECK:       a.7:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    [[INC_7]] = add nsw i32 [[X_0]], 8
; CHECK-NEXT:    [[EXITCOND_7:%.*]] = icmp eq i32 [[INC_7]], [[LOOP_CTL]]
; CHECK-NEXT:    br i1 [[EXITCOND_7]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 0
;
entry:
  %anchor = call token @llvm.experimental.convergence.anchor()
  %loop_ctl = mul nsw i32 %n, 24
  br label %l3, !llvm.loop !0

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %a ]
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  br label %a

a:
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, %loop_ctl
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !0

exit:
  ret i32 0
}

; This loop contains a convergent instruction. Since the pragma loop unroll
; count 2 divides the trip count 4, the loop unroll should respect the pragma.
;
; The loop carries metadata !1 (llvm.loop.unroll.count 2) and exits when %inc
; equals the constant 4. The expected output keeps one loop heart, has two
; calls to @f per iteration, and steps the induction variable by 2.
define void @pragma_unroll_divisible_trip_count() {
; CHECK-LABEL: @pragma_unroll_divisible_trip_count(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_1:%.*]], [[L3]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    [[INC_1]] = add nuw nsw i32 [[X_0]], 2
; CHECK-NEXT:    [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 4
; CHECK-NEXT:    br i1 [[EXITCOND_1]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %anchor = call token @llvm.experimental.convergence.anchor()
  br label %l3, !llvm.loop !1

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, 4
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1

exit:
  ret void
}

; This loop contains a convergent instruction. Since the pragma loop unroll
; count 2 divides the trip multiple 2, the loop unroll should respect the
; pragma.
;
; The exit bound %loop_ctl is %n * 2 and the loop carries metadata !1
; (llvm.loop.unroll.count 2). The expected output keeps one loop heart, has
; two calls to @f per iteration, and steps the induction variable by 2.
define i32 @pragma_unroll_divisible_trip_multiple(i32 %n) {
; CHECK-LABEL: @pragma_unroll_divisible_trip_multiple(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    [[LOOP_CTL:%.*]] = mul nsw i32 [[N:%.*]], 2
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_1:%.*]], [[L3]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    [[INC_1]] = add nsw i32 [[X_0]], 2
; CHECK-NEXT:    [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], [[LOOP_CTL]]
; CHECK-NEXT:    br i1 [[EXITCOND_1]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 0
;
entry:
  %anchor = call token @llvm.experimental.convergence.anchor()
  %loop_ctl = mul nsw i32 %n, 2
  br label %l3, !llvm.loop !1

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, %loop_ctl
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1

exit:
  ret i32 0
}

; This loop contains a convergent instruction. Since the pragma loop unroll
; count 2 is not known to divide the runtime trip count, the loop is not
; unrolled: a remainder is forbidden when unrolling a convergent loop.
;
; The exit bound is %n itself, and the loop heart uses the %anchor token from
; the entry block. The expected output is the input loop unchanged: one call
; to @f per iteration and an induction step of 1.
define i32 @pragma_unroll_indivisible_runtime_trip_count(i32 %n) {
; CHECK-LABEL: @pragma_unroll_indivisible_runtime_trip_count(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[L3]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    [[INC]] = add nsw i32 [[X_0]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP4]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 0
;
entry:
  %anchor = call token @llvm.experimental.convergence.anchor()
  br label %l3, !llvm.loop !1

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1

exit:
  ret i32 0
}

; This loop contains a convergent instruction. Since the pragma loop unroll
; count 2 does not divide trip count 5, the loop is not unrolled by 2
; since remainder is forbidden for unrolling convergent loop. Instead, the
; loop gets fully unrolled.
;
; The loop exits when %inc equals the constant 5. The expected output has a
; single loop heart in %l3 followed by five calls to @f and the return, with
; no back-edge.
define i32 @pragma_unroll_indivisible_trip_count() {
; CHECK-LABEL: @pragma_unroll_indivisible_trip_count(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l3:
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    ret i32 0
;
entry:
  %anchor = call token @llvm.experimental.convergence.anchor()
  br label %l3, !llvm.loop !1

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, 5
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1

exit:
  ret i32 0
}

; This loop contains a convergent instruction that is anchored inside the loop
; itself. It is unrolled by 2 with remainder, as requested by the loop metadata.
;
; Here the token comes from an llvm.experimental.convergence.anchor call in
; %l3 rather than from a loop heart, and the exit bound is %n. The expected
; output has a main loop with two anchor/call pairs per iteration, plus an
; epilogue block (l3.epil) holding one more anchor/call pair for the leftover
; iteration.
define i32 @pragma_unroll_with_remainder(i32 %n) {
; CHECK-LABEL: @pragma_unroll_with_remainder(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[N:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], -1
; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP0]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 1
; CHECK-NEXT:    br i1 [[TMP2]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK:       entry.new:
; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[TMP0]], [[XTRAITER]]
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[INC_1:%.*]], [[L3]] ]
; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[L3]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    [[TOK_LOOP_1:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP_1]]) ]
; CHECK-NEXT:    [[INC_1]] = add nsw i32 [[X_0]], 2
; CHECK-NEXT:    [[NITER_NEXT_1]] = add i32 [[NITER]], 2
; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[L3]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       exit.unr-lcssa.loopexit:
; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
; CHECK:       exit.unr-lcssa:
; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[L3_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK:       l3.epil.preheader:
; CHECK-NEXT:    br label [[L3_EPIL:%.*]]
; CHECK:       l3.epil:
; CHECK-NEXT:    [[TOK_LOOP_EPIL:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP_EPIL]]) ]
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 0
;
entry:
  br label %l3, !llvm.loop !1

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
  %tok.loop = call token @llvm.experimental.convergence.anchor()
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1

exit:
  ret i32 0
}

; Don't unroll a loop that is extended by convergence controls.
;
; We could theoretically duplicate the extension part, but this is not
; implemented.
;
; The token %tok.loop is defined by an anchor inside %l3 but its only use is
; the call to @f in %exit, outside the loop. The expected output is the input
; unchanged, even though the loop carries metadata !1
; (llvm.loop.unroll.count 2).
define i32 @extended_loop(i32 %n) {
; CHECK-LABEL: @extended_loop(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[L3]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    [[INC]] = add nsw i32 [[X_0]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[L3]], !llvm.loop [[LOOP4]]
; CHECK:       exit:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
; CHECK-NEXT:    ret i32 0
;
entry:
  br label %l3, !llvm.loop !1

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
  %tok.loop = call token @llvm.experimental.convergence.anchor()
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1

exit:
  call void @f() [ "convergencectrl"(token %tok.loop) ]
  ret i32 0
}

; Inner loop is extended beyond the outer loop. No unrolling possible.
;
; The inner loop is the self-loop on %l2; the outer loop runs %l3 -> %l2 ->
; %latch and exits when %inc equals 4. The token %tok.l2 defined in %l2 is
; used again by the call to @f in %exit, outside both loops. The expected
; output leaves both loops as they are in the input.

define i32 @extended_inner_loop_1(i32 %n, i1 %cond) {
; CHECK-LABEL: @extended_inner_loop_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[L3:%.*]]
; CHECK:       l3:
; CHECK-NEXT:    [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    [[INC]] = add nsw i32 [[X_0]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 4
; CHECK-NEXT:    br label [[L2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2:
; CHECK-NEXT:    [[TOK_L2:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[L2]], label [[LATCH]], !llvm.loop [[LOOP4]]
; CHECK:       latch:
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[L3]]
; CHECK:       exit:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
; CHECK-NEXT:    ret i32 0
;
entry:
  br label %l3

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %latch ]
  %tok.loop = call token @llvm.experimental.convergence.anchor()
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, 4
  br label %l2, !llvm.loop !1

l2:
  %tok.l2 = call token @llvm.experimental.convergence.anchor()
  call void @f() [ "convergencectrl"(token %tok.l2) ]
  br i1 %cond, label %l2, label %latch, !llvm.loop !1

latch:
  br i1 %exitcond, label %exit, label %l3

exit:
  call void @f() [ "convergencectrl"(token %tok.l2) ]
  ret i32 0
}

; Inner loop is extended inside the outer loop. Outer loop is unrolled.
;
; The token %tok.l2 defined in the inner self-loop %l2 is used again in
; %latch, which is outside the inner loop but still inside the outer loop
; (exit when %inc equals 4). The expected output has four copies of the outer
; body (l2/latch, l2.1/latch.1, l2.2/latch.2, l2.3/latch.3). Each copy keeps
; its inner loop as a self-loop and pairs the latch call with that copy's own
; token.

define i32 @extended_inner_loop_2(i32 %n, i1 %cond) {
; CHECK-LABEL: @extended_inner_loop_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[L3:%.*]]
; CHECK:       l3:
; CHECK-NEXT:    br label [[L2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2:
; CHECK-NEXT:    [[TOK_L2:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[L2]], label [[LATCH:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       latch:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
; CHECK-NEXT:    br label [[L2_1:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.1:
; CHECK-NEXT:    [[TOK_L2_1:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_1]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_1]], label [[LATCH_1:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       latch.1:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_1]]) ]
; CHECK-NEXT:    br label [[L2_2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.2:
; CHECK-NEXT:    [[TOK_L2_2:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_2]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_2]], label [[LATCH_2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       latch.2:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_2]]) ]
; CHECK-NEXT:    br label [[L2_3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.3:
; CHECK-NEXT:    [[TOK_L2_3:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_3]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_3]], label [[LATCH_3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       latch.3:
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_3]]) ]
; CHECK-NEXT:    ret i32 0
;
entry:
  br label %l3

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %latch ]
  %tok.loop = call token @llvm.experimental.convergence.anchor()
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, 4
  br label %l2, !llvm.loop !1

l2:
  %tok.l2 = call token @llvm.experimental.convergence.anchor()
  call void @f() [ "convergencectrl"(token %tok.l2) ]
  br i1 %cond, label %l2, label %latch, !llvm.loop !1

latch:
  call void @f() [ "convergencectrl"(token %tok.l2) ]
  br i1 %exitcond, label %exit, label %l3

exit:
  ret i32 0
}

; No extension. Both loops unrolled.
;
; The token %tok.l2 is defined and used only inside the inner self-loop %l2,
; which carries metadata !1 (llvm.loop.unroll.count 2); the outer loop exits
; when %inc equals 4. The expected output has four copies of the outer body,
; and in each copy the inner loop is unrolled into two blocks (l2 / l2.1,
; l2.12 / l2.1.1, l2.2 / l2.1.2, l2.3 / l2.1.3), each with its own
; anchor/call pair.

define i32 @unroll_nest(i32 %n, i1 %cond) {
; CHECK-LABEL: @unroll_nest(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[L3:%.*]]
; CHECK:       l3:
; CHECK-NEXT:    br label [[L2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2:
; CHECK-NEXT:    [[TOK_L2:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2]]) ]
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[L2_1:%.*]], label [[LATCH:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.1:
; CHECK-NEXT:    [[TOK_L2_1:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_1]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2]], label [[LATCH]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       latch:
; CHECK-NEXT:    br label [[L2_12:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.12:
; CHECK-NEXT:    [[TOK_L2_11:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_11]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_1_1:%.*]], label [[LATCH_1:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.1.1:
; CHECK-NEXT:    [[TOK_L2_1_1:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_1_1]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_12]], label [[LATCH_1]], !llvm.loop [[LOOP9]]
; CHECK:       latch.1:
; CHECK-NEXT:    br label [[L2_2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.2:
; CHECK-NEXT:    [[TOK_L2_2:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_2]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_1_2:%.*]], label [[LATCH_2:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.1.2:
; CHECK-NEXT:    [[TOK_L2_1_2:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_1_2]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_2]], label [[LATCH_2]], !llvm.loop [[LOOP9]]
; CHECK:       latch.2:
; CHECK-NEXT:    br label [[L2_3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.3:
; CHECK-NEXT:    [[TOK_L2_3:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_3]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_1_3:%.*]], label [[LATCH_3:%.*]], !llvm.loop [[LOOP4]]
; CHECK:       l2.1.3:
; CHECK-NEXT:    [[TOK_L2_1_3:%.*]] = call token @llvm.experimental.convergence.anchor()
; CHECK-NEXT:    call void @f() [ "convergencectrl"(token [[TOK_L2_1_3]]) ]
; CHECK-NEXT:    br i1 [[COND]], label [[L2_3]], label [[LATCH_3]], !llvm.loop [[LOOP9]]
; CHECK:       latch.3:
; CHECK-NEXT:    ret i32 0
;
entry:
  br label %l3

l3:
  %x.0 = phi i32 [ 0, %entry ], [ %inc, %latch ]
  %tok.loop = call token @llvm.experimental.convergence.anchor()
  %inc = add nsw i32 %x.0, 1
  %exitcond = icmp eq i32 %inc, 4
  br label %l2, !llvm.loop !1

l2:
  %tok.l2 = call token @llvm.experimental.convergence.anchor()
  call void @f() [ "convergencectrl"(token %tok.l2) ]
  br i1 %cond, label %l2, label %latch, !llvm.loop !1

latch:
  br i1 %exitcond, label %exit, label %l3

exit:
  ret i32 0
}

; Convergence control intrinsics used by the functions above. Both return a
; token; the loop intrinsic is always called with a "convergencectrl" bundle
; naming its parent token.
declare token @llvm.experimental.convergence.anchor()
declare token @llvm.experimental.convergence.loop()

; Self-referential loop IDs carrying an unroll-count hint:
; !0 requests a count of 16 (used by @pragma_unroll), !1 requests a count of 2.
!0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}}
!1 = !{!1, !{!"llvm.loop.unroll.count", i32 2}}