Files
clang-p2996/llvm/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll
Sameer Sahasrabuddhe b92c8c22b9 [NewPM] Disable non-trivial loop-unswitch on targets with divergence
Unswitching a loop on a non-trivial divergent branch is expensive
since it serializes the execution of both versions of the
loop. But identifying a divergent branch needs divergence analysis,
which is a function level analysis.

The legacy pass manager handles this dependency by isolating such a
loop transform and rerunning the required function analyses. This
functionality is currently missing in the new pass manager, and there
is no safe way for the SimpleLoopUnswitch pass to depend on
DivergenceAnalysis. So we conservatively assume that all non-trivial
branches are divergent if the target has divergence.

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D98958
2021-03-25 11:27:10 +00:00

46 lines
1.5 KiB
LLVM

; Regression input for the AMDGPU target: a counted loop whose body branches
; on a comparison that is computed once, before the loop, from the work-item
; id. The whole -O3 pipeline is run over it by the command line below.
; RUN: opt -mtriple=amdgcn-- -O3 -S %s | FileCheck %s
; Check that loop unswitch does not happen if condition is divergent.
; The patterns below expect, after the entry label of the kernel: some icmp,
; then the icmp against 567890 (its result name is captured), then an
; unconditional branch, and finally a conditional branch that still uses the
; captured comparison.
; CHECK-LABEL: {{^}}define amdgpu_kernel void @divergent_unswitch
; CHECK: entry:
; CHECK: icmp
; CHECK: [[IF_COND:%[a-z0-9]+]] = icmp {{.*}} 567890
; CHECK: br label
; CHECK: br i1 [[IF_COND]]
; Kernel under test. Pseudo-C equivalent of the input IR:
;   if (n > 0)
;     for (i = 0; i != n; ++i)
;       if (workitem_id_x == 567890)
;         out[i] = i;
define amdgpu_kernel void @divergent_unswitch(i32 * nocapture %out, i32 %n) {
entry:
; Guard: skip the loop entirely unless %n is strictly positive (signed).
%cmp9 = icmp sgt i32 %n, 0
br i1 %cmp9, label %for.body.lr.ph, label %for.cond.cleanup
for.body.lr.ph: ; preds = %entry
; The loop condition is evaluated here, outside the loop: compare the x
; component of the work-item id with the constant 567890.
%call = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%cmp2 = icmp eq i32 %call, 567890
br label %for.body
for.cond.cleanup.loopexit: ; preds = %for.inc
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret void
for.body: ; preds = %for.inc, %for.body.lr.ph
; %i.010 is the induction variable: 0 on entry, %inc on the back edge.
%i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
; %cmp2 is defined in the block that precedes the loop, so this branch takes
; the same direction on every iteration. This is the branch the patterns at
; the top of the file expect to survive optimization.
br i1 %cmp2, label %if.then, label %for.inc
if.then: ; preds = %for.body
; out[i] = i
%arrayidx = getelementptr inbounds i32, i32 * %out, i32 %i.010
store i32 %i.010, i32 * %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body, %if.then
; Step the induction variable and leave the loop once it equals %n.
%inc = add nuw nsw i32 %i.010, 1
%exitcond = icmp eq i32 %inc, %n
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}
; AMDGPU intrinsic called by @divergent_unswitch; it takes no arguments and
; returns an i32 that the kernel compares against 567890.
declare i32 @llvm.amdgcn.workitem.id.x() #0
; Attribute group #0, used on both the declaration above and its call site.
attributes #0 = { nounwind readnone }