This was supposed to document the new pass manager (PM) limitation, but
was deleted in fb4113ef0c.
Switch to generated checks, since that's more reliable than XFAIL, and
just preserve the preferred results as comments.
78 lines
3.4 KiB
LLVM
78 lines
3.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
|
|
; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -S %s | FileCheck %s
|
|
|
|
; Ideally, loop unswitching would happen and the condition would be hoisted out of the loop.
|
|
; Condition is uniform so even targets with divergence should perform unswitching.
|
|
|
|
; This does not currently happen with the new pass manager:
|
|
; https://bugs.llvm.org/show_bug.cgi?id=48819
|
|
; The correct behaviour (allow uniform non-trivial branches to be
|
|
; unswitched on all targets) requires access to the function-level
|
|
; divergence analysis from a loop transform, which is currently not
|
|
; supported in the new pass manager.
|
|
|
|
; SHOULDBE-LABEL: {{^}}define amdgpu_kernel void @uniform_unswitch
|
|
; SHOULDBE: entry:
|
|
; SHOULDBE-NEXT: [[LOOP_COND:%[a-z0-9]+]] = icmp
|
|
; SHOULDBE-NEXT: [[IF_COND:%[a-z0-9]+]] = icmp eq i32 %x, 123456
|
|
; SHOULDBE-NEXT: and i1 [[LOOP_COND]], [[IF_COND]]
|
|
; SHOULDBE-NEXT: br i1
|
|
|
|
; Kernel under test: for i in [0, %n), store i to %out[i] whenever
; %x == 123456. The compare (%cmp1) is computed once in for.body.lr.ph,
; before the loop is entered, so the branch on it inside for.body is
; loop-invariant.
; The autogenerated assertions inside the function record that this branch
; is still inside the loop body in the current output, i.e. the loop has not
; been unswitched.
define amdgpu_kernel void @uniform_unswitch(ptr nocapture %out, i32 %n, i32 %x) {
|
|
; CHECK-LABEL: define amdgpu_kernel void @uniform_unswitch
|
|
; CHECK-SAME: (ptr nocapture writeonly [[OUT:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[OUT_GLOBAL:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
|
|
; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
|
|
; CHECK: for.body.lr.ph:
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 123456
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
|
|
; CHECK: if.then:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[I_07]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_GLOBAL]], i64 [[TMP0]]
|
|
; CHECK-NEXT: store i32 [[I_07]], ptr addrspace(1) [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: br label [[FOR_INC]]
|
|
; CHECK: for.inc:
|
|
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
|
|
;
|
|
entry:
|
|
; Skip the loop entirely when %n <= 0.
%cmp6 = icmp sgt i32 %n, 0
|
|
br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup
|
|
|
|
for.body.lr.ph: ; preds = %entry
|
|
; Loop-invariant condition, evaluated once before the loop is entered.
%cmp1 = icmp eq i32 %x, 123456
|
|
br label %for.body
|
|
|
|
for.cond.cleanup.loopexit: ; preds = %for.inc
|
|
br label %for.cond.cleanup
|
|
|
|
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
|
|
ret void
|
|
|
|
for.body: ; preds = %for.inc, %for.body.lr.ph
|
|
; Induction variable i: starts at 0, advanced in for.inc.
%i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
|
|
; Branch on the loop-invariant condition on every iteration; this is the
; branch that unswitching would hoist out of the loop.
br i1 %cmp1, label %if.then, label %for.inc
|
|
|
|
if.then: ; preds = %for.body
|
|
; out[i] = i
%arrayidx = getelementptr inbounds i32, ptr %out, i32 %i.07
|
|
store i32 %i.07, ptr %arrayidx, align 4
|
|
br label %for.inc
|
|
|
|
for.inc: ; preds = %for.body, %if.then
|
|
; ++i, then leave the loop once i equals %n.
%inc = add nuw nsw i32 %i.07, 1
|
|
%exitcond = icmp eq i32 %inc, %n
|
|
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
|
|
}
|
|
|
|
; Declaration of the @llvm.amdgcn.workitem.id.x intrinsic, tagged with
; attribute group #0 (defined on the last line of this file).
; NOTE(review): no instruction in @uniform_unswitch calls this intrinsic, so
; the declaration and its attribute group look like leftovers - confirm
; before removing.
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
|
|
|
attributes #0 = { nounwind readnone }
|