Files
clang-p2996/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
Piotr Sobczak 09fe84abb4 [AMDGPU] Move code sinking before structurizer
Moving code sinking pass before structurizer creates more sinking
opportunities.

The extra flow edges introduced by the structurizer can have adverse
effects on sinking, because the sinking pass prefers moving instructions
to blocks with unique predecessors and the structurizer destroys that
property in some cases.

A notable example is moving high-latency image instructions across kills.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D101115
2021-05-11 14:07:23 +02:00

92 lines
2.9 KiB
LLVM

; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Where the mask of lanes wanting to exit the loop on this iteration is not
; obviously already masked by exec (in this case, the xor with -1 inserted by
; control flow annotation), then lower control flow must insert an S_AND_B64
; with exec.
; GCN-LABEL: {{^}}needs_and:
; GCN: s_or_b64 exec, exec, [[REG1:[^ ,]*]]
; GCN: s_andn2_b64 exec, exec, [[REG2:[^ ,]*]]
; GCN: s_or_b64 [[REG2:[^ ,]*]], [[REG1:[^ ,]*]], [[REG2:[^ ,]*]]
; GCN: s_or_b64 exec, exec, [[REG2:[^ ,]*]]
define void @needs_and(i32 %arg) {
entry:
br label %loop
loop:
%tmp23phi = phi i32 [ %tmp23, %endif ], [ 0, %entry ]
%tmp23 = add nuw i32 %tmp23phi, 1
%tmp27 = icmp ult i32 %arg, %tmp23
br i1 %tmp27, label %then, label %endif
then: ; preds = %bb
call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0)
br label %endif
endif: ; preds = %bb28, %bb
br i1 %tmp27, label %loop, label %loopexit
loopexit:
ret void
}
; Where the mask of lanes wanting to exit the loop on this iteration is
; obviously already masked by exec (a V_CMP), then lower control flow can omit
; the S_AND_B64 to avoid an unnecessary instruction.
; GCN-LABEL: {{^}}doesnt_need_and:
; GCN: v_cmp{{[^ ]*}} [[REG1:[^ ,]*]]
; GCN: s_or_b64 [[REG2:[^ ,]*]], [[REG1]],
; GCN: s_andn2_b64 exec, exec, [[REG2]]
define void @doesnt_need_and(i32 %arg) {
entry:
br label %loop
loop:
%tmp23phi = phi i32 [ %tmp23, %loop ], [ 0, %entry ]
%tmp23 = add nuw i32 %tmp23phi, 1
%tmp27 = icmp ult i32 %arg, %tmp23
call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0)
br i1 %tmp27, label %loop, label %loopexit
loopexit:
ret void
}
; Another case where the mask of lanes wanting to exit the loop is not masked
; by exec, because it is a function parameter.
; GCN-LABEL: {{^}}break_cond_is_arg:
; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}}
; GCN: s_andn2_b64 exec, exec, [[REG3:[^ ,]*]]
; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]]
; GCN: s_or_b64 [[REG3]], [[REG2]],
define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
entry:
br label %loop
loop:
%tmp23phi = phi i32 [ %tmp23, %endif ], [ 0, %entry ]
%tmp23 = add nuw i32 %tmp23phi, 1
%tmp27 = icmp ult i32 %arg, %tmp23
br i1 %tmp27, label %then, label %endif
then: ; preds = %bb
call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0)
br label %endif
endif: ; preds = %bb28, %bb
br i1 %breakcond, label %loop, label %loopexit
loopexit:
ret void
}
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
attributes #0 = { nounwind writeonly }