Files
clang-p2996/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
Tim Renouf 2a99fa2c08 [AMDGPU] added writelane intrinsic
Summary:
For use by LLPC SPV_AMD_shader_ballot extension.

The v_writelane instruction was already implemented for use by SGPR
spilling, but I had to add an extra dummy operand tied to the
destination, to represent that all lanes except the selected one keep
the old value of the destination register.

.ll test changes were due to schedule changes caused by that new
operand.

Differential Revision: https://reviews.llvm.org/D42838

llvm-svn: 326353
2018-02-28 19:10:32 +00:00

97 lines
2.7 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
; GCN-LABEL: {{^}}callee_no_stack:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_no_stack() #0 {
ret void
}
; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_no_stack_no_fp_elim() #1 {
ret void
}
; Requires frame pointer for access to local regular object.
; GCN-LABEL: {{^}}callee_with_stack:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_mov_b32 s5, s32
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack() #0 {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
ret void
}
; GCN-LABEL: {{^}}callee_with_stack_and_call:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8
; GCN-DAG: v_writelane_b32 v32, s33,
; GCN-DAG: v_writelane_b32 v32, s34,
; GCN-DAG: v_writelane_b32 v32, s35,
; GCN-DAG: s_add_u32 s32, s32, 0x300{{$}}
; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
; GCN-DAG: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
; GCN-DAG: s_mov_b32 s33, s5
; GCN: s_swappc_b64
; GCN: s_mov_b32 s5, s33
; GCN-DAG: v_readlane_b32 s35,
; GCN-DAG: v_readlane_b32 s34,
; GCN-DAG: v_readlane_b32 s33,
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8
; GCN: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_and_call() #0 {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
call void @external_void_func_void()
ret void
}
; Should be able to copy incoming stack pointer directly to inner
; call's stack pointer argument.
; There is stack usage only because of the need to evict a VGPR for
; spilling CSR SGPRs.
; GCN-LABEL: {{^}}callee_no_stack_with_call:
; GCN: s_waitcnt
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
; GCN-DAG: v_writelane_b32 v32, s33, 0
; GCN-DAG: v_writelane_b32 v32, s34, 1
; GCN: s_mov_b32 s33, s5
; GCN: s_swappc_b64
; GCN: s_mov_b32 s5, s33
; GCN-DAG: v_readlane_b32 s34, v32, 1
; GCN-DAG: v_readlane_b32 s33, v32, 0
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
; GCN: s_sub_u32 s32, s32, 0x200
; GCN: s_setpc_b64
define void @callee_no_stack_with_call() #0 {
call void @external_void_func_void()
ret void
}
declare void @external_void_func_void() #0
attributes #0 = { nounwind }
attributes #1 = { nounwind "no-frame-pointer-elim"="true" }