Files
clang-p2996/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
Aaditya 11b0401926 [AMDGPU] Restore SP from saved-FP or saved-BP (#124007)
Currently, the AMDGPU backend bumps the Stack Pointer
by a fixed-size offset in the prolog of device functions and
restores it by subtracting the same amount in the epilog.
Prolog:
sp += frameSize

Epilog:
sp -= frameSize

If a function has dynamic stack realignment,
Prolog:
sp += frameSize + max_alignment

Epilog:
sp -= frameSize + max_alignment

These calculations are not optimal in the case of dynamic
stack realignment, and they fail completely in the case of
dynamic stack readjustment, because a fixed offset no longer
matches how far SP has actually moved.
This patch uses the saved Frame Pointer to restore SP. 
Prolog:
fp = sp
sp += frameSize

Epilog:
sp = fp

In case of dynamic stack realignment, SP is restored from 
the saved Base Pointer. 
Prolog:
fp = sp + (max_alignment - 1)
fp = fp & (-max_alignment)
bp = sp
sp += frameSize + max_alignment

Epilog:
sp = bp

(Note: The presence of BP has been enforced in case of any 
dynamic stack realignment.)

---------

Co-authored-by: Pravin Jagtap <Pravin.Jagtap@amd.com>
Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
2025-01-24 19:13:40 +05:30

1681 lines
73 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,WAVE32,WAVE32-OPT %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,WAVE64,WAVE64-OPT %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,WAVE32,WAVE32-O0 %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,WAVE64,WAVE64-O0 %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -amdgpu-prealloc-sgpr-spill-vgprs=1 < %s | FileCheck -check-prefixes=GCN,WAVE32,WAVE32-WWM-PREALLOC %s
; Overloads of the stack save/restore intrinsics on ptr addrspace(5). In the
; expected output below, calls to them show up as reads and writes of s32.
declare ptr addrspace(5) @llvm.stacksave.p5()
declare void @llvm.stackrestore.p5(ptr addrspace(5))
; Empty callee taking a [32 x i32] followed by one more i32. The expected
; output is just the entry wait and the return. It is referenced by
; kernel_stacksave_stackrestore_call_with_stack_objects further down
; (stack_passed_argument@abs32 in its expected output).
; NOTE(review): going by the name, the trailing i32 is presumably passed on the
; stack -- confirm against the calling convention.
define hidden void @stack_passed_argument([32 x i32], i32) {
; GCN-LABEL: stack_passed_argument:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
ret void
}
; Non-kernel function that feeds the llvm.stacksave result to inline asm through
; an SGPR ("s") constraint. In every configuration the expected value is s32
; shifted right by 5 (wave32) or by 6 (wave64); the -O0 configurations copy s32
; into s4 before shifting.
define void @func_store_stacksave() {
; WAVE32-OPT-LABEL: func_store_stacksave:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-OPT-NEXT: s_lshr_b32 s4, s32, 5
; WAVE32-OPT-NEXT: ;;#ASMSTART
; WAVE32-OPT-NEXT: ; use s4
; WAVE32-OPT-NEXT: ;;#ASMEND
; WAVE32-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-OPT-LABEL: func_store_stacksave:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-OPT-NEXT: s_lshr_b32 s4, s32, 6
; WAVE64-OPT-NEXT: ;;#ASMSTART
; WAVE64-OPT-NEXT: ; use s4
; WAVE64-OPT-NEXT: ;;#ASMEND
; WAVE64-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-O0-LABEL: func_store_stacksave:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: s_mov_b32 s4, s32
; WAVE32-O0-NEXT: s_lshr_b32 s4, s4, 5
; WAVE32-O0-NEXT: ;;#ASMSTART
; WAVE32-O0-NEXT: ; use s4
; WAVE32-O0-NEXT: ;;#ASMEND
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-O0-LABEL: func_store_stacksave:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: s_mov_b32 s4, s32
; WAVE64-O0-NEXT: s_lshr_b32 s4, s4, 6
; WAVE64-O0-NEXT: ;;#ASMSTART
; WAVE64-O0-NEXT: ; use s4
; WAVE64-O0-NEXT: ;;#ASMEND
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-WWM-PREALLOC-LABEL: func_store_stacksave:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s4, s32
; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s4, s4, 5
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMSTART
; WAVE32-WWM-PREALLOC-NEXT: ; use s4
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMEND
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
%stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %stacksave)
ret void
}
; amdgpu_kernel variant of func_store_stacksave: the saved stack pointer is
; again consumed by inline asm. The expected output uses s0 instead of s4, has
; no entry s_waitcnt, and ends with s_endpgm rather than s_setpc_b64.
define amdgpu_kernel void @kernel_store_stacksave() {
; WAVE32-OPT-LABEL: kernel_store_stacksave:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_lshr_b32 s0, s32, 5
; WAVE32-OPT-NEXT: ;;#ASMSTART
; WAVE32-OPT-NEXT: ; use s0
; WAVE32-OPT-NEXT: ;;#ASMEND
; WAVE32-OPT-NEXT: s_endpgm
;
; WAVE64-OPT-LABEL: kernel_store_stacksave:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_lshr_b32 s0, s32, 6
; WAVE64-OPT-NEXT: ;;#ASMSTART
; WAVE64-OPT-NEXT: ; use s0
; WAVE64-OPT-NEXT: ;;#ASMEND
; WAVE64-OPT-NEXT: s_endpgm
;
; WAVE32-O0-LABEL: kernel_store_stacksave:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_mov_b32 s0, s32
; WAVE32-O0-NEXT: s_lshr_b32 s0, s0, 5
; WAVE32-O0-NEXT: ;;#ASMSTART
; WAVE32-O0-NEXT: ; use s0
; WAVE32-O0-NEXT: ;;#ASMEND
; WAVE32-O0-NEXT: s_endpgm
;
; WAVE64-O0-LABEL: kernel_store_stacksave:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_mov_b32 s0, s32
; WAVE64-O0-NEXT: s_lshr_b32 s0, s0, 6
; WAVE64-O0-NEXT: ;;#ASMSTART
; WAVE64-O0-NEXT: ; use s0
; WAVE64-O0-NEXT: ;;#ASMEND
; WAVE64-O0-NEXT: s_endpgm
;
; WAVE32-WWM-PREALLOC-LABEL: kernel_store_stacksave:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s0, s32
; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s0, s0, 5
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMSTART
; WAVE32-WWM-PREALLOC-NEXT: ; use s0
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMEND
; WAVE32-WWM-PREALLOC-NEXT: s_endpgm
%stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %stacksave)
ret void
}
; Kernel that stores i32 0 through the pointer returned by llvm.stacksave.
; The expected output shifts s32 right (5 for wave32, 6 for wave64), moves the
; result into v1 and uses it as the "offen" offset of a buffer_store_dword whose
; resource s[12:15] is loaded and adjusted at the top of the function.
; NOTE(review): s[12:15] is presumably the scratch buffer descriptor -- confirm.
; Only the wave32 configurations expect the s_bitset0_b32 s15, 21.
define amdgpu_kernel void @kernel_store_stacksave_nocall() {
; WAVE32-OPT-LABEL: kernel_store_stacksave_nocall:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_getpc_b64 s[12:13]
; WAVE32-OPT-NEXT: s_mov_b32 s12, s0
; WAVE32-OPT-NEXT: v_mov_b32_e32 v0, 0
; WAVE32-OPT-NEXT: s_load_dwordx4 s[12:15], s[12:13], 0x0
; WAVE32-OPT-NEXT: s_waitcnt lgkmcnt(0)
; WAVE32-OPT-NEXT: s_bitset0_b32 s15, 21
; WAVE32-OPT-NEXT: s_add_u32 s12, s12, s11
; WAVE32-OPT-NEXT: s_addc_u32 s13, s13, 0
; WAVE32-OPT-NEXT: s_lshr_b32 s0, s32, 5
; WAVE32-OPT-NEXT: v_mov_b32_e32 v1, s0
; WAVE32-OPT-NEXT: buffer_store_dword v0, v1, s[12:15], 0 offen
; WAVE32-OPT-NEXT: s_endpgm
;
; WAVE64-OPT-LABEL: kernel_store_stacksave_nocall:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_getpc_b64 s[12:13]
; WAVE64-OPT-NEXT: s_mov_b32 s12, s0
; WAVE64-OPT-NEXT: v_mov_b32_e32 v0, 0
; WAVE64-OPT-NEXT: s_load_dwordx4 s[12:15], s[12:13], 0x0
; WAVE64-OPT-NEXT: s_waitcnt lgkmcnt(0)
; WAVE64-OPT-NEXT: s_add_u32 s12, s12, s11
; WAVE64-OPT-NEXT: s_addc_u32 s13, s13, 0
; WAVE64-OPT-NEXT: s_lshr_b32 s0, s32, 6
; WAVE64-OPT-NEXT: v_mov_b32_e32 v1, s0
; WAVE64-OPT-NEXT: buffer_store_dword v0, v1, s[12:15], 0 offen
; WAVE64-OPT-NEXT: s_endpgm
;
; WAVE32-O0-LABEL: kernel_store_stacksave_nocall:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_getpc_b64 s[12:13]
; WAVE32-O0-NEXT: s_mov_b32 s12, s0
; WAVE32-O0-NEXT: s_load_dwordx4 s[12:15], s[12:13], 0x0
; WAVE32-O0-NEXT: s_waitcnt lgkmcnt(0)
; WAVE32-O0-NEXT: s_bitset0_b32 s15, 21
; WAVE32-O0-NEXT: s_add_u32 s12, s12, s11
; WAVE32-O0-NEXT: s_addc_u32 s13, s13, 0
; WAVE32-O0-NEXT: s_mov_b32 s0, s32
; WAVE32-O0-NEXT: s_lshr_b32 s0, s0, 5
; WAVE32-O0-NEXT: v_mov_b32_e32 v0, 0
; WAVE32-O0-NEXT: v_mov_b32_e32 v1, s0
; WAVE32-O0-NEXT: buffer_store_dword v0, v1, s[12:15], 0 offen
; WAVE32-O0-NEXT: s_endpgm
;
; WAVE64-O0-LABEL: kernel_store_stacksave_nocall:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_getpc_b64 s[12:13]
; WAVE64-O0-NEXT: s_mov_b32 s12, s0
; WAVE64-O0-NEXT: s_load_dwordx4 s[12:15], s[12:13], 0x0
; WAVE64-O0-NEXT: s_waitcnt lgkmcnt(0)
; WAVE64-O0-NEXT: s_add_u32 s12, s12, s11
; WAVE64-O0-NEXT: s_addc_u32 s13, s13, 0
; WAVE64-O0-NEXT: s_mov_b32 s0, s32
; WAVE64-O0-NEXT: s_lshr_b32 s0, s0, 6
; WAVE64-O0-NEXT: v_mov_b32_e32 v0, 0
; WAVE64-O0-NEXT: v_mov_b32_e32 v1, s0
; WAVE64-O0-NEXT: buffer_store_dword v0, v1, s[12:15], 0 offen
; WAVE64-O0-NEXT: s_endpgm
;
; WAVE32-WWM-PREALLOC-LABEL: kernel_store_stacksave_nocall:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_getpc_b64 s[12:13]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s12, s0
; WAVE32-WWM-PREALLOC-NEXT: s_load_dwordx4 s[12:15], s[12:13], 0x0
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_bitset0_b32 s15, 21
; WAVE32-WWM-PREALLOC-NEXT: s_add_u32 s12, s12, s11
; WAVE32-WWM-PREALLOC-NEXT: s_addc_u32 s13, s13, 0
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s0, s32
; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s0, s0, 5
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v0, 0
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v1, s0
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v0, v1, s[12:15], 0 offen
; WAVE32-WWM-PREALLOC-NEXT: s_endpgm
%stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
store i32 0, ptr addrspace(5) %stacksave
ret void
}
; llvm.stacksave is called in %bb1, which is reached only when %cond is true,
; rather than in the entry block. In the optimized configurations the s32 shift
; is expected between the s_cbranch_execz and the .LBB4_2 label, i.e. inside
; the conditionally executed region. The -O0 configurations additionally save
; the exec mask in lanes of v1 (v_writelane/v_readlane) and spill/reload v1
; around the branch; the WWM-PREALLOC configuration expects only the save of
; v1 at function entry and its reload before the return, without the extra
; spill inside the body.
define void @func_stacksave_nonentry_block(i1 %cond) {
; WAVE32-OPT-LABEL: func_stacksave_nonentry_block:
; WAVE32-OPT: ; %bb.0: ; %bb0
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-OPT-NEXT: v_and_b32_e32 v0, 1, v0
; WAVE32-OPT-NEXT: s_mov_b32 s4, exec_lo
; WAVE32-OPT-NEXT: v_cmpx_eq_u32_e32 1, v0
; WAVE32-OPT-NEXT: s_cbranch_execz .LBB4_2
; WAVE32-OPT-NEXT: ; %bb.1: ; %bb1
; WAVE32-OPT-NEXT: s_lshr_b32 s5, s32, 5
; WAVE32-OPT-NEXT: ;;#ASMSTART
; WAVE32-OPT-NEXT: ; use s5
; WAVE32-OPT-NEXT: ;;#ASMEND
; WAVE32-OPT-NEXT: .LBB4_2: ; %bb2
; WAVE32-OPT-NEXT: s_or_b32 exec_lo, exec_lo, s4
; WAVE32-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-OPT-LABEL: func_stacksave_nonentry_block:
; WAVE64-OPT: ; %bb.0: ; %bb0
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-OPT-NEXT: v_and_b32_e32 v0, 1, v0
; WAVE64-OPT-NEXT: s_mov_b64 s[4:5], exec
; WAVE64-OPT-NEXT: v_cmpx_eq_u32_e32 1, v0
; WAVE64-OPT-NEXT: s_cbranch_execz .LBB4_2
; WAVE64-OPT-NEXT: ; %bb.1: ; %bb1
; WAVE64-OPT-NEXT: s_lshr_b32 s6, s32, 6
; WAVE64-OPT-NEXT: ;;#ASMSTART
; WAVE64-OPT-NEXT: ; use s6
; WAVE64-OPT-NEXT: ;;#ASMEND
; WAVE64-OPT-NEXT: .LBB4_2: ; %bb2
; WAVE64-OPT-NEXT: s_or_b64 exec, exec, s[4:5]
; WAVE64-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-O0-LABEL: func_stacksave_nonentry_block:
; WAVE32-O0: ; %bb.0: ; %bb0
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: s_xor_saveexec_b32 s4, -1
; WAVE32-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-O0-NEXT: v_and_b32_e64 v0, 1, v0
; WAVE32-O0-NEXT: v_cmp_eq_u32_e64 s5, v0, 1
; WAVE32-O0-NEXT: s_mov_b32 s4, exec_lo
; WAVE32-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; WAVE32-O0-NEXT: v_writelane_b32 v1, s4, 0
; WAVE32-O0-NEXT: s_or_saveexec_b32 s7, -1
; WAVE32-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s7
; WAVE32-O0-NEXT: s_and_b32 s4, s4, s5
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-O0-NEXT: s_cbranch_execz .LBB4_2
; WAVE32-O0-NEXT: ; %bb.1: ; %bb1
; WAVE32-O0-NEXT: s_mov_b32 s4, s32
; WAVE32-O0-NEXT: s_lshr_b32 s4, s4, 5
; WAVE32-O0-NEXT: ;;#ASMSTART
; WAVE32-O0-NEXT: ; use s4
; WAVE32-O0-NEXT: ;;#ASMEND
; WAVE32-O0-NEXT: .LBB4_2: ; %bb2
; WAVE32-O0-NEXT: s_or_saveexec_b32 s7, -1
; WAVE32-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s7
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0)
; WAVE32-O0-NEXT: v_readlane_b32 s4, v1, 0
; WAVE32-O0-NEXT: s_or_b32 exec_lo, exec_lo, s4
; WAVE32-O0-NEXT: s_xor_saveexec_b32 s4, -1
; WAVE32-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0)
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-O0-LABEL: func_stacksave_nonentry_block:
; WAVE64-O0: ; %bb.0: ; %bb0
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
; WAVE64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5]
; WAVE64-O0-NEXT: v_and_b32_e64 v0, 1, v0
; WAVE64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v0, 1
; WAVE64-O0-NEXT: s_mov_b64 s[4:5], exec
; WAVE64-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; WAVE64-O0-NEXT: v_writelane_b32 v1, s4, 0
; WAVE64-O0-NEXT: v_writelane_b32 v1, s5, 1
; WAVE64-O0-NEXT: s_or_saveexec_b64 s[10:11], -1
; WAVE64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; WAVE64-O0-NEXT: s_mov_b64 exec, s[10:11]
; WAVE64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5]
; WAVE64-O0-NEXT: s_cbranch_execz .LBB4_2
; WAVE64-O0-NEXT: ; %bb.1: ; %bb1
; WAVE64-O0-NEXT: s_mov_b32 s4, s32
; WAVE64-O0-NEXT: s_lshr_b32 s4, s4, 6
; WAVE64-O0-NEXT: ;;#ASMSTART
; WAVE64-O0-NEXT: ; use s4
; WAVE64-O0-NEXT: ;;#ASMEND
; WAVE64-O0-NEXT: .LBB4_2: ; %bb2
; WAVE64-O0-NEXT: s_or_saveexec_b64 s[10:11], -1
; WAVE64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; WAVE64-O0-NEXT: s_mov_b64 exec, s[10:11]
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0)
; WAVE64-O0-NEXT: v_readlane_b32 s4, v1, 0
; WAVE64-O0-NEXT: v_readlane_b32 s5, v1, 1
; WAVE64-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
; WAVE64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5]
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0)
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_nonentry_block:
; WAVE32-WWM-PREALLOC: ; %bb.0: ; %bb0
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s4, -1
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-WWM-PREALLOC-NEXT: v_and_b32_e64 v0, 1, v0
; WAVE32-WWM-PREALLOC-NEXT: v_cmp_eq_u32_e64 s5, v0, 1
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s4, exec_lo
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v1, s4, 0
; WAVE32-WWM-PREALLOC-NEXT: s_and_b32 s4, s4, s5
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-WWM-PREALLOC-NEXT: s_cbranch_execz .LBB4_2
; WAVE32-WWM-PREALLOC-NEXT: ; %bb.1: ; %bb1
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s4, s32
; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s4, s4, 5
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMSTART
; WAVE32-WWM-PREALLOC-NEXT: ; use s4
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMEND
; WAVE32-WWM-PREALLOC-NEXT: .LBB4_2: ; %bb2
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s4, v1, 0
; WAVE32-WWM-PREALLOC-NEXT: s_or_b32 exec_lo, exec_lo, s4
; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s4, -1
; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
bb0:
br i1 %cond, label %bb1, label %bb2
bb1:
%stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %stacksave)
br label %bb2
bb2:
ret void
}
; llvm.stackrestore with a poison operand. The expected output still writes s32
; from s4 shifted left by 5 (wave32) or 6 (wave64); the -O0 configurations mark
; s4 as "implicit-def" and expect no instruction that gives it a value.
define void @func_stackrestore_poison() {
; WAVE32-OPT-LABEL: func_stackrestore_poison:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-OPT-NEXT: s_lshl_b32 s32, s4, 5
; WAVE32-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-OPT-LABEL: func_stackrestore_poison:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-OPT-NEXT: s_lshl_b32 s32, s4, 6
; WAVE64-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-O0-LABEL: func_stackrestore_poison:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: ; implicit-def: $sgpr4
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-O0-NEXT: s_mov_b32 s32, s4
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-O0-LABEL: func_stackrestore_poison:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: ; implicit-def: $sgpr4
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: s_lshl_b32 s4, s4, 6
; WAVE64-O0-NEXT: s_mov_b32 s32, s4
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-WWM-PREALLOC-LABEL: func_stackrestore_poison:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr4
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
call void @llvm.stackrestore.p5(ptr addrspace(5) poison)
ret void
}
; llvm.stackrestore of a null pointer. The optimized configurations expect the
; shift to be folded away (s_mov_b32 s32, 0); the -O0 configurations expect the
; constant 0 to be materialized in s4, shifted left and then copied into s32.
define void @func_stackrestore_null() {
; WAVE32-OPT-LABEL: func_stackrestore_null:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-OPT-NEXT: s_mov_b32 s32, 0
; WAVE32-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-OPT-LABEL: func_stackrestore_null:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-OPT-NEXT: s_mov_b32 s32, 0
; WAVE64-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-O0-LABEL: func_stackrestore_null:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: s_mov_b32 s4, 0
; WAVE32-O0-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-O0-NEXT: s_mov_b32 s32, s4
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-O0-LABEL: func_stackrestore_null:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: s_mov_b32 s4, 0
; WAVE64-O0-NEXT: s_lshl_b32 s4, s4, 6
; WAVE64-O0-NEXT: s_mov_b32 s32, s4
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-WWM-PREALLOC-LABEL: func_stackrestore_null:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s4, 0
; WAVE32-WWM-PREALLOC-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
call void @llvm.stackrestore.p5(ptr addrspace(5) null)
ret void
}
; llvm.stackrestore of inttoptr(-1). The optimized configurations expect a
; single s_movk_i32 of 0xffe0 (wave32) or 0xffc0 (wave64), which are the low
; 16 bits of -1 << 5 and -1 << 6; the -O0 configurations expect an explicit
; s_mov_b32 of -1, an s_lshl_b32 and a copy into s32.
define void @func_stackrestore_neg1() {
; WAVE32-OPT-LABEL: func_stackrestore_neg1:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-OPT-NEXT: s_movk_i32 s32, 0xffe0
; WAVE32-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-OPT-LABEL: func_stackrestore_neg1:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-OPT-NEXT: s_movk_i32 s32, 0xffc0
; WAVE64-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-O0-LABEL: func_stackrestore_neg1:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: s_mov_b32 s4, -1
; WAVE32-O0-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-O0-NEXT: s_mov_b32 s32, s4
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-O0-LABEL: func_stackrestore_neg1:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: s_mov_b32 s4, -1
; WAVE64-O0-NEXT: s_lshl_b32 s4, s4, 6
; WAVE64-O0-NEXT: s_mov_b32 s32, s4
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-WWM-PREALLOC-LABEL: func_stackrestore_neg1:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s4, -1
; WAVE32-WWM-PREALLOC-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
call void @llvm.stackrestore.p5(ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)))
ret void
}
; llvm.stackrestore of inttoptr(42). The optimized configurations expect the
; pre-shifted constant: 0x540 (42 << 5) for wave32 and 0xa80 (42 << 6) for
; wave64. The -O0 configurations expect 42 to be materialized in s4 and
; shifted by a separate s_lshl_b32.
define void @func_stackrestore_42() {
; WAVE32-OPT-LABEL: func_stackrestore_42:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-OPT-NEXT: s_movk_i32 s32, 0x540
; WAVE32-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-OPT-LABEL: func_stackrestore_42:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-OPT-NEXT: s_movk_i32 s32, 0xa80
; WAVE64-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-O0-LABEL: func_stackrestore_42:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: s_mov_b32 s4, 42
; WAVE32-O0-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-O0-NEXT: s_mov_b32 s32, s4
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-O0-LABEL: func_stackrestore_42:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: s_mov_b32 s4, 42
; WAVE64-O0-NEXT: s_lshl_b32 s4, s4, 6
; WAVE64-O0-NEXT: s_mov_b32 s32, s4
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-WWM-PREALLOC-LABEL: func_stackrestore_42:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s4, 42
; WAVE32-WWM-PREALLOC-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
call void @llvm.stackrestore.p5(ptr addrspace(5) inttoptr (i32 42 to ptr addrspace(5)))
ret void
}
; llvm.stacksave whose result is passed straight to llvm.stackrestore. The
; optimized configurations expect nothing between the entry wait and the
; return; the -O0 configurations expect s32 to be copied to s4 and back with
; no shift in either direction.
define void @func_stacksave_stackrestore() {
; WAVE32-OPT-LABEL: func_stacksave_stackrestore:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-OPT-LABEL: func_stacksave_stackrestore:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-O0-LABEL: func_stacksave_stackrestore:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: s_mov_b32 s4, s32
; WAVE32-O0-NEXT: s_mov_b32 s32, s4
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-O0-LABEL: func_stacksave_stackrestore:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: s_mov_b32 s4, s32
; WAVE64-O0-NEXT: s_mov_b32 s32, s4
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_stackrestore:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s4, s32
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
%stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
call void @llvm.stackrestore.p5(ptr addrspace(5) %stacksave)
ret void
}
; Save, use in inline asm, then restore the same value. The optimized
; configurations expect only the right shift feeding the asm and no write to
; s32. The -O0 configurations keep the unshifted copy of s32 in s4 for the
; restore and put the shifted value for the asm in s5.
define void @func_stacksave_stackrestore_use() {
; WAVE32-OPT-LABEL: func_stacksave_stackrestore_use:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-OPT-NEXT: s_lshr_b32 s4, s32, 5
; WAVE32-OPT-NEXT: ;;#ASMSTART
; WAVE32-OPT-NEXT: ; use s4
; WAVE32-OPT-NEXT: ;;#ASMEND
; WAVE32-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-OPT-LABEL: func_stacksave_stackrestore_use:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-OPT-NEXT: s_lshr_b32 s4, s32, 6
; WAVE64-OPT-NEXT: ;;#ASMSTART
; WAVE64-OPT-NEXT: ; use s4
; WAVE64-OPT-NEXT: ;;#ASMEND
; WAVE64-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-O0-LABEL: func_stacksave_stackrestore_use:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: s_mov_b32 s4, s32
; WAVE32-O0-NEXT: s_lshr_b32 s5, s4, 5
; WAVE32-O0-NEXT: ;;#ASMSTART
; WAVE32-O0-NEXT: ; use s5
; WAVE32-O0-NEXT: ;;#ASMEND
; WAVE32-O0-NEXT: s_mov_b32 s32, s4
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-O0-LABEL: func_stacksave_stackrestore_use:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: s_mov_b32 s4, s32
; WAVE64-O0-NEXT: s_lshr_b32 s5, s4, 6
; WAVE64-O0-NEXT: ;;#ASMSTART
; WAVE64-O0-NEXT: ; use s5
; WAVE64-O0-NEXT: ;;#ASMEND
; WAVE64-O0-NEXT: s_mov_b32 s32, s4
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_stackrestore_use:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s4, s32
; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s5, s4, 5
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMSTART
; WAVE32-WWM-PREALLOC-NEXT: ; use s5
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMEND
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
%stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %stacksave)
call void @llvm.stackrestore.p5(ptr addrspace(5) %stacksave)
ret void
}
; amdgpu_kernel variant of func_stacksave_stackrestore_use. The expected output
; has the same shape with s0/s1 in place of s4/s5, no entry s_waitcnt, and
; s_endpgm as the terminator.
define amdgpu_kernel void @kernel_stacksave_stackrestore_use() {
; WAVE32-OPT-LABEL: kernel_stacksave_stackrestore_use:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_lshr_b32 s0, s32, 5
; WAVE32-OPT-NEXT: ;;#ASMSTART
; WAVE32-OPT-NEXT: ; use s0
; WAVE32-OPT-NEXT: ;;#ASMEND
; WAVE32-OPT-NEXT: s_endpgm
;
; WAVE64-OPT-LABEL: kernel_stacksave_stackrestore_use:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_lshr_b32 s0, s32, 6
; WAVE64-OPT-NEXT: ;;#ASMSTART
; WAVE64-OPT-NEXT: ; use s0
; WAVE64-OPT-NEXT: ;;#ASMEND
; WAVE64-OPT-NEXT: s_endpgm
;
; WAVE32-O0-LABEL: kernel_stacksave_stackrestore_use:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_mov_b32 s0, s32
; WAVE32-O0-NEXT: s_lshr_b32 s1, s0, 5
; WAVE32-O0-NEXT: ;;#ASMSTART
; WAVE32-O0-NEXT: ; use s1
; WAVE32-O0-NEXT: ;;#ASMEND
; WAVE32-O0-NEXT: s_mov_b32 s32, s0
; WAVE32-O0-NEXT: s_endpgm
;
; WAVE64-O0-LABEL: kernel_stacksave_stackrestore_use:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_mov_b32 s0, s32
; WAVE64-O0-NEXT: s_lshr_b32 s1, s0, 6
; WAVE64-O0-NEXT: ;;#ASMSTART
; WAVE64-O0-NEXT: ; use s1
; WAVE64-O0-NEXT: ;;#ASMEND
; WAVE64-O0-NEXT: s_mov_b32 s32, s0
; WAVE64-O0-NEXT: s_endpgm
;
; WAVE32-WWM-PREALLOC-LABEL: kernel_stacksave_stackrestore_use:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s0, s32
; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s1, s0, 5
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMSTART
; WAVE32-WWM-PREALLOC-NEXT: ; use s1
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMEND
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s0
; WAVE32-WWM-PREALLOC-NEXT: s_endpgm
%stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %stacksave)
call void @llvm.stackrestore.p5(ptr addrspace(5) %stacksave)
ret void
}
; Restores the stack pointer from the saved value plus %offset. The expected
; output shifts s32 right, adds it to v0 with a VALU add, moves the sum back to
; an SGPR with v_readfirstlane_b32 and shifts it left again before writing s32.
define void @func_stacksave_stackrestore_voffset(i32 %offset) {
; WAVE32-OPT-LABEL: func_stacksave_stackrestore_voffset:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-OPT-NEXT: s_lshr_b32 s4, s32, 5
; WAVE32-OPT-NEXT: v_add_nc_u32_e32 v0, s4, v0
; WAVE32-OPT-NEXT: v_readfirstlane_b32 s4, v0
; WAVE32-OPT-NEXT: s_lshl_b32 s32, s4, 5
; WAVE32-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-OPT-LABEL: func_stacksave_stackrestore_voffset:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-OPT-NEXT: s_lshr_b32 s4, s32, 6
; WAVE64-OPT-NEXT: v_add_nc_u32_e32 v0, s4, v0
; WAVE64-OPT-NEXT: v_readfirstlane_b32 s4, v0
; WAVE64-OPT-NEXT: s_lshl_b32 s32, s4, 6
; WAVE64-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-O0-LABEL: func_stacksave_stackrestore_voffset:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: s_mov_b32 s4, s32
; WAVE32-O0-NEXT: s_lshr_b32 s4, s4, 5
; WAVE32-O0-NEXT: v_add_nc_u32_e64 v0, s4, v0
; WAVE32-O0-NEXT: v_readfirstlane_b32 s4, v0
; WAVE32-O0-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-O0-NEXT: s_mov_b32 s32, s4
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-O0-LABEL: func_stacksave_stackrestore_voffset:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: s_mov_b32 s4, s32
; WAVE64-O0-NEXT: s_lshr_b32 s4, s4, 6
; WAVE64-O0-NEXT: v_add_nc_u32_e64 v0, s4, v0
; WAVE64-O0-NEXT: v_readfirstlane_b32 s4, v0
; WAVE64-O0-NEXT: s_lshl_b32 s4, s4, 6
; WAVE64-O0-NEXT: s_mov_b32 s32, s4
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_stackrestore_voffset:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s4, s32
; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s4, s4, 5
; WAVE32-WWM-PREALLOC-NEXT: v_add_nc_u32_e64 v0, s4, v0
; WAVE32-WWM-PREALLOC-NEXT: v_readfirstlane_b32 s4, v0
; WAVE32-WWM-PREALLOC-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
%stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
%gep = getelementptr i8, ptr addrspace(5) %stacksave, i32 %offset
call void @llvm.stackrestore.p5(ptr addrspace(5) %gep)
ret void
}
; Despite "stacksave" in the name, this only calls llvm.stackrestore, on a
; pointer argument that the expected output reads from v0: the value goes
; through v_readfirstlane_b32 into s4 and is shifted left into s32.
define void @func_stacksave_vgpr(ptr addrspace(5) %stack) {
; WAVE32-OPT-LABEL: func_stacksave_vgpr:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-OPT-NEXT: v_readfirstlane_b32 s4, v0
; WAVE32-OPT-NEXT: s_lshl_b32 s32, s4, 5
; WAVE32-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-OPT-LABEL: func_stacksave_vgpr:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-OPT-NEXT: v_readfirstlane_b32 s4, v0
; WAVE64-OPT-NEXT: s_lshl_b32 s32, s4, 6
; WAVE64-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-O0-LABEL: func_stacksave_vgpr:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: v_readfirstlane_b32 s4, v0
; WAVE32-O0-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-O0-NEXT: s_mov_b32 s32, s4
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-O0-LABEL: func_stacksave_vgpr:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: v_readfirstlane_b32 s4, v0
; WAVE64-O0-NEXT: s_lshl_b32 s4, s4, 6
; WAVE64-O0-NEXT: s_mov_b32 s32, s4
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_vgpr:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: v_readfirstlane_b32 s4, v0
; WAVE32-WWM-PREALLOC-NEXT: s_lshl_b32 s4, s4, 5
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
call void @llvm.stackrestore.p5(ptr addrspace(5) %stack)
ret void
}
; Despite "stacksave" in the name, this only calls llvm.stackrestore. The
; function uses the amdgpu_gfx calling convention with an inreg pointer
; argument, which the expected output reads from s4 and shifts left without
; any v_readfirstlane_b32. The -O0 configurations go through s34 as a temporary.
define amdgpu_gfx void @func_stacksave_sgpr(ptr addrspace(5) inreg %stack) {
; WAVE32-OPT-LABEL: func_stacksave_sgpr:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-OPT-NEXT: s_lshl_b32 s32, s4, 5
; WAVE32-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-OPT-LABEL: func_stacksave_sgpr:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-OPT-NEXT: s_lshl_b32 s32, s4, 6
; WAVE64-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-O0-LABEL: func_stacksave_sgpr:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: s_lshl_b32 s34, s4, 5
; WAVE32-O0-NEXT: s_mov_b32 s32, s34
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-O0-LABEL: func_stacksave_sgpr:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: s_lshl_b32 s34, s4, 6
; WAVE64-O0-NEXT: s_mov_b32 s32, s34
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_sgpr:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_lshl_b32 s34, s4, 5
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s34
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
call void @llvm.stackrestore.p5(ptr addrspace(5) %stack)
ret void
}
; Despite "stacksave" in the name, this only calls llvm.stackrestore. The
; kernel argument is loaded with s_load_dword from s[4:5] and first consumed by
; inline asm. Only the -O0 configurations expect the shift and the write to
; s32; the optimized configurations expect nothing after the asm but s_endpgm.
define amdgpu_kernel void @kernel_stacksave_sgpr(ptr addrspace(5) %stack) {
; WAVE32-OPT-LABEL: kernel_stacksave_sgpr:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_load_dword s0, s[4:5], 0x0
; WAVE32-OPT-NEXT: s_waitcnt lgkmcnt(0)
; WAVE32-OPT-NEXT: ;;#ASMSTART
; WAVE32-OPT-NEXT: ; use s0
; WAVE32-OPT-NEXT: ;;#ASMEND
; WAVE32-OPT-NEXT: s_endpgm
;
; WAVE64-OPT-LABEL: kernel_stacksave_sgpr:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_load_dword s0, s[4:5], 0x0
; WAVE64-OPT-NEXT: s_waitcnt lgkmcnt(0)
; WAVE64-OPT-NEXT: ;;#ASMSTART
; WAVE64-OPT-NEXT: ; use s0
; WAVE64-OPT-NEXT: ;;#ASMEND
; WAVE64-OPT-NEXT: s_endpgm
;
; WAVE32-O0-LABEL: kernel_stacksave_sgpr:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_load_dword s0, s[4:5], 0x0
; WAVE32-O0-NEXT: s_waitcnt lgkmcnt(0)
; WAVE32-O0-NEXT: s_mov_b32 s1, s0
; WAVE32-O0-NEXT: ;;#ASMSTART
; WAVE32-O0-NEXT: ; use s1
; WAVE32-O0-NEXT: ;;#ASMEND
; WAVE32-O0-NEXT: s_lshl_b32 s0, s0, 5
; WAVE32-O0-NEXT: s_mov_b32 s32, s0
; WAVE32-O0-NEXT: s_endpgm
;
; WAVE64-O0-LABEL: kernel_stacksave_sgpr:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_load_dword s0, s[4:5], 0x0
; WAVE64-O0-NEXT: s_waitcnt lgkmcnt(0)
; WAVE64-O0-NEXT: s_mov_b32 s1, s0
; WAVE64-O0-NEXT: ;;#ASMSTART
; WAVE64-O0-NEXT: ; use s1
; WAVE64-O0-NEXT: ;;#ASMEND
; WAVE64-O0-NEXT: s_lshl_b32 s0, s0, 6
; WAVE64-O0-NEXT: s_mov_b32 s32, s0
; WAVE64-O0-NEXT: s_endpgm
;
; WAVE32-WWM-PREALLOC-LABEL: kernel_stacksave_sgpr:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_load_dword s0, s[4:5], 0x0
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s1, s0
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMSTART
; WAVE32-WWM-PREALLOC-NEXT: ; use s1
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMEND
; WAVE32-WWM-PREALLOC-NEXT: s_lshl_b32 s0, s0, 5
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s0
; WAVE32-WWM-PREALLOC-NEXT: s_endpgm
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %stack)
call void @llvm.stackrestore.p5(ptr addrspace(5) %stack)
ret void
}
; Kernel that owns a [32 x i32] alloca and brackets a call to
; @stack_passed_argument (a [32 x i32] poison aggregate plus the i32 17) with
; llvm.stacksave / llvm.stackrestore. The saved pointer is also fed to an
; inline-asm "s" operand between the call and the restore.
; What the checks below pin down:
;  * s32 is initialised to 0x1200 in the wave32 runs and 0x2400 in the wave64
;    runs.
;  * The value given to the inline asm is s32 shifted right by 5 (wave32) or
;    6 (wave64).
;  * The volatile store of 42 uses a zero soffset, and the 17 is stored at
;    offset:4 relative to s32 (or a copy of s32 in the -O0 runs).
;  * In the -O0 runs the raw s32 copy and the shifted value are kept in lanes
;    0 and 1 of v32 across the call, and s32 is rewritten from lane 0 after
;    the inline asm; the optimized runs have no write to s32 after
;    s_swappc_b64.
define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-OPT-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_getpc_b64 s[20:21]
; WAVE32-OPT-NEXT: s_mov_b32 s20, s0
; WAVE32-OPT-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; WAVE32-OPT-NEXT: s_load_dwordx4 s[20:23], s[20:21], 0x0
; WAVE32-OPT-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; WAVE32-OPT-NEXT: s_movk_i32 s32, 0x1200
; WAVE32-OPT-NEXT: s_mov_b32 s13, s9
; WAVE32-OPT-NEXT: s_mov_b32 s12, s8
; WAVE32-OPT-NEXT: s_mov_b64 s[8:9], s[4:5]
; WAVE32-OPT-NEXT: s_mov_b32 s4, s32
; WAVE32-OPT-NEXT: v_mov_b32_e32 v3, 42
; WAVE32-OPT-NEXT: v_mov_b32_e32 v4, 17
; WAVE32-OPT-NEXT: v_or3_b32 v31, v0, v1, v2
; WAVE32-OPT-NEXT: s_mov_b32 s14, s10
; WAVE32-OPT-NEXT: s_mov_b32 s17, stack_passed_argument@abs32@hi
; WAVE32-OPT-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo
; WAVE32-OPT-NEXT: s_waitcnt lgkmcnt(0)
; WAVE32-OPT-NEXT: s_bitset0_b32 s23, 21
; WAVE32-OPT-NEXT: s_add_u32 s20, s20, s11
; WAVE32-OPT-NEXT: s_addc_u32 s21, s21, 0
; WAVE32-OPT-NEXT: s_mov_b64 s[10:11], s[6:7]
; WAVE32-OPT-NEXT: s_lshr_b32 s15, s4, 5
; WAVE32-OPT-NEXT: s_mov_b64 s[4:5], s[0:1]
; WAVE32-OPT-NEXT: s_mov_b64 s[6:7], s[2:3]
; WAVE32-OPT-NEXT: s_mov_b64 s[0:1], s[20:21]
; WAVE32-OPT-NEXT: s_mov_b64 s[2:3], s[22:23]
; WAVE32-OPT-NEXT: buffer_store_dword v3, off, s[20:23], 0
; WAVE32-OPT-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-OPT-NEXT: buffer_store_dword v4, off, s[20:23], s32 offset:4
; WAVE32-OPT-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE32-OPT-NEXT: ;;#ASMSTART
; WAVE32-OPT-NEXT: ; use s15
; WAVE32-OPT-NEXT: ;;#ASMEND
; WAVE32-OPT-NEXT: s_endpgm
;
; WAVE64-OPT-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_getpc_b64 s[20:21]
; WAVE64-OPT-NEXT: s_mov_b32 s20, s0
; WAVE64-OPT-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; WAVE64-OPT-NEXT: s_load_dwordx4 s[20:23], s[20:21], 0x0
; WAVE64-OPT-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; WAVE64-OPT-NEXT: s_movk_i32 s32, 0x2400
; WAVE64-OPT-NEXT: s_mov_b32 s13, s9
; WAVE64-OPT-NEXT: s_mov_b32 s12, s8
; WAVE64-OPT-NEXT: s_mov_b64 s[8:9], s[4:5]
; WAVE64-OPT-NEXT: s_mov_b32 s4, s32
; WAVE64-OPT-NEXT: v_mov_b32_e32 v3, 42
; WAVE64-OPT-NEXT: v_mov_b32_e32 v4, 17
; WAVE64-OPT-NEXT: v_or3_b32 v31, v0, v1, v2
; WAVE64-OPT-NEXT: s_mov_b32 s14, s10
; WAVE64-OPT-NEXT: s_mov_b32 s17, stack_passed_argument@abs32@hi
; WAVE64-OPT-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo
; WAVE64-OPT-NEXT: s_waitcnt lgkmcnt(0)
; WAVE64-OPT-NEXT: s_add_u32 s20, s20, s11
; WAVE64-OPT-NEXT: s_addc_u32 s21, s21, 0
; WAVE64-OPT-NEXT: s_mov_b64 s[10:11], s[6:7]
; WAVE64-OPT-NEXT: s_lshr_b32 s15, s4, 6
; WAVE64-OPT-NEXT: s_mov_b64 s[4:5], s[0:1]
; WAVE64-OPT-NEXT: s_mov_b64 s[6:7], s[2:3]
; WAVE64-OPT-NEXT: s_mov_b64 s[0:1], s[20:21]
; WAVE64-OPT-NEXT: s_mov_b64 s[2:3], s[22:23]
; WAVE64-OPT-NEXT: buffer_store_dword v3, off, s[20:23], 0
; WAVE64-OPT-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE64-OPT-NEXT: buffer_store_dword v4, off, s[20:23], s32 offset:4
; WAVE64-OPT-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE64-OPT-NEXT: ;;#ASMSTART
; WAVE64-OPT-NEXT: ; use s15
; WAVE64-OPT-NEXT: ;;#ASMEND
; WAVE64-OPT-NEXT: s_endpgm
;
; WAVE32-O0-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_mov_b32 s32, 0x1200
; WAVE32-O0-NEXT: s_getpc_b64 s[20:21]
; WAVE32-O0-NEXT: s_mov_b32 s20, s0
; WAVE32-O0-NEXT: s_load_dwordx4 s[20:23], s[20:21], 0x0
; WAVE32-O0-NEXT: s_waitcnt lgkmcnt(0)
; WAVE32-O0-NEXT: s_bitset0_b32 s23, 21
; WAVE32-O0-NEXT: s_add_u32 s20, s20, s11
; WAVE32-O0-NEXT: s_addc_u32 s21, s21, 0
; WAVE32-O0-NEXT: s_mov_b32 s14, s10
; WAVE32-O0-NEXT: s_mov_b32 s13, s9
; WAVE32-O0-NEXT: s_mov_b32 s12, s8
; WAVE32-O0-NEXT: s_mov_b64 s[10:11], s[6:7]
; WAVE32-O0-NEXT: s_mov_b64 s[8:9], s[4:5]
; WAVE32-O0-NEXT: s_mov_b64 s[6:7], s[2:3]
; WAVE32-O0-NEXT: s_mov_b64 s[4:5], s[0:1]
; WAVE32-O0-NEXT: s_mov_b32 s0, s32
; WAVE32-O0-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
; WAVE32-O0-NEXT: v_writelane_b32 v32, s0, 0
; WAVE32-O0-NEXT: s_lshr_b32 s0, s0, 5
; WAVE32-O0-NEXT: v_writelane_b32 v32, s0, 1
; WAVE32-O0-NEXT: v_mov_b32_e32 v3, 42
; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[20:23], 0
; WAVE32-O0-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-O0-NEXT: s_mov_b64 s[0:1], s[20:21]
; WAVE32-O0-NEXT: s_mov_b64 s[2:3], s[22:23]
; WAVE32-O0-NEXT: s_mov_b32 s15, s32
; WAVE32-O0-NEXT: v_mov_b32_e32 v3, 17
; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[20:23], s15 offset:4
; WAVE32-O0-NEXT: s_mov_b32 s15, stack_passed_argument@abs32@hi
; WAVE32-O0-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo
; WAVE32-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
; WAVE32-O0-NEXT: s_mov_b32 s17, s15
; WAVE32-O0-NEXT: s_mov_b32 s15, 20
; WAVE32-O0-NEXT: v_lshlrev_b32_e64 v2, s15, v2
; WAVE32-O0-NEXT: s_mov_b32 s15, 10
; WAVE32-O0-NEXT: v_lshlrev_b32_e64 v1, s15, v1
; WAVE32-O0-NEXT: v_or3_b32 v31, v0, v1, v2
; WAVE32-O0-NEXT: ; implicit-def: $sgpr15
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v0, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v1, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v2, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v3, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v4, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v5, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v6, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v7, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v8, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v9, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v10, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v11, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v12, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v13, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v14, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v15, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v16, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v17, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v18, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v19, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v20, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v21, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v22, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v23, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v24, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v25, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v26, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v27, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v28, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v29, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v30, s18
; WAVE32-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE32-O0-NEXT: v_readlane_b32 s1, v32, 1
; WAVE32-O0-NEXT: v_readlane_b32 s0, v32, 0
; WAVE32-O0-NEXT: ;;#ASMSTART
; WAVE32-O0-NEXT: ; use s1
; WAVE32-O0-NEXT: ;;#ASMEND
; WAVE32-O0-NEXT: s_mov_b32 s32, s0
; WAVE32-O0-NEXT: s_endpgm
;
; WAVE64-O0-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_mov_b32 s32, 0x2400
; WAVE64-O0-NEXT: s_getpc_b64 s[24:25]
; WAVE64-O0-NEXT: s_mov_b32 s24, s0
; WAVE64-O0-NEXT: s_load_dwordx4 s[24:27], s[24:25], 0x0
; WAVE64-O0-NEXT: s_waitcnt lgkmcnt(0)
; WAVE64-O0-NEXT: s_add_u32 s24, s24, s11
; WAVE64-O0-NEXT: s_addc_u32 s25, s25, 0
; WAVE64-O0-NEXT: s_mov_b32 s14, s10
; WAVE64-O0-NEXT: s_mov_b32 s13, s9
; WAVE64-O0-NEXT: s_mov_b32 s12, s8
; WAVE64-O0-NEXT: s_mov_b64 s[10:11], s[6:7]
; WAVE64-O0-NEXT: s_mov_b64 s[8:9], s[4:5]
; WAVE64-O0-NEXT: s_mov_b64 s[6:7], s[2:3]
; WAVE64-O0-NEXT: s_mov_b64 s[4:5], s[0:1]
; WAVE64-O0-NEXT: s_mov_b32 s0, s32
; WAVE64-O0-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
; WAVE64-O0-NEXT: v_writelane_b32 v32, s0, 0
; WAVE64-O0-NEXT: s_lshr_b32 s0, s0, 6
; WAVE64-O0-NEXT: v_writelane_b32 v32, s0, 1
; WAVE64-O0-NEXT: v_mov_b32_e32 v3, 42
; WAVE64-O0-NEXT: buffer_store_dword v3, off, s[24:27], 0
; WAVE64-O0-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE64-O0-NEXT: s_mov_b64 s[0:1], s[24:25]
; WAVE64-O0-NEXT: s_mov_b64 s[2:3], s[26:27]
; WAVE64-O0-NEXT: s_mov_b32 s15, s32
; WAVE64-O0-NEXT: v_mov_b32_e32 v3, 17
; WAVE64-O0-NEXT: buffer_store_dword v3, off, s[24:27], s15 offset:4
; WAVE64-O0-NEXT: s_mov_b32 s15, stack_passed_argument@abs32@hi
; WAVE64-O0-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo
; WAVE64-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
; WAVE64-O0-NEXT: s_mov_b32 s17, s15
; WAVE64-O0-NEXT: s_mov_b32 s15, 20
; WAVE64-O0-NEXT: v_lshlrev_b32_e64 v2, s15, v2
; WAVE64-O0-NEXT: s_mov_b32 s15, 10
; WAVE64-O0-NEXT: v_lshlrev_b32_e64 v1, s15, v1
; WAVE64-O0-NEXT: v_or3_b32 v31, v0, v1, v2
; WAVE64-O0-NEXT: ; implicit-def: $sgpr15
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v0, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v1, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v2, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v3, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v4, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v5, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v6, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v7, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v8, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v9, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v10, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v11, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v12, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v13, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v14, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v15, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v16, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v17, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v18, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v19, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v20, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v21, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v22, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v23, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v24, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v25, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v26, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v27, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v28, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v29, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v30, s18
; WAVE64-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE64-O0-NEXT: v_readlane_b32 s1, v32, 1
; WAVE64-O0-NEXT: v_readlane_b32 s0, v32, 0
; WAVE64-O0-NEXT: ;;#ASMSTART
; WAVE64-O0-NEXT: ; use s1
; WAVE64-O0-NEXT: ;;#ASMEND
; WAVE64-O0-NEXT: s_mov_b32 s32, s0
; WAVE64-O0-NEXT: s_endpgm
;
; WAVE32-WWM-PREALLOC-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, 0x1200
; WAVE32-WWM-PREALLOC-NEXT: s_getpc_b64 s[20:21]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s20, s0
; WAVE32-WWM-PREALLOC-NEXT: s_load_dwordx4 s[20:23], s[20:21], 0x0
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_bitset0_b32 s23, 21
; WAVE32-WWM-PREALLOC-NEXT: s_add_u32 s20, s20, s11
; WAVE32-WWM-PREALLOC-NEXT: s_addc_u32 s21, s21, 0
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s14, s10
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s13, s9
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s12, s8
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[10:11], s[6:7]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[8:9], s[4:5]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[6:7], s[2:3]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[4:5], s[0:1]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s0, s32
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s0, 0
; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s0, s0, 5
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s0, 1
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v3, 42
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v3, off, s[20:23], 0
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[0:1], s[20:21]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[2:3], s[22:23]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s15, s32
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v3, 17
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v3, off, s[20:23], s15 offset:4
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s15, stack_passed_argument@abs32@hi
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo
; WAVE32-WWM-PREALLOC-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s17, s15
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s15, 20
; WAVE32-WWM-PREALLOC-NEXT: v_lshlrev_b32_e64 v2, s15, v2
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s15, 10
; WAVE32-WWM-PREALLOC-NEXT: v_lshlrev_b32_e64 v1, s15, v1
; WAVE32-WWM-PREALLOC-NEXT: v_or3_b32 v31, v0, v1, v2
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr15
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v0, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v1, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v2, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v3, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v4, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v5, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v6, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v7, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v8, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v9, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v10, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v11, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v12, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v13, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v14, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v15, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v16, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v17, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v18, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v19, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v20, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v21, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v22, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v23, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v24, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v25, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v26, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v27, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v28, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v29, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v30, s18
; WAVE32-WWM-PREALLOC-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s1, v32, 1
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s0, v32, 0
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMSTART
; WAVE32-WWM-PREALLOC-NEXT: ; use s1
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMEND
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s0
; WAVE32-WWM-PREALLOC-NEXT: s_endpgm
%alloca = alloca [32 x i32], addrspace(5)
%stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
store volatile i32 42, ptr addrspace(5) %alloca
call void @stack_passed_argument([32 x i32] poison, i32 17)
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %stacksave)
call void @llvm.stackrestore.p5(ptr addrspace(5) %stacksave)
ret void
}
define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-OPT-LABEL: func_stacksave_stackrestore_call_with_stack_objects:
; WAVE32-OPT: ; %bb.0:
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-OPT-NEXT: s_mov_b32 s20, s33
; WAVE32-OPT-NEXT: s_mov_b32 s33, s32
; WAVE32-OPT-NEXT: s_xor_saveexec_b32 s16, -1
; WAVE32-OPT-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
; WAVE32-OPT-NEXT: s_mov_b32 exec_lo, s16
; WAVE32-OPT-NEXT: v_writelane_b32 v32, s30, 0
; WAVE32-OPT-NEXT: v_mov_b32_e32 v0, 42
; WAVE32-OPT-NEXT: v_mov_b32_e32 v1, 17
; WAVE32-OPT-NEXT: s_addk_i32 s32, 0x1200
; WAVE32-OPT-NEXT: s_mov_b32 s17, stack_passed_argument@abs32@hi
; WAVE32-OPT-NEXT: s_mov_b32 s18, s32
; WAVE32-OPT-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo
; WAVE32-OPT-NEXT: v_writelane_b32 v32, s31, 1
; WAVE32-OPT-NEXT: s_lshr_b32 s19, s18, 5
; WAVE32-OPT-NEXT: buffer_store_dword v0, off, s[0:3], s33
; WAVE32-OPT-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-OPT-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; WAVE32-OPT-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE32-OPT-NEXT: s_mov_b32 s32, s18
; WAVE32-OPT-NEXT: ;;#ASMSTART
; WAVE32-OPT-NEXT: ; use s19
; WAVE32-OPT-NEXT: ;;#ASMEND
; WAVE32-OPT-NEXT: v_readlane_b32 s31, v32, 1
; WAVE32-OPT-NEXT: v_readlane_b32 s30, v32, 0
; WAVE32-OPT-NEXT: s_mov_b32 s32, s33
; WAVE32-OPT-NEXT: s_xor_saveexec_b32 s4, -1
; WAVE32-OPT-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
; WAVE32-OPT-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-OPT-NEXT: s_mov_b32 s33, s20
; WAVE32-OPT-NEXT: s_waitcnt vmcnt(0)
; WAVE32-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-OPT-LABEL: func_stacksave_stackrestore_call_with_stack_objects:
; WAVE64-OPT: ; %bb.0:
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-OPT-NEXT: s_mov_b32 s20, s33
; WAVE64-OPT-NEXT: s_mov_b32 s33, s32
; WAVE64-OPT-NEXT: s_xor_saveexec_b64 s[16:17], -1
; WAVE64-OPT-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
; WAVE64-OPT-NEXT: s_mov_b64 exec, s[16:17]
; WAVE64-OPT-NEXT: v_writelane_b32 v32, s30, 0
; WAVE64-OPT-NEXT: v_mov_b32_e32 v0, 42
; WAVE64-OPT-NEXT: v_mov_b32_e32 v1, 17
; WAVE64-OPT-NEXT: s_addk_i32 s32, 0x2400
; WAVE64-OPT-NEXT: s_mov_b32 s17, stack_passed_argument@abs32@hi
; WAVE64-OPT-NEXT: s_mov_b32 s18, s32
; WAVE64-OPT-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo
; WAVE64-OPT-NEXT: v_writelane_b32 v32, s31, 1
; WAVE64-OPT-NEXT: s_lshr_b32 s19, s18, 6
; WAVE64-OPT-NEXT: buffer_store_dword v0, off, s[0:3], s33
; WAVE64-OPT-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE64-OPT-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; WAVE64-OPT-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE64-OPT-NEXT: s_mov_b32 s32, s18
; WAVE64-OPT-NEXT: ;;#ASMSTART
; WAVE64-OPT-NEXT: ; use s19
; WAVE64-OPT-NEXT: ;;#ASMEND
; WAVE64-OPT-NEXT: v_readlane_b32 s31, v32, 1
; WAVE64-OPT-NEXT: v_readlane_b32 s30, v32, 0
; WAVE64-OPT-NEXT: s_mov_b32 s32, s33
; WAVE64-OPT-NEXT: s_xor_saveexec_b64 s[4:5], -1
; WAVE64-OPT-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
; WAVE64-OPT-NEXT: s_mov_b64 exec, s[4:5]
; WAVE64-OPT-NEXT: s_mov_b32 s33, s20
; WAVE64-OPT-NEXT: s_waitcnt vmcnt(0)
; WAVE64-OPT-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-O0-LABEL: func_stacksave_stackrestore_call_with_stack_objects:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: s_mov_b32 s24, s33
; WAVE32-O0-NEXT: s_mov_b32 s33, s32
; WAVE32-O0-NEXT: s_xor_saveexec_b32 s16, -1
; WAVE32-O0-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
; WAVE32-O0-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s16
; WAVE32-O0-NEXT: s_add_i32 s32, s32, 0x1200
; WAVE32-O0-NEXT: v_writelane_b32 v32, s30, 0
; WAVE32-O0-NEXT: v_writelane_b32 v32, s31, 1
; WAVE32-O0-NEXT: s_mov_b32 s16, s32
; WAVE32-O0-NEXT: ; implicit-def: $vgpr33 : SGPR spill to VGPR lane
; WAVE32-O0-NEXT: v_writelane_b32 v33, s16, 0
; WAVE32-O0-NEXT: s_lshr_b32 s16, s16, 5
; WAVE32-O0-NEXT: v_writelane_b32 v33, s16, 1
; WAVE32-O0-NEXT: v_mov_b32_e32 v0, 42
; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33
; WAVE32-O0-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-O0-NEXT: s_mov_b64 s[22:23], s[2:3]
; WAVE32-O0-NEXT: s_mov_b64 s[20:21], s[0:1]
; WAVE32-O0-NEXT: s_mov_b32 s16, s32
; WAVE32-O0-NEXT: v_mov_b32_e32 v0, 17
; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s16 offset:4
; WAVE32-O0-NEXT: s_mov_b32 s18, stack_passed_argument@abs32@hi
; WAVE32-O0-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo
; WAVE32-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
; WAVE32-O0-NEXT: s_mov_b32 s17, s18
; WAVE32-O0-NEXT: s_mov_b64 s[0:1], s[20:21]
; WAVE32-O0-NEXT: s_mov_b64 s[2:3], s[22:23]
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v0, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v1, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v2, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v3, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v4, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v5, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v6, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v7, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v8, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v9, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v10, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v11, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v12, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v13, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v14, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v15, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v16, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v17, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v18, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v19, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v20, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v21, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v22, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v23, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v24, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v25, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v26, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v27, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v28, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v29, s18
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v30, s18
; WAVE32-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE32-O0-NEXT: v_readlane_b32 s5, v33, 1
; WAVE32-O0-NEXT: v_readlane_b32 s4, v33, 0
; WAVE32-O0-NEXT: ;;#ASMSTART
; WAVE32-O0-NEXT: ; use s5
; WAVE32-O0-NEXT: ;;#ASMEND
; WAVE32-O0-NEXT: s_mov_b32 s32, s4
; WAVE32-O0-NEXT: v_readlane_b32 s31, v32, 1
; WAVE32-O0-NEXT: v_readlane_b32 s30, v32, 0
; WAVE32-O0-NEXT: s_mov_b32 s32, s33
; WAVE32-O0-NEXT: s_xor_saveexec_b32 s4, -1
; WAVE32-O0-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
; WAVE32-O0-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-O0-NEXT: s_mov_b32 s33, s24
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0)
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE64-O0-LABEL: func_stacksave_stackrestore_call_with_stack_objects:
; WAVE64-O0: ; %bb.0:
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: s_mov_b32 s19, s33
; WAVE64-O0-NEXT: s_mov_b32 s33, s32
; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[16:17], -1
; WAVE64-O0-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
; WAVE64-O0-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
; WAVE64-O0-NEXT: s_mov_b64 exec, s[16:17]
; WAVE64-O0-NEXT: s_add_i32 s32, s32, 0x2400
; WAVE64-O0-NEXT: v_writelane_b32 v32, s30, 0
; WAVE64-O0-NEXT: v_writelane_b32 v32, s31, 1
; WAVE64-O0-NEXT: s_mov_b32 s16, s32
; WAVE64-O0-NEXT: ; implicit-def: $vgpr33 : SGPR spill to VGPR lane
; WAVE64-O0-NEXT: v_writelane_b32 v33, s16, 0
; WAVE64-O0-NEXT: s_lshr_b32 s16, s16, 6
; WAVE64-O0-NEXT: v_writelane_b32 v33, s16, 1
; WAVE64-O0-NEXT: v_mov_b32_e32 v0, 42
; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33
; WAVE64-O0-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE64-O0-NEXT: s_mov_b64 s[22:23], s[2:3]
; WAVE64-O0-NEXT: s_mov_b64 s[20:21], s[0:1]
; WAVE64-O0-NEXT: s_mov_b32 s16, s32
; WAVE64-O0-NEXT: v_mov_b32_e32 v0, 17
; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s16 offset:4
; WAVE64-O0-NEXT: s_mov_b32 s18, stack_passed_argument@abs32@hi
; WAVE64-O0-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo
; WAVE64-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
; WAVE64-O0-NEXT: s_mov_b32 s17, s18
; WAVE64-O0-NEXT: s_mov_b64 s[0:1], s[20:21]
; WAVE64-O0-NEXT: s_mov_b64 s[2:3], s[22:23]
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v0, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v1, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v2, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v3, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v4, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v5, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v6, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v7, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v8, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v9, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v10, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v11, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v12, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v13, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v14, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v15, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v16, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v17, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v18, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v19, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v20, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v21, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v22, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v23, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v24, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v25, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v26, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v27, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v28, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v29, s18
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v30, s18
; WAVE64-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE64-O0-NEXT: v_readlane_b32 s5, v33, 1
; WAVE64-O0-NEXT: v_readlane_b32 s4, v33, 0
; WAVE64-O0-NEXT: ;;#ASMSTART
; WAVE64-O0-NEXT: ; use s5
; WAVE64-O0-NEXT: ;;#ASMEND
; WAVE64-O0-NEXT: s_mov_b32 s32, s4
; WAVE64-O0-NEXT: v_readlane_b32 s31, v32, 1
; WAVE64-O0-NEXT: v_readlane_b32 s30, v32, 0
; WAVE64-O0-NEXT: s_mov_b32 s32, s33
; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
; WAVE64-O0-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
; WAVE64-O0-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5]
; WAVE64-O0-NEXT: s_mov_b32 s33, s19
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0)
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_stackrestore_call_with_stack_objects:
; WAVE32-WWM-PREALLOC: ; %bb.0:
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s24, s33
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s32
; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s16, -1
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s16
; WAVE32-WWM-PREALLOC-NEXT: s_add_i32 s32, s32, 0x1200
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v33, s30, 0
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v33, s31, 1
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s16, s32
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s16, 0
; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s16, s16, 5
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s16, 1
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v0, 42
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v0, off, s[0:3], s33
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[22:23], s[2:3]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[20:21], s[0:1]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s16, s32
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v0, 17
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v0, off, s[0:3], s16 offset:4
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s18, stack_passed_argument@abs32@hi
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo
; WAVE32-WWM-PREALLOC-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s17, s18
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[0:1], s[20:21]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[2:3], s[22:23]
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v0, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v1, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v2, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v3, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v4, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v5, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v6, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v7, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v8, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v9, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v10, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v11, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v12, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v13, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v14, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v15, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v16, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v17, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v18, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v19, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v20, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v21, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v22, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v23, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v24, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v25, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v26, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v27, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v28, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v29, s18
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v30, s18
; WAVE32-WWM-PREALLOC-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s5, v32, 1
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s4, v32, 0
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMSTART
; WAVE32-WWM-PREALLOC-NEXT: ; use s5
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMEND
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s31, v33, 1
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s30, v33, 0
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s33
; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s4, -1
; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s24
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0)
; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca [32 x i32], addrspace(5)
%stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
store volatile i32 42, ptr addrspace(5) %alloca
call void @stack_passed_argument([32 x i32] poison, i32 17)
call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %stacksave)
call void @llvm.stackrestore.p5(ptr addrspace(5) %stacksave)
ret void
}
; Module-level flags shared by every function in this test file.
; The list holds a single entry, !0, which sets the flag named
; "amdhsa_code_object_version" to the i32 value 500. The leading "i32 1" is
; the module-flag merge behavior (1 is "Error" in the LLVM LangRef table, so
; linking against a module with a different value is rejected).
; NOTE(review) 500 presumably selects AMDHSA code object v5 - confirm against
; the AMDGPU backend documentation.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; WAVE32: {{.*}}
; WAVE64: {{.*}}