Files
clang-p2996/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
Diana eb3c02fdc2 [AMDGPU] Use immediates for stack accesses in chain funcs (#71913)
Switch to using immediate offsets instead of the SP register to access
objects on the current stack frame in chain functions. This means we no
longer need to reserve a SP register just for accessing stack objects and
it also allows us to set the SP (when one is actually needed) to the
stack size from the very beginning.

This only works if we use a FixedObject for the ScavengeFI, which is
what we do for entry functions anyway (and we generally want to keep
chain functions close to amdgpu_cs behaviour where we don't have a good
reason to diverge).
2023-11-14 13:17:46 +01:00

233 lines
12 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
# The IR module is kept only so that the MIR functions below can refer to the
# callees (@callee, @gfx_callee) and so that each function under test carries
# its calling convention (CC).
--- |
declare amdgpu_cs_chain void @callee()
declare amdgpu_gfx void @gfx_callee()
define amdgpu_cs_chain void @preserve_inactive_wwm() {ret void}
define amdgpu_cs_chain void @preserve_inactive_detected_wwm() {ret void}
define amdgpu_cs_chain void @dont_preserve_wwm_if_no_chain_calls() {ret void}
define amdgpu_cs_chain void @dont_preserve_non_wwm() {ret void}
define amdgpu_cs_chain void @dont_preserve_v0_v7() {ret void}
define amdgpu_cs_chain void @dont_preserve_sgpr() {ret void}
...
---
# Check that we preserve the inactive lanes of registers v8+ received in the
# MachineFunctionInfo as wwmReservedRegs.
# The CHECK lines expect $vgpr8 and $vgpr9 (both listed in wwmReservedRegs) to
# be stored to scratch at function entry and reloaded right before the
# SI_CS_CHAIN_TC_W32 tail call, each time bracketed by S_XOR_SAVEEXEC_B32 -1
# and a restore of $exec_lo from the value saved in $sgpr1.
---
name: preserve_inactive_wwm
tracksRegLiveness: true
frameInfo:
hasTailCall: true
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
returnsVoid: true
wwmReservedRegs:
- '$vgpr8'
- '$vgpr9'
body: |
bb.0:
liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
; GCN-LABEL: name: preserve_inactive_wwm
; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
...
# Check that it also works for SGPR to VGPR spills: no wwmReservedRegs are
# listed here, but $vgpr8 and $vgpr9 are the destinations of
# SI_SPILL_S32_TO_VGPR in the body, and the CHECK lines expect the same scratch
# save/restore of both registers around the body as in preserve_inactive_wwm.
---
name: preserve_inactive_detected_wwm
tracksRegLiveness: true
frameInfo:
hasTailCall: true
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
returnsVoid: true
body: |
bb.0:
liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
; GCN-LABEL: name: preserve_inactive_detected_wwm
; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: renamable $vgpr9 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr9
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr9, 0
; GCN-NEXT: renamable $vgpr9 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
$sgpr35 = S_MOV_B32 5
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
renamable $vgpr9 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr9
$sgpr35 = S_MOV_B32 5
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr9, 0
renamable $vgpr9 = V_MOV_B32_e32 10, implicit $exec
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
...
---
# This function makes no chain call (hasTailCall is false and the body ends in
# S_ENDPGM). The CHECK lines expect the body to come out unchanged, with no
# scratch save/restore, even though $vgpr9 is listed in wwmReservedRegs and
# $vgpr8 is used by SI_SPILL_S32_TO_VGPR.
name: dont_preserve_wwm_if_no_chain_calls
tracksRegLiveness: true
frameInfo:
hasTailCall: false
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
returnsVoid: true
wwmReservedRegs:
- '$vgpr9'
body: |
bb.0:
liveins: $sgpr35, $vgpr8
; GCN-LABEL: name: dont_preserve_wwm_if_no_chain_calls
; GCN: liveins: $sgpr35, $vgpr8
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: S_ENDPGM 0
renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
$sgpr35 = S_MOV_B32 5
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
S_ENDPGM 0
...
---
# $vgpr8 and $vgpr16 are only written by ordinary V_MOV_B32_e32 instructions
# here (no wwmReservedRegs, no SI_SPILL_S32_TO_VGPR). The CHECK lines expect no
# scratch save/restore to be inserted before the chain call.
name: dont_preserve_non_wwm
tracksRegLiveness: true
frameInfo:
hasTailCall: true
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
isChainFunction: true
returnsVoid: true
body: |
bb.0:
liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr16
; GCN-LABEL: name: dont_preserve_non_wwm
; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr16
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr16 = V_MOV_B32_e32 16, implicit $exec
; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 8, implicit $exec
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
renamable $vgpr16 = V_MOV_B32_e32 16, implicit $exec
renamable $vgpr8 = V_MOV_B32_e32 8, implicit $exec
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
...
---
# The registers tied to WWM use here are below v8: $vgpr1 is listed in
# wwmReservedRegs and $vgpr0 is used by SI_SPILL_S32_TO_VGPR, while $vgpr8 and
# $vgpr9 are written only by COPY. The CHECK lines expect no scratch
# save/restore to be inserted.
name: dont_preserve_v0_v7
tracksRegLiveness: true
frameInfo:
hasTailCall: true
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
isChainFunction: true
returnsVoid: true
wwmReservedRegs:
- '$vgpr1'
body: |
bb.0:
liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9
; GCN-LABEL: name: dont_preserve_v0_v7
; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: renamable $vgpr7 = V_MOV_B32_e32 16, implicit $exec
; GCN-NEXT: renamable $vgpr8 = COPY killed renamable $vgpr0
; GCN-NEXT: renamable $vgpr9 = COPY killed renamable $vgpr7
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
$sgpr35 = S_MOV_B32 5
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
renamable $vgpr7 = V_MOV_B32_e32 16, implicit $exec
renamable $vgpr8 = COPY killed renamable $vgpr0
renamable $vgpr9 = COPY killed renamable $vgpr7
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
...
---
# Only SGPRs are modified here ($sgpr0 is recomputed through $sgpr1 and then
# passed to the chain call). The CHECK lines expect the body to come out
# unchanged, with no save/restore code inserted.
name: dont_preserve_sgpr
tracksRegLiveness: true
frameInfo:
hasTailCall: true
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
returnsVoid: true
body: |
bb.0 (%ir-block.0):
liveins: $sgpr0
; GCN-LABEL: name: dont_preserve_sgpr
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc
; GCN-NEXT: $sgpr0 = COPY killed renamable $sgpr1
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0
renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc
$sgpr0 = COPY killed renamable $sgpr1
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0
...