Switch to using immediate offsets instead of the SP register to access objects on the current stack frame in chain functions. This means we no longer need to reserve an SP register just for accessing stack objects, and it also allows us to set the SP (when one is actually needed) to the stack size from the very beginning. This only works if we use a FixedObject for the ScavengeFI, which is what we do for entry functions anyway (and we generally want to keep chain functions close to amdgpu_cs behaviour where we don't have a good reason to diverge).
233 lines
12 KiB
YAML
233 lines
12 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
|
|
# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
|
|
|
|
# We're keeping the IR around for the callees and the CCs: the MIR functions
# below refer to @callee by name, and every function defined here is an empty
# `ret void` stub carrying the amdgpu_cs_chain calling convention.
# NOTE(review): @gfx_callee is declared but not referenced by any MIR function
# visible in this file - confirm whether it is still needed.
|
|
|
|
--- |
|
|
declare amdgpu_cs_chain void @callee()
|
|
declare amdgpu_gfx void @gfx_callee()
|
|
|
|
define amdgpu_cs_chain void @preserve_inactive_wwm() {ret void}
|
|
define amdgpu_cs_chain void @preserve_inactive_detected_wwm() {ret void}
|
|
define amdgpu_cs_chain void @dont_preserve_wwm_if_no_chain_calls() {ret void}
|
|
define amdgpu_cs_chain void @dont_preserve_non_wwm() {ret void}
|
|
define amdgpu_cs_chain void @dont_preserve_v0_v7() {ret void}
|
|
define amdgpu_cs_chain void @dont_preserve_sgpr() {ret void}
|
|
...
|
|
---
|
|
|
|
# Check that we preserve the inactive lanes of registers v8+ received in the
|
|
# MachineFunctionInfo as wwmReservedRegs.
# Expected output: $vgpr8 and $vgpr9 (the wwmReservedRegs) are stored to scratch
# at offsets 4 and 8 between an S_XOR_SAVEEXEC_B32 -1 / S_MOV_B32 $exec_lo pair
# at function entry, and reloaded from the same offsets, bracketed the same
# way, right before the SI_CS_CHAIN_TC_W32 chain call.
# NOTE(review): the stores use the offset-only SCRATCH_STORE_DWORD_ST form while
# the reloads still address through $sgpr32 (SCRATCH_LOAD_DWORD_SADDR) - confirm
# this asymmetry is intended when the checks are regenerated.
|
|
|
|
---
|
|
name: preserve_inactive_wwm
|
|
tracksRegLiveness: true
|
|
frameInfo:
|
|
hasTailCall: true
|
|
machineFunctionInfo:
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
returnsVoid: true
|
|
wwmReservedRegs:
|
|
- '$vgpr8'
|
|
- '$vgpr9'
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
|
|
|
|
; GCN-LABEL: name: preserve_inactive_wwm
|
|
; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
|
; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
|
|
; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
|
|
; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
|
|
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
|
|
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
|
|
; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
|
; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
|
|
; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
|
|
; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
|
|
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
|
|
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
|
|
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
|
|
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
|
|
|
|
...
|
|
|
|
# Check that it also works for SGPR to VGPR spills.
# No wwmReservedRegs are listed here; instead $vgpr8 and $vgpr9 are used as
# SI_SPILL_S32_TO_VGPR destinations in the body. The expected output still
# stores both registers to scratch (offsets 4 and 8) at entry and reloads them
# before the SI_CS_CHAIN_TC_W32 chain call, each time between an
# S_XOR_SAVEEXEC_B32 -1 / S_MOV_B32 $exec_lo pair.
# NOTE(review): the stores use the offset-only SCRATCH_STORE_DWORD_ST form while
# the reloads still address through $sgpr32 (SCRATCH_LOAD_DWORD_SADDR) - confirm
# this asymmetry is intended when the checks are regenerated.
|
|
|
|
---
|
|
name: preserve_inactive_detected_wwm
|
|
tracksRegLiveness: true
|
|
frameInfo:
|
|
hasTailCall: true
|
|
machineFunctionInfo:
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
returnsVoid: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
|
|
|
|
; GCN-LABEL: name: preserve_inactive_detected_wwm
|
|
; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
|
; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
|
|
; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
|
|
; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
|
|
; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
|
|
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
|
|
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
|
|
; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr9 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr9
|
|
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
|
|
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr9, 0
|
|
; GCN-NEXT: renamable $vgpr9 = V_MOV_B32_e32 10, implicit $exec
|
|
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
|
|
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
|
|
; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
|
; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
|
|
; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
|
|
; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
|
|
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
|
|
renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
|
|
$sgpr35 = S_MOV_B32 5
|
|
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
|
|
renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
|
|
renamable $vgpr9 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr9
|
|
$sgpr35 = S_MOV_B32 5
|
|
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr9, 0
|
|
renamable $vgpr9 = V_MOV_B32_e32 10, implicit $exec
|
|
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
|
|
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
|
|
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
|
|
|
|
...
|
|
|
|
# Check that nothing is saved or restored when the function makes no chain
# call: hasTailCall is false and the body ends in S_ENDPGM. Even though $vgpr9
# is listed in wwmReservedRegs and $vgpr8 is used for an SGPR-to-VGPR spill,
# the expected output is the input body unchanged.
---
|
|
name: dont_preserve_wwm_if_no_chain_calls
|
|
tracksRegLiveness: true
|
|
frameInfo:
|
|
hasTailCall: false
|
|
machineFunctionInfo:
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
returnsVoid: true
|
|
wwmReservedRegs:
|
|
- '$vgpr9'
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr35, $vgpr8
|
|
|
|
; GCN-LABEL: name: dont_preserve_wwm_if_no_chain_calls
|
|
; GCN: liveins: $sgpr35, $vgpr8
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
|
|
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
|
|
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
|
|
; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
|
|
$sgpr35 = S_MOV_B32 5
|
|
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
|
|
renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# Check that VGPRs which are only written by ordinary instructions are not
# preserved: $vgpr8 and $vgpr16 are defined with plain V_MOV_B32, there are no
# wwmReservedRegs and no SGPR-to-VGPR spills, and the expected output contains
# no scratch save/restore around the chain call.
---
|
|
name: dont_preserve_non_wwm
|
|
tracksRegLiveness: true
|
|
frameInfo:
|
|
hasTailCall: true
|
|
machineFunctionInfo:
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
isChainFunction: true
|
|
returnsVoid: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr16
|
|
|
|
; GCN-LABEL: name: dont_preserve_non_wwm
|
|
; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr16
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: renamable $vgpr16 = V_MOV_B32_e32 16, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 8, implicit $exec
|
|
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
|
|
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
|
|
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
|
|
renamable $vgpr16 = V_MOV_B32_e32 16, implicit $exec
|
|
renamable $vgpr8 = V_MOV_B32_e32 8, implicit $exec
|
|
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
|
|
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
|
|
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
|
|
|
|
...
|
|
|
|
# Check that registers below v8 are not preserved: $vgpr1 is listed in
# wwmReservedRegs and $vgpr0 is used as an SI_SPILL_S32_TO_VGPR destination,
# yet the expected output contains no scratch save/restore and matches the
# input body.
---
|
|
name: dont_preserve_v0_v7
|
|
tracksRegLiveness: true
|
|
frameInfo:
|
|
hasTailCall: true
|
|
machineFunctionInfo:
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
isChainFunction: true
|
|
returnsVoid: true
|
|
wwmReservedRegs:
|
|
- '$vgpr1'
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9
|
|
|
|
; GCN-LABEL: name: dont_preserve_v0_v7
|
|
; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
|
|
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
|
|
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
|
|
; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr7 = V_MOV_B32_e32 16, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr8 = COPY killed renamable $vgpr0
|
|
; GCN-NEXT: renamable $vgpr9 = COPY killed renamable $vgpr7
|
|
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
|
|
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
|
|
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
|
|
renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
|
|
$sgpr35 = S_MOV_B32 5
|
|
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
|
|
renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
|
|
renamable $vgpr7 = V_MOV_B32_e32 16, implicit $exec
|
|
renamable $vgpr8 = COPY killed renamable $vgpr0
|
|
renamable $vgpr9 = COPY killed renamable $vgpr7
|
|
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
|
|
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
|
|
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
|
|
|
|
...
|
|
|
|
# Check that SGPR-only code triggers no preservation: the body only touches
# $sgpr0, $sgpr1 and $sgpr4_sgpr5 before the chain call, and the expected
# output is the input body unchanged, with no scratch save/restore.
---
|
|
name: dont_preserve_sgpr
|
|
tracksRegLiveness: true
|
|
frameInfo:
|
|
hasTailCall: true
|
|
machineFunctionInfo:
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
returnsVoid: true
|
|
body: |
|
|
bb.0 (%ir-block.0):
|
|
liveins: $sgpr0
|
|
|
|
; GCN-LABEL: name: dont_preserve_sgpr
|
|
; GCN: liveins: $sgpr0
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc
|
|
; GCN-NEXT: $sgpr0 = COPY killed renamable $sgpr1
|
|
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
|
|
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
|
|
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0
|
|
renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc
|
|
$sgpr0 = COPY killed renamable $sgpr1
|
|
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
|
|
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
|
|
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0
|
|
|
|
...
|