Unlike the callee-saved VGPR spill instructions emitted by `PEI::spillCalleeSavedRegs`, the CS VGPR spills inserted during emitPrologue/emitEpilogue require flipping the exec bits to avoid clobbering the inactive lanes of VGPRs used for SGPR spilling. Currently, these spill instructions are addressed relative to the SP at function entry, so when the callee performs a stack realignment they end up with incorrect stack offsets. Even if we tried to adjust the offsets, the difference FP-SP becomes a runtime quantity under dynamic stack realignment, and the offsets would still be inaccurate. To fix this, use FP as the frame base in the spill instructions whenever the function has an FP. The offsets obtained for the CS objects are then always correct relative to FP.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D134949
145 lines
11 KiB
YAML
145 lines
11 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck --check-prefix=MUBUF %s
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog -mattr=+enable-flat-scratch %s -o - | FileCheck --check-prefix=FLATSCR %s
|
|
|
|
---
|
|
# Single MIR function in this test; both RUN lines run the prologepilog pass
# over it (once with the default scratch access, once with
# -mattr=+enable-flat-scratch).
name: use_restore_frame_reg
|
|
tracksRegLiveness: true
|
|
|
|
# Nineteen 4-byte stack objects (ids 0-18), each requesting 8192-byte
# alignment. The expected output realigns $sgpr33 with an add/and pair to
# honour this alignment.
# NOTE(review): the number of objects presumably exists to push frame offsets
# beyond what fits in an instruction's immediate field -- confirm.
stack:
|
|
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 2, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 3, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 4, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 5, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 6, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 7, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 8, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 9, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 10, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 11, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 12, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 13, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 14, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 15, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 16, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 17, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
- { id: 18, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
# Target-specific function info consumed by the AMDGPU backend.
machineFunctionInfo:
|
|
# Not an entry function; the expected output saves $sgpr33 on entry and
# restores it before the terminator.
isEntryFunction: false
|
|
# Register tuple used as the buffer resource operand of the BUFFER_* scratch
# accesses in the expected output.
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
|
# Frame pointer register.
frameOffsetReg: $sgpr33
|
|
# Stack pointer register.
stackPtrOffsetReg: $sgpr32
|
|
|
|
body: |
|
|
; MUBUF-LABEL: name: use_restore_frame_reg
|
|
; MUBUF: bb.0:
|
|
; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
|
; MUBUF-NEXT: liveins: $vgpr1, $vgpr2
|
|
; MUBUF-NEXT: {{ $}}
|
|
; MUBUF-NEXT: $sgpr4 = COPY $sgpr33
|
|
; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
|
|
; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
|
|
; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
|
; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 9961728, implicit-def dead $scc
|
|
; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.20, addrspace 5)
|
|
; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
|
|
; MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
|
|
; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 11010048, implicit-def dead $scc
|
|
; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
|
|
; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
|
; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
|
; MUBUF-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
|
|
; MUBUF-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
|
; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 155648, killed $vgpr3, implicit $exec
|
|
; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
|
|
; MUBUF-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
|
|
; MUBUF-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
|
; MUBUF-NEXT: {{ $}}
|
|
; MUBUF-NEXT: bb.1:
|
|
; MUBUF-NEXT: successors: %bb.2(0x80000000)
|
|
; MUBUF-NEXT: liveins: $vgpr2
|
|
; MUBUF-NEXT: {{ $}}
|
|
; MUBUF-NEXT: S_NOP 0
|
|
; MUBUF-NEXT: {{ $}}
|
|
; MUBUF-NEXT: bb.2:
|
|
; MUBUF-NEXT: liveins: $vgpr2
|
|
; MUBUF-NEXT: {{ $}}
|
|
; MUBUF-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
|
|
; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
|
; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 9961728, implicit-def dead $scc
|
|
; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.20, addrspace 5)
|
|
; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
|
|
; MUBUF-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -11010048, implicit-def dead $scc
|
|
; MUBUF-NEXT: $sgpr33 = COPY $sgpr4
|
|
; MUBUF-NEXT: S_ENDPGM 0
|
|
; FLATSCR-LABEL: name: use_restore_frame_reg
|
|
; FLATSCR: bb.0:
|
|
; FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
|
; FLATSCR-NEXT: liveins: $vgpr1, $vgpr2
|
|
; FLATSCR-NEXT: {{ $}}
|
|
; FLATSCR-NEXT: $sgpr4 = COPY $sgpr33
|
|
; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
|
|
; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
|
|
; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
|
; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 155652, implicit-def dead $scc
|
|
; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.20, addrspace 5)
|
|
; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
|
|
; FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
|
|
; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 172032, implicit-def dead $scc
|
|
; FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
|
|
; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
|
; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, 8192, implicit-def $scc, implicit $scc
|
|
; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
|
|
; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
|
|
; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
|
|
; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, -8192, implicit-def $scc, implicit $scc
|
|
; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
|
|
; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
|
|
; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, 155648, implicit-def $scc, implicit $scc
|
|
; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
|
|
; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
|
|
; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
|
|
; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, -155648, implicit-def $scc, implicit $scc
|
|
; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
|
|
; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
|
|
; FLATSCR-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
|
|
; FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
|
; FLATSCR-NEXT: {{ $}}
|
|
; FLATSCR-NEXT: bb.1:
|
|
; FLATSCR-NEXT: successors: %bb.2(0x80000000)
|
|
; FLATSCR-NEXT: liveins: $vgpr2
|
|
; FLATSCR-NEXT: {{ $}}
|
|
; FLATSCR-NEXT: S_NOP 0
|
|
; FLATSCR-NEXT: {{ $}}
|
|
; FLATSCR-NEXT: bb.2:
|
|
; FLATSCR-NEXT: liveins: $vgpr2
|
|
; FLATSCR-NEXT: {{ $}}
|
|
; FLATSCR-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
|
|
; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
|
; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 155652, implicit-def dead $scc
|
|
; FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.20, addrspace 5)
|
|
; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
|
|
; FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -172032, implicit-def dead $scc
|
|
; FLATSCR-NEXT: $sgpr33 = COPY $sgpr4
|
|
; FLATSCR-NEXT: S_ENDPGM 0
|
|
bb.0:
|
|
; Only $vgpr1 is live into the input function.
liveins: $vgpr1
|
|
|
|
; Defines $scc; the value is consumed by the S_CBRANCH_SCC1 at the end of
; this block, so $scc stays live across the two frame-index users below.
S_CMP_EQ_U32 0, 0, implicit-def $scc
|
|
; Marks $sgpr4-$sgpr31 and $vcc as defined. Together with the implicit uses
; on the V_OR below, this keeps $sgpr4-$sgpr31 live across the frame-index
; instructions.
; NOTE(review): presumably this leaves no free SGPR to use as a temporary
; when the frame indexes are lowered -- confirm.
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
|
; Materializes the address of the first declared stack object (id 0).
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
|
|
; Uses the last declared stack object (id 18) as a source operand and lists
; $sgpr4-$sgpr31 as implicit uses.
$vgpr0 = V_OR_B32_e32 %stack.18, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
|
|
; Two conditional branches to bb.2: the first reads $vcc, the second reads
; the $scc value defined at the top of the block.
S_CBRANCH_VCCNZ %bb.2, implicit $vcc
|
|
S_CBRANCH_SCC1 %bb.2, implicit $scc
|
|
|
|
; Block reached when neither branch above is taken.
bb.1:
|
|
S_NOP 0
|
|
|
|
; Exit block containing the terminator; in the expected output the restore
; sequence is emitted here, before S_ENDPGM.
bb.2:
|
|
S_ENDPGM 0
|
|
|
|
...
|