Files
clang-p2996/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir
Alexander Timofeev 48ab3e7527 [AMDGPU] Avoid SCC clobbering before S_CSELECT_B32
Frame lowering inserts scalar addition to compute the offset to the
stack objects.  This instructions inserted in arbitrary place and may clobber
SCC between its definition and S_CSELECT_B32 instruction.  This change
workarounds this particular code pattern. It queries the scavenger for SGPR and
if available saves SCC to it and restore its value after frame lowering code
insertion.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D136169
2022-11-22 19:08:04 +01:00

141 lines
11 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck --check-prefix=MUBUF %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog -mattr=+enable-flat-scratch %s -o - | FileCheck --check-prefix=FLATSCR %s
---
name: use_restore_frame_reg
tracksRegLiveness: true
stack:
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 2, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 3, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 4, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 5, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 6, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 7, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 8, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 9, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 10, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 11, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 12, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 13, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 14, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 15, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 16, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 17, type: default, offset: 0, size: 4, alignment: 8192 }
- { id: 18, type: default, offset: 0, size: 4, alignment: 8192 }
machineFunctionInfo:
isEntryFunction: false
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
frameOffsetReg: $sgpr33
stackPtrOffsetReg: $sgpr32
body: |
; MUBUF-LABEL: name: use_restore_frame_reg
; MUBUF: bb.0:
; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; MUBUF-NEXT: liveins: $vgpr1, $vgpr2
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; MUBUF-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 9961728, implicit-def dead $scc
; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.20, addrspace 5)
; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 11010048, implicit-def dead $scc
; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; MUBUF-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
; MUBUF-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 155648, killed $vgpr3, implicit $exec
; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; MUBUF-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
; MUBUF-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: bb.1:
; MUBUF-NEXT: successors: %bb.2(0x80000000)
; MUBUF-NEXT: liveins: $vgpr2
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_NOP 0
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: bb.2:
; MUBUF-NEXT: liveins: $vgpr2
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -11010048, implicit-def dead $scc
; MUBUF-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; MUBUF-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; MUBUF-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 9961728, implicit-def dead $scc
; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.20, addrspace 5)
; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; MUBUF-NEXT: S_ENDPGM 0
; FLATSCR-LABEL: name: use_restore_frame_reg
; FLATSCR: bb.0:
; FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; FLATSCR-NEXT: liveins: $vgpr1, $vgpr2
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 155652, implicit-def dead $scc
; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.20, addrspace 5)
; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 172032, implicit-def dead $scc
; FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, 8192, implicit-def $scc, implicit $scc
; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, -8192, implicit-def $scc, implicit $scc
; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, 155648, implicit-def $scc, implicit $scc
; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, -155648, implicit-def $scc, implicit $scc
; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
; FLATSCR-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
; FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: bb.1:
; FLATSCR-NEXT: successors: %bb.2(0x80000000)
; FLATSCR-NEXT: liveins: $vgpr2
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: S_NOP 0
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: bb.2:
; FLATSCR-NEXT: liveins: $vgpr2
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -172032, implicit-def dead $scc
; FLATSCR-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 155652, implicit-def dead $scc
; FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.20, addrspace 5)
; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; FLATSCR-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr1
S_CMP_EQ_U32 0, 0, implicit-def $scc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
$vgpr0 = V_OR_B32_e32 %stack.18, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
S_CBRANCH_VCCNZ %bb.2, implicit $vcc
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
S_NOP 0
bb.2:
S_ENDPGM 0
...