Allocating wwm-registers and per-thread VGPR operands together poses many challenges in how registers are reused during allocation. At times, regalloc reuses the registers of regular VGPR operations for wwm-operations within a small range, unintentionally clobbering their inactive lanes and causing correctness issues that are hard to trace. This patch splits the VGPR allocation pipeline further, allocating wwm-registers first and the regular VGPR operands in a separate pipeline. The split ensures that the physical registers used for wwm allocations do not take part in the next allocation pipeline, avoiding any such clobbering.
133 lines
7.4 KiB
YAML
133 lines
7.4 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 \
|
|
# RUN: -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-regalloc -o - %s | FileCheck %s
|
|
|
|
# This used to hit an assert: identifying snippet copy bundles
|
|
# ran off the end of the block because the code used
|
|
# MachineBasicBlock::iterator instead of
|
|
# MachineBasicBlock::instr_iterator.
|
|
|
|
--- |
|
|
|
|
; Placeholder IR for the MIR function named `kernel` later in this file.
; The IR body is just `ret void`; the instructions under test are in the MIR body.
define amdgpu_kernel void @kernel() #0 {
|
|
bb:
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { "amdgpu-calls" "amdgpu-waves-per-eu"="10,10" "target-cpu"="gfx900" }
|
|
|
|
...
|
|
---
|
|
name: kernel
|
|
tracksRegLiveness: true
|
|
frameInfo:
|
|
adjustsStack: true
|
|
hasCalls: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
occupancy: 8
|
|
body: |
|
|
; CHECK-LABEL: name: kernel
|
|
; CHECK: bb.0:
|
|
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
|
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: renamable $sgpr34_sgpr35 = IMPLICIT_DEF
|
|
; CHECK-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: renamable $sgpr41 = IMPLICIT_DEF
|
|
; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY undef $sgpr8_sgpr9
|
|
; CHECK-NEXT: renamable $sgpr36_sgpr37 = IMPLICIT_DEF
|
|
; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 0, 0 :: (dereferenceable invariant load (s256), align 16, addrspace 4)
|
|
; CHECK-NEXT: dead renamable $sgpr4 = S_LOAD_DWORD_IMM renamable $sgpr38_sgpr39, 48, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
|
|
; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr44_sgpr45, 0, 0 :: (invariant load (s64), align 16, addrspace 4)
|
|
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
|
; CHECK-NEXT: $vgpr1 = COPY renamable $sgpr51
|
|
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
|
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
|
; CHECK-NEXT: $vcc = COPY renamable $sgpr40_sgpr41
|
|
; CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef $vcc
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.1:
|
|
; CHECK-NEXT: successors: %bb.3(0x80000000)
|
|
; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4)
|
|
; CHECK-NEXT: S_BRANCH %bb.3
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.2:
|
|
; CHECK-NEXT: successors: %bb.3(0x80000000)
|
|
; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4)
|
|
; CHECK-NEXT: S_CMP_LG_U64 renamable $sgpr4_sgpr5, 0, implicit-def $scc
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.3:
|
|
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
|
|
; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: S_CBRANCH_VCCZ %bb.5, implicit undef $vcc
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.4:
|
|
; CHECK-NEXT: successors: %bb.5(0x80000000)
|
|
; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: S_CMP_EQ_U32 renamable $sgpr8, 0, implicit-def $scc
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.5:
|
|
; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr38_sgpr39, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4)
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s32), addrspace 1)
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], renamable $sgpr50_sgpr51, 0, 0, implicit $exec :: (store (s32), addrspace 1)
|
|
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr49
|
|
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
|
; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr36_sgpr37
|
|
; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr34_sgpr35
|
|
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
; Entry block: defines every virtual register that the later blocks read
; (%0, %1, %3, %4, %5, %7), performs one call, then branches on $vcc.
bb.0:
|
|
liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
|
|
|
|
%0:sgpr_64 = IMPLICIT_DEF
|
|
; %1 is only ever read as an `undef` operand, by the two stores in bb.5.
%1:vgpr_32 = IMPLICIT_DEF
|
|
; Only sub1 of %2 is defined here; the full 64-bit register is copied to $vcc below.
undef %2.sub1:sreg_64 = IMPLICIT_DEF
|
|
%3:sgpr_64 = COPY undef $sgpr8_sgpr9
|
|
%4:sgpr_64 = IMPLICIT_DEF
|
|
%5:sgpr_256 = S_LOAD_DWORDX8_IMM %3, 0, 0 :: (dereferenceable invariant load (s256), align 16, addrspace 4)
|
|
; %6 has no uses anywhere in this function.
%6:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 48, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
|
|
; %7 is not read in this block; its uses are in bb.2, bb.4 and bb.5, i.e. after the call.
; In the expected output this load is absent from bb.0 and the same load appears
; at the top of both bb.1 and bb.2 instead.
%7:sgpr_256 = S_LOAD_DWORDX8_IMM %3, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4)
|
|
; %8 is only read as an `undef` operand of the SI_CALL below.
%8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %5.sub0_sub1, 0, 0 :: (invariant load (s64), align 16, addrspace 4)
|
|
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
|
$vgpr1 = COPY %5.sub7
|
|
dead $sgpr30_sgpr31 = SI_CALL undef %8, 0, csr_amdgpu, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
|
ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
|
$vcc = COPY %2
|
|
; The branch reads $vcc as `implicit undef`, not as a use of the copy above.
S_CBRANCH_VCCZ %bb.2, implicit undef $vcc
|
|
|
|
; bb.1 contains only an unconditional branch to bb.3.
bb.1:
|
|
S_BRANCH %bb.3
|
|
|
|
; bb.2 has no terminator, so it falls through to bb.3. It reads %7.sub0_sub1;
; the $scc it defines is not read by any instruction in this function.
bb.2:
|
|
S_CMP_LG_U64 %7.sub0_sub1, 0, implicit-def $scc
|
|
|
|
; bb.3 branches to bb.5 or falls through to bb.4; $vcc is read as `implicit undef`.
bb.3:
|
|
S_CBRANCH_VCCZ %bb.5, implicit undef $vcc
|
|
|
|
; bb.4 has no terminator, so it falls through to bb.5. It reads %7.sub4.
bb.4:
|
|
S_CMP_EQ_U32 %7.sub4, 0, implicit-def $scc
|
|
|
|
; Exit block: holds the last uses of %0, %3, %4, %5 and %7, then ends the program.
bb.5:
|
|
; %9 is defined here and never read.
%9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4)
|
|
; Both stores take %1 only as `undef` operands; the live inputs are the SGPR pairs
; %7.sub2_sub3 and %5.sub6_sub7.
GLOBAL_STORE_DWORD_SADDR undef %1, undef %1, %7.sub2_sub3, 0, 0, implicit $exec :: (store (s32), addrspace 1)
|
|
GLOBAL_STORE_DWORD_SADDR undef %1, undef %1, %5.sub6_sub7, 0, 0, implicit $exec :: (store (s32), addrspace 1)
|
|
; %10 is defined here and never read.
%10:vgpr_32 = COPY %5.sub5
|
|
; Call-frame setup/teardown pair that contains only copies into physical SGPRs;
; there is no call instruction between them.
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
|
$sgpr6_sgpr7 = COPY %4
|
|
$sgpr10_sgpr11 = COPY %0
|
|
ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
|
S_ENDPGM 0
|
|
|
|
...
|