Allocating wwm-registers and per-thread VGPR operands together imposes many challenges in the way the registers are reused during allocation. There are times when regalloc reuses the registers of regular VGPRs operations for wwm-operations in a small range leading to unwantedly clobbering their inactive lanes causing correctness issues that are hard to trace. This patch splits the VGPR allocation pipeline further to allocate wwm-registers first and the regular VGPR operands in a separate pipeline. The splitting would ensure that the physical registers used for wwm allocations won't take part in the next allocation pipeline to avoid any such clobbering.
62 lines
3.1 KiB
YAML
62 lines
3.1 KiB
YAML
# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
|
|
# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s
|
|
|
|
# FIXME: We should not produce a verifier error after erroring
|
|
|
|
# ERR: error: inline assembly requires more registers than available
|
|
# VERIFIER: *** Bad machine code: Using an undefined physical register ***
|
|
|
|
# This testcase cannot be compiled with the enforced register
|
|
# budget. Previously, tryLastChanceRecoloring would assert here. It
|
|
# was attempting to recolor a superregister with an overlapping
|
|
# subregister over the same range.
|
|
|
|
--- |
|
|
define void @foo() #0 {
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
|
|
|
|
...
|
|
---
|
|
name: foo
|
|
tracksRegLiveness: true
|
|
registers:
|
|
- { id: 0, class: vgpr_32 }
|
|
- { id: 1, class: vgpr_32 }
|
|
- { id: 2, class: vreg_512 }
|
|
- { id: 3, class: vreg_256 }
|
|
- { id: 4, class: vreg_128 }
|
|
- { id: 5, class: vreg_96 }
|
|
- { id: 6, class: vreg_96 }
|
|
- { id: 7, class: vreg_512 }
|
|
- { id: 8, class: vreg_256 }
|
|
- { id: 9, class: vreg_128 }
|
|
- { id: 10, class: vreg_96 }
|
|
- { id: 11, class: vreg_96 }
|
|
- { id: 12, class: sreg_64 }
|
|
- { id: 13, class: sgpr_64 }
|
|
- { id: 14, class: vgpr_32 }
|
|
machineFunctionInfo:
|
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
|
frameOffsetReg: '$sgpr33'
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
body: |
|
|
bb.0:
|
|
|
|
INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $agpr0
|
|
%14:vgpr_32 = COPY killed $agpr0
|
|
INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 11534346 /* regdef:VReg_512 */, def %7, 10158090 /* regdef:VReg_256 */, def %8, 4784138 /* regdef:VReg_128 */, def %9, 3670026 /* regdef:VReg_96 */, def %10, 3670026 /* regdef:VReg_96 */, def %11
|
|
INLINEASM &"; clobber", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 12 /* clobber */, implicit-def dead early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 11534345 /* reguse:VReg_512 */, %7
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 10158089 /* reguse:VReg_256 */, %8
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:VReg_128 */, %9
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3670025 /* reguse:VReg_96 */, %10
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3670025 /* reguse:VReg_96 */, %11
|
|
$agpr1 = COPY %14
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $agpr1
|
|
SI_RETURN
|
|
|
|
...
|