The scheduler should set critical/excess register usage thresholds that are guided by the maximum possible occupancy for the function. This change is focused on setting proper lower bounds on register usage which we would typically only see when a specific number of maximum waves is requested with the "waves-per-eu" attribute, or by setting "amdgpu-num-vgpr|sgpr" directly. This was broken previously. I have a follow-on patch that will address issues with the scheduler not targeting correct upper bounds on register usage which is typical with launch bounds and min "waves-per-eu". Changes by this patch: Set the initial critical register usage thresholds to minimum values that are determined by the maximum possible occupancy for the function, or the number of allocatable registers, whichever is lower. Avoid unsigned overflow if register limits are lower than the register tracking "ErrorMargin", i.e. when using stress-regalloc=2. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D112373
71 lines
2.5 KiB
YAML
71 lines
2.5 KiB
YAML
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -amdgpu-dce-in-ra=0 -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy,1 -o - %s | FileCheck %s
|
|
# https://bugs.llvm.org/show_bug.cgi?id=33620
|
|
|
|
---
|
|
# This would assert due to the empty live interval created for %9
|
|
# on the last S_NOP with an undef subreg use.
# Expected output (pinned by the CHECK lines below): the V_MAC_F32_e32 result
# is saved with SI_SPILL_V64_SAVE to %stack.0 and reloaded with
# SI_SPILL_V64_RESTORE right before the S_NOP that reads its sub1; the
# trailing S_NOP keeps its undef sub0 operand.
|
|
|
|
# CHECK-LABEL: name: expecting_non_empty_interval
|
|
|
|
# CHECK: undef %5.sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec
|
|
# CHECK-NEXT: dead %3:vgpr_32 = V_MUL_F32_e32 0, %5.sub1, implicit $mode, implicit $exec
|
|
# CHECK-NEXT: undef %7.sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef %7.sub1, implicit $mode, implicit $exec
|
|
# CHECK-NEXT: SI_SPILL_V64_SAVE %7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
|
|
|
|
# CHECK: S_NOP 0, implicit %6.sub1
|
|
# CHECK-NEXT: %8:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
|
|
# CHECK-NEXT: S_NOP 0, implicit %8.sub1
|
|
# CHECK-NEXT: S_NOP 0, implicit undef %9.sub0
|
|
|
|
name: expecting_non_empty_interval
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
|
stackPtrOffsetReg: $sgpr32
|
|
body: |
|
|
bb.0:
|
|
successors: %bb.1
|
|
|
|
undef %0.sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef %0.sub1, implicit $mode, implicit $exec
|
|
undef %2.sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec
|
|
dead %3:vgpr_32 = V_MUL_F32_e32 0, %2.sub1, implicit $mode, implicit $exec
|
|
|
|
bb.1:
|
|
S_NOP 0, implicit %2.sub1
|
|
S_NOP 0, implicit %0.sub1
|
|
S_NOP 0, implicit undef %0.sub0
|
|
|
|
...
|
|
|
|
# Similar assert which happens when trying to rematerialize.
|
|
# https://bugs.llvm.org/show_bug.cgi?id=33884
|
|
---
|
|
# CHECK-LABEL: name: rematerialize_empty_interval_has_reference
|
|
|
|
# The 'V_MOV_B32_e32 0' defined in bb.0 of the input must not remain ahead of
# the definition of %1 (hence the CHECK-NOT); it is expected to be
# rematerialized in bb.1 directly before the S_NOP that reads sub2.
# The bb.1 CHECK-NEXT chain follows the input instruction order: the S_NOP
# with the undef sub0 operand comes before the rematerialized V_MOV and its
# user.
# CHECK-NOT: MOV
|
|
# CHECK: undef %1.sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec
|
|
|
|
# CHECK: bb.1:
|
|
# CHECK-NEXT: S_NOP 0, implicit %1.sub2
|
|
# CHECK-NEXT: S_NOP 0, implicit undef %4.sub0
|
|
# CHECK-NEXT: undef %2.sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
|
|
# CHECK-NEXT: S_NOP 0, implicit %2.sub2
|
|
name: rematerialize_empty_interval_has_reference
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
|
stackPtrOffsetReg: $sgpr32
|
|
body: |
|
|
bb.0:
|
|
successors: %bb.1
|
|
|
|
undef %0.sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
|
|
undef %1.sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec
|
|
|
|
bb.1:
|
|
S_NOP 0, implicit %1.sub2
|
|
S_NOP 0, implicit undef %0.sub0
|
|
S_NOP 0, implicit %0.sub2
|
|
|
|
...
|