Files
clang-p2996/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
Vang Thao 28322c2514 [AMDGPU] Add scheduler pass to rematerialize trivial defs
Add a new pass in the pre-RA AMDGPU scheduler to check whether sinking trivially rematerializable defs that have only one use outside of the defining block will increase occupancy. If we can determine that occupancy can be increased, then rematerialize only the minimum number of defs required to increase occupancy. Also re-schedule all regions whose occupancy matched the previous minimum occupancy, using the new occupancy.

This is based off of the discussion in https://reviews.llvm.org/D117562.

The logic for determining which defs to collect and whether sinking would be beneficial is mostly the same. The main differences are that we no longer limit it to immediate defs, and the def and its use do not have to be part of a loop.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D119475
2022-03-09 09:34:33 -08:00

142 lines
6.7 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
--- |
declare void @llvm.dbg.value(metadata, metadata, metadata) #0
define amdgpu_kernel void @could_not_use_debug_inst_to_query_mi2mimap() #1 {
ret void
}
declare hidden float @foo(float, float, float) local_unnamed_addr #1
attributes #0 = { nounwind readnone speculatable }
attributes #1 = {nounwind }
...
---
name: could_not_use_debug_inst_to_query_mi2mimap
tracksRegLiveness: true
frameInfo:
hasCalls: true
body: |
; CHECK-LABEL: name: could_not_use_debug_inst_to_query_mi2mimap
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: %9:vgpr_32 = nofpexcept V_MUL_F32_e32 1082130432, [[DEF]], implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DBG_VALUE
; CHECK-NEXT: DBG_VALUE
; CHECK-NEXT: DBG_VALUE
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]]
; CHECK-NEXT: %16:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF6]], [[DEF6]], implicit $mode, implicit $exec
; CHECK-NEXT: %17:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF6]], [[DEF6]], implicit $mode, implicit $exec
; CHECK-NEXT: %18:vgpr_32 = nofpexcept V_MUL_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: %21:vgpr_32 = nofpexcept V_ADD_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $mode, implicit $exec
; CHECK-NEXT: %22:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF6]], [[DEF6]], implicit $mode, implicit $exec
; CHECK-NEXT: dead %23:vgpr_32 = nofpexcept V_MUL_F32_e32 %22, [[DEF12]], implicit $mode, implicit $exec
; CHECK-NEXT: dead [[V_MOV_B32_e32_1]]:vgpr_32 = nofpexcept V_MAC_F32_e32 %21, [[COPY]], [[V_MOV_B32_e32_1]], implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF13:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; CHECK-NEXT: $sgpr4 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = COPY [[DEF10]]
; CHECK-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
; CHECK-NEXT: $vgpr1 = COPY [[DEF6]]
; CHECK-NEXT: $vgpr0 = COPY %16
; CHECK-NEXT: $vgpr1 = COPY %17
; CHECK-NEXT: $vgpr2 = COPY %18
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL [[DEF13]], @foo, csr_amdgpu_highregs, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $sgpr4, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit-def $vgpr0
; CHECK-NEXT: %25:vgpr_32 = nofpexcept V_ADD_F32_e32 %9, [[DEF7]], implicit $mode, implicit $exec
; CHECK-NEXT: %25:vgpr_32 = nofpexcept V_MAC_F32_e32 [[DEF11]], [[DEF8]], %25, implicit $mode, implicit $exec
; CHECK-NEXT: dead %26:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, [[DEF3]], 0, [[DEF]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: dead %27:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, [[DEF4]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: dead %28:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, [[DEF5]], 0, [[DEF2]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF14]], [[DEF9]], 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
%0:vreg_64 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
%4:vgpr_32 = IMPLICIT_DEF
%5:vgpr_32 = IMPLICIT_DEF
%6:vgpr_32 = IMPLICIT_DEF
%7:vgpr_32 = IMPLICIT_DEF
%8:vgpr_32 = IMPLICIT_DEF
%9:vgpr_32 = nofpexcept V_MUL_F32_e32 1082130432, %1, implicit $mode, implicit $exec
%10:vgpr_32 = IMPLICIT_DEF
%11:vgpr_32 = IMPLICIT_DEF
bb.1:
successors: %bb.2
DBG_VALUE
DBG_VALUE
DBG_VALUE
bb.2:
successors: %bb.3
S_BRANCH %bb.3
bb.3:
%12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%13:vgpr_32 = COPY %12
%14:vgpr_32 = IMPLICIT_DEF
%15:vgpr_32 = IMPLICIT_DEF
%16:vgpr_32 = nofpexcept V_MUL_F32_e32 %7, %7, implicit $mode, implicit $exec
%17:vgpr_32 = nofpexcept V_MUL_F32_e32 %7, %7, implicit $mode, implicit $exec
%18:vgpr_32 = nofpexcept V_MUL_F32_e32 %12, %12, implicit $mode, implicit $exec
%19:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
%20:vgpr_32 = IMPLICIT_DEF
%21:vgpr_32 = nofpexcept V_ADD_F32_e32 %12, %12, implicit $mode, implicit $exec
%22:vgpr_32 = nofpexcept V_MUL_F32_e32 %7, %7, implicit $mode, implicit $exec
%23:vgpr_32 = nofpexcept V_MUL_F32_e32 %22, %20, implicit $mode, implicit $exec
%19:vgpr_32 = nofpexcept V_MAC_F32_e32 %21, %13, %19, implicit $mode, implicit $exec
%24:sreg_64 = IMPLICIT_DEF
$vgpr0 = COPY %14
$vgpr0 = COPY %12
$vgpr1 = COPY %7
$vgpr0 = COPY %16
$vgpr1 = COPY %17
$vgpr2 = COPY %18
$sgpr4 = IMPLICIT_DEF
dead $sgpr30_sgpr31 = SI_CALL %24, @foo, csr_amdgpu_highregs, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $sgpr4, implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit-def $vgpr0
%25:vgpr_32 = nofpexcept V_ADD_F32_e32 %9, %8, implicit $mode, implicit $exec
%25:vgpr_32 = nofpexcept V_MAC_F32_e32 %15, %10, %25, implicit $mode, implicit $exec
%26:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, %4, 0, %1, 0, 0, implicit $mode, implicit $exec
%27:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, %5, 0, %2, 0, 0, implicit $mode, implicit $exec
%28:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, %6, 0, %3, 0, 0, implicit $mode, implicit $exec
GLOBAL_STORE_DWORD %0, %11, 0, 0, implicit $exec
S_ENDPGM 0
...