Add a new pass in the pre-RA AMDGPU scheduler to check whether sinking trivially rematerializable defs that have only one use outside of the defining block will increase occupancy. If we can determine that occupancy can be increased, then rematerialize only the minimum number of defs required to increase occupancy. Also re-schedule all regions whose occupancy matched the previous minimum occupancy, using the new occupancy. This is based on the discussion in https://reviews.llvm.org/D117562. The logic for determining which defs to collect and whether sinking would be beneficial is mostly the same. The main differences are that we no longer limit it to immediate defs, and the def and use do not have to be part of a loop. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D119475
142 lines
6.7 KiB
YAML
142 lines
6.7 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
|
|
--- |
|
|
|
|
declare void @llvm.dbg.value(metadata, metadata, metadata) #0
|
|
|
|
; IR-level stub for the kernel under test: it takes no arguments and only
; returns. The MIR function with the same name later in this file supplies the
; machine instructions that the RUN line feeds to the machine-scheduler pass.
define amdgpu_kernel void @could_not_use_debug_inst_to_query_mi2mimap() #1 {
|
|
ret void
|
|
}
|
|
|
|
declare hidden float @foo(float, float, float) local_unnamed_addr #1
|
|
|
|
attributes #0 = { nounwind readnone speculatable }
|
|
attributes #1 = {nounwind }
|
|
|
|
...
|
|
---
|
|
name: could_not_use_debug_inst_to_query_mi2mimap
|
|
tracksRegLiveness: true
|
|
frameInfo:
|
|
hasCalls: true
|
|
body: |
|
|
; CHECK-LABEL: name: could_not_use_debug_inst_to_query_mi2mimap
|
|
; CHECK: bb.0:
|
|
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: %9:vgpr_32 = nofpexcept V_MUL_F32_e32 1082130432, [[DEF]], implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.1:
|
|
; CHECK-NEXT: successors: %bb.2(0x80000000)
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: DBG_VALUE
|
|
; CHECK-NEXT: DBG_VALUE
|
|
; CHECK-NEXT: DBG_VALUE
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.2:
|
|
; CHECK-NEXT: successors: %bb.3(0x80000000)
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: S_BRANCH %bb.3
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.3:
|
|
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
; CHECK-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]]
|
|
; CHECK-NEXT: %16:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF6]], [[DEF6]], implicit $mode, implicit $exec
|
|
; CHECK-NEXT: %17:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF6]], [[DEF6]], implicit $mode, implicit $exec
|
|
; CHECK-NEXT: %18:vgpr_32 = nofpexcept V_MUL_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
|
|
; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: %21:vgpr_32 = nofpexcept V_ADD_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $mode, implicit $exec
|
|
; CHECK-NEXT: %22:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF6]], [[DEF6]], implicit $mode, implicit $exec
|
|
; CHECK-NEXT: dead %23:vgpr_32 = nofpexcept V_MUL_F32_e32 %22, [[DEF12]], implicit $mode, implicit $exec
|
|
; CHECK-NEXT: dead [[V_MOV_B32_e32_1]]:vgpr_32 = nofpexcept V_MAC_F32_e32 %21, [[COPY]], [[V_MOV_B32_e32_1]], implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[DEF13:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: $sgpr4 = IMPLICIT_DEF
|
|
; CHECK-NEXT: $vgpr0 = COPY [[DEF10]]
|
|
; CHECK-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
|
|
; CHECK-NEXT: $vgpr1 = COPY [[DEF6]]
|
|
; CHECK-NEXT: $vgpr0 = COPY %16
|
|
; CHECK-NEXT: $vgpr1 = COPY %17
|
|
; CHECK-NEXT: $vgpr2 = COPY %18
|
|
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL [[DEF13]], @foo, csr_amdgpu_highregs, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $sgpr4, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit-def $vgpr0
|
|
; CHECK-NEXT: %25:vgpr_32 = nofpexcept V_ADD_F32_e32 %9, [[DEF7]], implicit $mode, implicit $exec
|
|
; CHECK-NEXT: %25:vgpr_32 = nofpexcept V_MAC_F32_e32 [[DEF11]], [[DEF8]], %25, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: dead %26:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, [[DEF3]], 0, [[DEF]], 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: dead %27:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, [[DEF4]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: dead %28:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, [[DEF5]], 0, [[DEF2]], 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF14]], [[DEF9]], 0, 0, implicit $exec
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
; Entry block: defines virtual registers %0 through %11. All of them are
; IMPLICIT_DEF except %9, which is a V_MUL_F32_e32 of an immediate and %1.
; None of these values is read in bb.1 or bb.2; apart from %1 feeding %9 here,
; every use is in bb.3.
bb.0:
|
|
successors: %bb.1
|
|
|
|
%0:vreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%2:vgpr_32 = IMPLICIT_DEF
|
|
%3:vgpr_32 = IMPLICIT_DEF
|
|
%4:vgpr_32 = IMPLICIT_DEF
|
|
%5:vgpr_32 = IMPLICIT_DEF
|
|
%6:vgpr_32 = IMPLICIT_DEF
|
|
%7:vgpr_32 = IMPLICIT_DEF
|
|
%8:vgpr_32 = IMPLICIT_DEF
|
|
%9:vgpr_32 = nofpexcept V_MUL_F32_e32 1082130432, %1, implicit $mode, implicit $exec
|
|
%10:vgpr_32 = IMPLICIT_DEF
|
|
%11:vgpr_32 = IMPLICIT_DEF
|
|
|
|
; Block that contains nothing but three operand-less DBG_VALUE instructions
; and falls through to bb.2.
; NOTE(review): presumably this debug-only block is the situation the test
; name ("could_not_use_debug_inst_to_query_mi2mimap") refers to - confirm
; against the scheduler change this test was added for.
bb.1:
|
|
successors: %bb.2
|
|
|
|
DBG_VALUE
|
|
DBG_VALUE
|
|
DBG_VALUE
|
|
|
|
; Block whose only instruction is an S_BRANCH to bb.3; it defines and reads
; no virtual registers.
bb.2:
|
|
successors: %bb.3
|
|
|
|
S_BRANCH %bb.3
|
|
|
|
; Final block: computes a few local values, sets up physical registers for a
; call to @foo, performs the call, then reads the values defined in bb.0 and
; ends the program.
bb.3:
|
|
%12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
%13:vgpr_32 = COPY %12
|
|
%14:vgpr_32 = IMPLICIT_DEF
|
|
%15:vgpr_32 = IMPLICIT_DEF
|
|
%16:vgpr_32 = nofpexcept V_MUL_F32_e32 %7, %7, implicit $mode, implicit $exec
|
|
%17:vgpr_32 = nofpexcept V_MUL_F32_e32 %7, %7, implicit $mode, implicit $exec
|
|
%18:vgpr_32 = nofpexcept V_MUL_F32_e32 %12, %12, implicit $mode, implicit $exec
|
|
%19:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
|
|
%20:vgpr_32 = IMPLICIT_DEF
|
|
%21:vgpr_32 = nofpexcept V_ADD_F32_e32 %12, %12, implicit $mode, implicit $exec
|
|
%22:vgpr_32 = nofpexcept V_MUL_F32_e32 %7, %7, implicit $mode, implicit $exec
|
|
%23:vgpr_32 = nofpexcept V_MUL_F32_e32 %22, %20, implicit $mode, implicit $exec
|
|
%19:vgpr_32 = nofpexcept V_MAC_F32_e32 %21, %13, %19, implicit $mode, implicit $exec
|
|
%24:sreg_64 = IMPLICIT_DEF
|
|
; Physical-register setup for the call. $vgpr0 is assigned three times and
; $vgpr1 twice before SI_CALL; the last writes come from %16, %17 and %18.
$vgpr0 = COPY %14
|
|
$vgpr0 = COPY %12
|
|
$vgpr1 = COPY %7
|
|
$vgpr0 = COPY %16
|
|
$vgpr1 = COPY %17
|
|
$vgpr2 = COPY %18
|
|
$sgpr4 = IMPLICIT_DEF
|
|
dead $sgpr30_sgpr31 = SI_CALL %24, @foo, csr_amdgpu_highregs, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $sgpr4, implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit-def $vgpr0
|
|
; Everything after the call reads values that were defined in bb.0
; (%0-%6 and %8-%11), so those registers are still in use past SI_CALL.
%25:vgpr_32 = nofpexcept V_ADD_F32_e32 %9, %8, implicit $mode, implicit $exec
|
|
%25:vgpr_32 = nofpexcept V_MAC_F32_e32 %15, %10, %25, implicit $mode, implicit $exec
|
|
%26:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, %4, 0, %1, 0, 0, implicit $mode, implicit $exec
|
|
%27:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, %5, 0, %2, 0, 0, implicit $mode, implicit $exec
|
|
%28:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %25, 0, %6, 0, %3, 0, 0, implicit $mode, implicit $exec
|
|
GLOBAL_STORE_DWORD %0, %11, 0, 0, implicit $exec
|
|
S_ENDPGM 0
|
|
|
|
...
|