[AMDGPU] Add a debug option -amdgpu-snop-padding for GCNHazardRecognizer (#146587)
This can help identify whether there are potential hazards. Co-authored-by: Byrnes, Jeffrey <Jeffrey.Byrnes@amd.com>
This commit is contained in:
@@ -44,6 +44,11 @@ static cl::opt<unsigned, false, MFMAPaddingRatioParser>
|
|||||||
cl::desc("Fill a percentage of the latency between "
|
cl::desc("Fill a percentage of the latency between "
|
||||||
"neighboring MFMA with s_nops."));
|
"neighboring MFMA with s_nops."));
|
||||||
|
|
||||||
|
// This is intended for debugging purposes only.
|
||||||
|
static cl::opt<unsigned>
|
||||||
|
NopPadding("amdgpu-snop-padding", cl::init(0), cl::Hidden,
|
||||||
|
cl::desc("Insert a s_nop x before every instruction"));
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Hazard Recognizer Implementation
|
// Hazard Recognizer Implementation
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@@ -300,7 +305,7 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
|
|||||||
unsigned W = PreEmitNoopsCommon(MI);
|
unsigned W = PreEmitNoopsCommon(MI);
|
||||||
fixHazards(MI);
|
fixHazards(MI);
|
||||||
CurrCycleInstr = nullptr;
|
CurrCycleInstr = nullptr;
|
||||||
return W;
|
return std::max(W, NopPadding.getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
|
unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
|
||||||
|
|||||||
132
llvm/test/CodeGen/AMDGPU/amdgpu-snop-padding.mir
Normal file
132
llvm/test/CodeGen/AMDGPU/amdgpu-snop-padding.mir
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-snop-padding=8 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN8 %s
|
||||||
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-snop-padding=16 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN16 %s
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_snop_padding
|
||||||
|
tracksRegLiveness: true
|
||||||
|
frameInfo:
|
||||||
|
maxAlignment: 4
|
||||||
|
stack:
|
||||||
|
- { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
|
||||||
|
machineFunctionInfo:
|
||||||
|
isEntryFunction: false
|
||||||
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
||||||
|
stackPtrOffsetReg: '$sgpr32'
|
||||||
|
frameOffsetReg: '$sgpr33'
|
||||||
|
hasSpilledSGPRs: true
|
||||||
|
body: |
|
||||||
|
; GCN8-LABEL: name: test_snop_padding
|
||||||
|
; GCN8: bb.0:
|
||||||
|
; GCN8-NEXT: successors: %bb.1(0x80000000)
|
||||||
|
; GCN8-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
|
||||||
|
; GCN8-NEXT: {{ $}}
|
||||||
|
; GCN8-NEXT: S_NOP 7
|
||||||
|
; GCN8-NEXT: S_BRANCH %bb.1
|
||||||
|
; GCN8-NEXT: {{ $}}
|
||||||
|
; GCN8-NEXT: bb.1:
|
||||||
|
; GCN8-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||||
|
; GCN8-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
|
||||||
|
; GCN8-NEXT: {{ $}}
|
||||||
|
; GCN8-NEXT: S_NOP 7
|
||||||
|
; GCN8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
|
||||||
|
; GCN8-NEXT: S_NOP 7
|
||||||
|
; GCN8-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||||
|
; GCN8-NEXT: {{ $}}
|
||||||
|
; GCN8-NEXT: bb.2:
|
||||||
|
; GCN8-NEXT: successors: %bb.3(0x80000000)
|
||||||
|
; GCN8-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
|
||||||
|
; GCN8-NEXT: {{ $}}
|
||||||
|
; GCN8-NEXT: S_NOP 7
|
||||||
|
; GCN8-NEXT: SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||||||
|
; GCN8-NEXT: S_NOP 7
|
||||||
|
; GCN8-NEXT: S_NOP 0
|
||||||
|
; GCN8-NEXT: S_NOP 7
|
||||||
|
; GCN8-NEXT: renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||||||
|
; GCN8-NEXT: S_NOP 7
|
||||||
|
; GCN8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
|
||||||
|
; GCN8-NEXT: S_NOP 7
|
||||||
|
; GCN8-NEXT: S_BRANCH %bb.3
|
||||||
|
; GCN8-NEXT: {{ $}}
|
||||||
|
; GCN8-NEXT: bb.3:
|
||||||
|
; GCN8-NEXT: liveins: $sgpr10_sgpr11
|
||||||
|
; GCN8-NEXT: {{ $}}
|
||||||
|
; GCN8-NEXT: S_NOP 7
|
||||||
|
; GCN8-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
|
||||||
|
; GCN8-NEXT: S_NOP 7
|
||||||
|
; GCN8-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
|
||||||
|
; GCN8-NEXT: S_NOP 7
|
||||||
|
; GCN8-NEXT: SI_RETURN
|
||||||
|
;
|
||||||
|
; GCN16-LABEL: name: test_snop_padding
|
||||||
|
; GCN16: bb.0:
|
||||||
|
; GCN16-NEXT: successors: %bb.1(0x80000000)
|
||||||
|
; GCN16-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
|
||||||
|
; GCN16-NEXT: {{ $}}
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_BRANCH %bb.1
|
||||||
|
; GCN16-NEXT: {{ $}}
|
||||||
|
; GCN16-NEXT: bb.1:
|
||||||
|
; GCN16-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||||
|
; GCN16-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
|
||||||
|
; GCN16-NEXT: {{ $}}
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||||
|
; GCN16-NEXT: {{ $}}
|
||||||
|
; GCN16-NEXT: bb.2:
|
||||||
|
; GCN16-NEXT: successors: %bb.3(0x80000000)
|
||||||
|
; GCN16-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
|
||||||
|
; GCN16-NEXT: {{ $}}
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_NOP 0
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_BRANCH %bb.3
|
||||||
|
; GCN16-NEXT: {{ $}}
|
||||||
|
; GCN16-NEXT: bb.3:
|
||||||
|
; GCN16-NEXT: liveins: $sgpr10_sgpr11
|
||||||
|
; GCN16-NEXT: {{ $}}
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: S_NOP 7
|
||||||
|
; GCN16-NEXT: SI_RETURN
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr6, $sgpr10_sgpr11
|
||||||
|
S_BRANCH %bb.1
|
||||||
|
bb.1:
|
||||||
|
liveins: $sgpr6, $sgpr10_sgpr11
|
||||||
|
%0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
|
||||||
|
S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||||
|
bb.2:
|
||||||
|
liveins: $sgpr6, $sgpr10_sgpr11
|
||||||
|
SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||||||
|
S_NOP 0
|
||||||
|
renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||||||
|
%0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
|
||||||
|
S_BRANCH %bb.3
|
||||||
|
bb.3:
|
||||||
|
liveins: $sgpr10_sgpr11
|
||||||
|
$sgpr5 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
|
||||||
|
S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
|
||||||
|
SI_RETURN
|
||||||
|
...
|
||||||
Reference in New Issue
Block a user