[AMDGPU] Add a hidden -amdgpu-snop-padding=<N> debugging option.

GCNHazardRecognizer::PreEmitNoops() now returns max(W, N), where W is the
value computed by PreEmitNoopsCommon() and N is the option value (default 0).
Because the two are combined with max() rather than added, N acts as a lower
bound on the returned count, not as an extra amount on top of W.

The new MIR test runs the post-RA-hazard-rec pass for gfx942 with N=8 and
N=16 and expects one (N=8) or two (N=16) `S_NOP 7` in front of every
instruction, including the `S_NOP 0` that is already present in the input.

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index bc95d3f040e1..0976fccf78d8 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -44,6 +44,11 @@ static cl::opt
                      cl::desc("Fill a percentage of the latency between "
                               "neighboring MFMA with s_nops."));
 
+// This is intended for debugging purposes only.
+static cl::opt<unsigned>
+    NopPadding("amdgpu-snop-padding", cl::init(0), cl::Hidden,
+               cl::desc("Insert a s_nop x before every instruction"));
+
 //===----------------------------------------------------------------------===//
 // Hazard Recognizer Implementation
 //===----------------------------------------------------------------------===//
@@ -300,7 +305,7 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
   unsigned W = PreEmitNoopsCommon(MI);
   fixHazards(MI);
   CurrCycleInstr = nullptr;
-  return W;
+  return std::max(W, NopPadding.getValue());
 }
 
 unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-snop-padding.mir b/llvm/test/CodeGen/AMDGPU/amdgpu-snop-padding.mir
new file mode 100644
index 000000000000..22c913496b73
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-snop-padding.mir
@@ -0,0 +1,132 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-snop-padding=8 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN8 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-snop-padding=16 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN16 %s
+
+---
+name: test_snop_padding
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 4
+stack:
+  - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+machineFunctionInfo:
+  isEntryFunction: false
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+  hasSpilledSGPRs: true
+body: |
+  ; GCN8-LABEL: name: test_snop_padding
+  ; GCN8: bb.0:
+  ; GCN8-NEXT: successors: %bb.1(0x80000000)
+  ; GCN8-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
+  ; GCN8-NEXT: {{ $}}
+  ; GCN8-NEXT: S_NOP 7
+  ; GCN8-NEXT: S_BRANCH %bb.1
+  ; GCN8-NEXT: {{ $}}
+  ; GCN8-NEXT: bb.1:
+  ; GCN8-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; GCN8-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
+  ; GCN8-NEXT: {{ $}}
+  ; GCN8-NEXT: S_NOP 7
+  ; GCN8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
+  ; GCN8-NEXT: S_NOP 7
+  ; GCN8-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
+  ; GCN8-NEXT: {{ $}}
+  ; GCN8-NEXT: bb.2:
+  ; GCN8-NEXT: successors: %bb.3(0x80000000)
+  ; GCN8-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
+  ; GCN8-NEXT: {{ $}}
+  ; GCN8-NEXT: S_NOP 7
+  ; GCN8-NEXT: SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+  ; GCN8-NEXT: S_NOP 7
+  ; GCN8-NEXT: S_NOP 0
+  ; GCN8-NEXT: S_NOP 7
+  ; GCN8-NEXT: renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+  ; GCN8-NEXT: S_NOP 7
+  ; GCN8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
+  ; GCN8-NEXT: S_NOP 7
+  ; GCN8-NEXT: S_BRANCH %bb.3
+  ; GCN8-NEXT: {{ $}}
+  ; GCN8-NEXT: bb.3:
+  ; GCN8-NEXT: liveins: $sgpr10_sgpr11
+  ; GCN8-NEXT: {{ $}}
+  ; GCN8-NEXT: S_NOP 7
+  ; GCN8-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
+  ; GCN8-NEXT: S_NOP 7
+  ; GCN8-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
+  ; GCN8-NEXT: S_NOP 7
+  ; GCN8-NEXT: SI_RETURN
+  ;
+  ; GCN16-LABEL: name: test_snop_padding
+  ; GCN16: bb.0:
+  ; GCN16-NEXT: successors: %bb.1(0x80000000)
+  ; GCN16-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
+  ; GCN16-NEXT: {{ $}}
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_BRANCH %bb.1
+  ; GCN16-NEXT: {{ $}}
+  ; GCN16-NEXT: bb.1:
+  ; GCN16-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; GCN16-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
+  ; GCN16-NEXT: {{ $}}
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
+  ; GCN16-NEXT: {{ $}}
+  ; GCN16-NEXT: bb.2:
+  ; GCN16-NEXT: successors: %bb.3(0x80000000)
+  ; GCN16-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
+  ; GCN16-NEXT: {{ $}}
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_NOP 0
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_BRANCH %bb.3
+  ; GCN16-NEXT: {{ $}}
+  ; GCN16-NEXT: bb.3:
+  ; GCN16-NEXT: liveins: $sgpr10_sgpr11
+  ; GCN16-NEXT: {{ $}}
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: S_NOP 7
+  ; GCN16-NEXT: SI_RETURN
+  bb.0:
+    liveins: $sgpr6, $sgpr10_sgpr11
+    S_BRANCH %bb.1
+  bb.1:
+    liveins: $sgpr6, $sgpr10_sgpr11
+    %0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
+    S_CBRANCH_EXECZ %bb.3, implicit $exec
+  bb.2:
+    liveins: $sgpr6, $sgpr10_sgpr11
+    SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+    S_NOP 0
+    renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+    %0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
+    S_BRANCH %bb.3
+  bb.3:
+    liveins: $sgpr10_sgpr11
+    $sgpr5 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
+    S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
+    SI_RETURN
+...