From 848bef5d8549cdc79bb0eb3c5a8e0495e432b577 Mon Sep 17 00:00:00 2001 From: Chinmay Deshpande Date: Wed, 22 May 2024 13:51:55 -0700 Subject: [PATCH] [llvm-mca] Add command line option -call-latency (#92958) Currently llvm-mca assumes a constant latency of 100 cycles for call instructions. This commit adds a -call-latency command-line option that lets the user override that value. The default latency remains 100 cycles. --- llvm/include/llvm/MCA/InstrBuilder.h | 3 +- llvm/lib/MCA/InstrBuilder.cpp | 20 ++++--- llvm/test/tools/llvm-mca/X86/call-latency.s | 58 +++++++++++++++++++ llvm/tools/llvm-mca/llvm-mca.cpp | 7 ++- llvm/unittests/tools/llvm-mca/MCATestBase.cpp | 2 +- .../tools/llvm-mca/X86/TestIncrementalMCA.cpp | 4 +- 6 files changed, 80 insertions(+), 14 deletions(-) create mode 100644 llvm/test/tools/llvm-mca/X86/call-latency.s diff --git a/llvm/include/llvm/MCA/InstrBuilder.h b/llvm/include/llvm/MCA/InstrBuilder.h index 359437248914..00c7942e4fa1 100644 --- a/llvm/include/llvm/MCA/InstrBuilder.h +++ b/llvm/include/llvm/MCA/InstrBuilder.h @@ -78,6 +78,7 @@ class InstrBuilder { bool FirstCallInst; bool FirstReturnInst; + unsigned CallLatency; using InstRecycleCallback = std::function<Instruction *(const InstrDesc &)>; InstRecycleCallback InstRecycleCB; @@ -98,7 +99,7 @@ class InstrBuilder { public: InstrBuilder(const MCSubtargetInfo &STI, const MCInstrInfo &MCII, const MCRegisterInfo &RI, const MCInstrAnalysis *IA, - const InstrumentManager &IM); + const InstrumentManager &IM, unsigned CallLatency); void clear() { Descriptors.clear(); diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp index bcf065c56691..d5cbdc5de0b8 100644 --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -31,9 +31,9 @@ InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii, const llvm::MCRegisterInfo &mri, const llvm::MCInstrAnalysis *mcia, - const mca::InstrumentManager &im) + const mca::InstrumentManager &im, unsigned cl) : STI(sti), MCII(mcii), 
MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true), - FirstReturnInst(true) { + FirstReturnInst(true), CallLatency(cl) { const MCSchedModel &SM = STI.getSchedModel(); ProcResourceMasks.resize(SM.getNumProcResourceKinds()); computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); @@ -220,17 +220,19 @@ static void initializeUsedResources(InstrDesc &ID, static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, const MCSchedClassDesc &SCDesc, - const MCSubtargetInfo &STI) { + const MCSubtargetInfo &STI, + unsigned CallLatency) { if (MCDesc.isCall()) { // We cannot estimate how long this call will take. - // Artificially set an arbitrarily high latency (100cy). - ID.MaxLatency = 100U; + // Artificially set an arbitrarily high latency. + ID.MaxLatency = CallLatency; return; } int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); - // If latency is unknown, then conservatively assume a MaxLatency of 100cy. - ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency); + // If latency is unknown, then conservatively assume the MaxLatency set for + // calls. + ID.MaxLatency = Latency < 0 ? CallLatency : static_cast<unsigned>(Latency); } static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) { @@ -568,7 +570,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI, // We don't correctly model calls. WithColor::warning() << "found a call in the input assembly sequence.\n"; WithColor::note() << "call instructions are not correctly modeled. 
" - << "Assume a latency of 100cy.\n"; + << "Assume a latency of " << CallLatency << "cy.\n"; FirstCallInst = false; } @@ -580,7 +582,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI, } initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); - computeMaxLatency(*ID, MCDesc, SCDesc, STI); + computeMaxLatency(*ID, MCDesc, SCDesc, STI, CallLatency); if (Error Err = verifyOperands(MCDesc, MCI)) return std::move(Err); diff --git a/llvm/test/tools/llvm-mca/X86/call-latency.s b/llvm/test/tools/llvm-mca/X86/call-latency.s new file mode 100644 index 000000000000..9559d11f1b0a --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/call-latency.s @@ -0,0 +1,58 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 %s | FileCheck --check-prefixes=ALL,DEFAULT %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -call-latency=50 -iterations=1 %s | FileCheck --check-prefixes=ALL,CUSTOM %s + +callq printf + +# ALL: Iterations: 1 +# ALL-NEXT: Instructions: 1 + +# CUSTOM-NEXT: Total Cycles: 53 +# DEFAULT-NEXT: Total Cycles: 103 + +# ALL-NEXT: Total uOps: 1 + +# ALL: Dispatch Width: 2 + +# CUSTOM-NEXT: uOps Per Cycle: 0.02 +# CUSTOM-NEXT: IPC: 0.02 + +# DEFAULT-NEXT: uOps Per Cycle: 0.01 +# DEFAULT-NEXT: IPC: 0.01 + +# ALL-NEXT: Block RThroughput: 0.5 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 1 0.50 callq printf + +# ALL: Resources: +# ALL-NEXT: [0] - JALU0 +# ALL-NEXT: [1] - JALU1 +# ALL-NEXT: [2] - JDiv +# ALL-NEXT: [3] - JFPA +# ALL-NEXT: [4] - JFPM +# ALL-NEXT: [5] - JFPU0 +# ALL-NEXT: [6] - JFPU1 +# ALL-NEXT: [7] - JLAGU +# ALL-NEXT: [8] - JMul +# ALL-NEXT: [9] - JSAGU +# ALL-NEXT: [10] - JSTC +# ALL-NEXT: [11] - JVALU0 +# ALL-NEXT: [12] - 
JVALU1 +# ALL-NEXT: [13] - JVIMUL + +# ALL: Resource pressure per iteration: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# ALL-NEXT: - 1.00 - - - - - - - - - - - - + +# ALL: Resource pressure by instruction: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# ALL-NEXT: - 1.00 - - - - - - - - - - - - callq printf diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index 03d7d7944b9c..cc5d4f5fa05d 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -135,6 +135,11 @@ static cl::opt<unsigned> "(instructions per cycle)"), cl::cat(ToolOptions), cl::init(0)); +static cl::opt<unsigned> + CallLatency("call-latency", cl::Hidden, + cl::desc("Number of cycles to assume for a call instruction"), + cl::cat(ToolOptions), cl::init(100U)); + enum class SkipType { NONE, LACK_SCHED, PARSE_FAILURE, ANY_FAILURE }; static cl::opt<enum SkipType> SkipUnsupportedInstructions( @@ -568,7 +573,7 @@ int main(int argc, char **argv) { } // Create an instruction builder. - mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM); + mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, CallLatency); // Create a context to control ownership of the pipeline hardware. 
mca::Context MCA(*MRI, *STI); diff --git a/llvm/unittests/tools/llvm-mca/MCATestBase.cpp b/llvm/unittests/tools/llvm-mca/MCATestBase.cpp index 4f444fae3d4c..4a39f5e663f2 100644 --- a/llvm/unittests/tools/llvm-mca/MCATestBase.cpp +++ b/llvm/unittests/tools/llvm-mca/MCATestBase.cpp @@ -66,7 +66,7 @@ Error MCATestBase::runBaselineMCA(json::Object &Result, ArrayRef<MCInst> Insts, // Default InstrumentManager auto IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII); - mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM); + mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100); const SmallVector<mca::Instrument *> Instruments; SmallVector<std::unique_ptr<mca::Instruction>> LoweredInsts; diff --git a/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp b/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp index 00a44dc1bab1..ac35dce522ae 100644 --- a/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp +++ b/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp @@ -33,7 +33,7 @@ TEST_F(X86TestBase, TestResumablePipeline) { P->addEventListener(SV.get()); auto IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII); - mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM); + mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100); const SmallVector<mca::Instrument *> Instruments; // Tile size = 7 @@ -124,7 +124,7 @@ TEST_F(X86TestBase, TestInstructionRecycling) { // Default InstrumentManager auto IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII); - mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM); + mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100); IB.setInstRecycleCallback(GetRecycledInst); const SmallVector<mca::Instrument *> Instruments;