[llvm-mca] Add command line option -call-latency (#92958)
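Currently, llvm-mca assumes a constant latency of 100 cycles for call instructions. This commit adds a `-call-latency` command-line option that lets the user specify a custom value instead; the default remains 100 cycles. The same value is also used as the conservative fallback MaxLatency for instructions whose latency is unknown.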
This commit is contained in:
committed by
GitHub
parent
25b65be43d
commit
848bef5d85
@@ -78,6 +78,7 @@ class InstrBuilder {
|
||||
|
||||
bool FirstCallInst;
|
||||
bool FirstReturnInst;
|
||||
unsigned CallLatency;
|
||||
|
||||
using InstRecycleCallback = std::function<Instruction *(const InstrDesc &)>;
|
||||
InstRecycleCallback InstRecycleCB;
|
||||
@@ -98,7 +99,7 @@ class InstrBuilder {
|
||||
public:
|
||||
InstrBuilder(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
|
||||
const MCRegisterInfo &RI, const MCInstrAnalysis *IA,
|
||||
const InstrumentManager &IM);
|
||||
const InstrumentManager &IM, unsigned CallLatency);
|
||||
|
||||
void clear() {
|
||||
Descriptors.clear();
|
||||
|
||||
@@ -31,9 +31,9 @@ InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
|
||||
const llvm::MCInstrInfo &mcii,
|
||||
const llvm::MCRegisterInfo &mri,
|
||||
const llvm::MCInstrAnalysis *mcia,
|
||||
const mca::InstrumentManager &im)
|
||||
const mca::InstrumentManager &im, unsigned cl)
|
||||
: STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
|
||||
FirstReturnInst(true) {
|
||||
FirstReturnInst(true), CallLatency(cl) {
|
||||
const MCSchedModel &SM = STI.getSchedModel();
|
||||
ProcResourceMasks.resize(SM.getNumProcResourceKinds());
|
||||
computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
|
||||
@@ -220,17 +220,19 @@ static void initializeUsedResources(InstrDesc &ID,
|
||||
|
||||
static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
|
||||
const MCSchedClassDesc &SCDesc,
|
||||
const MCSubtargetInfo &STI) {
|
||||
const MCSubtargetInfo &STI,
|
||||
unsigned CallLatency) {
|
||||
if (MCDesc.isCall()) {
|
||||
// We cannot estimate how long this call will take.
|
||||
// Artificially set an arbitrarily high latency (100cy).
|
||||
ID.MaxLatency = 100U;
|
||||
// Artificially set an arbitrarily high latency.
|
||||
ID.MaxLatency = CallLatency;
|
||||
return;
|
||||
}
|
||||
|
||||
int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
|
||||
// If latency is unknown, then conservatively assume a MaxLatency of 100cy.
|
||||
ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
|
||||
// If latency is unknown, then conservatively assume the MaxLatency set for
|
||||
// calls.
|
||||
ID.MaxLatency = Latency < 0 ? CallLatency : static_cast<unsigned>(Latency);
|
||||
}
|
||||
|
||||
static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
|
||||
@@ -568,7 +570,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI,
|
||||
// We don't correctly model calls.
|
||||
WithColor::warning() << "found a call in the input assembly sequence.\n";
|
||||
WithColor::note() << "call instructions are not correctly modeled. "
|
||||
<< "Assume a latency of 100cy.\n";
|
||||
<< "Assume a latency of " << CallLatency << "cy.\n";
|
||||
FirstCallInst = false;
|
||||
}
|
||||
|
||||
@@ -580,7 +582,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI,
|
||||
}
|
||||
|
||||
initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
|
||||
computeMaxLatency(*ID, MCDesc, SCDesc, STI);
|
||||
computeMaxLatency(*ID, MCDesc, SCDesc, STI, CallLatency);
|
||||
|
||||
if (Error Err = verifyOperands(MCDesc, MCI))
|
||||
return std::move(Err);
|
||||
|
||||
58
llvm/test/tools/llvm-mca/X86/call-latency.s
Normal file
58
llvm/test/tools/llvm-mca/X86/call-latency.s
Normal file
@@ -0,0 +1,58 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 %s | FileCheck --check-prefixes=ALL,DEFAULT %s
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -call-latency=50 -iterations=1 %s | FileCheck --check-prefixes=ALL,CUSTOM %s
|
||||
|
||||
callq printf
|
||||
|
||||
# ALL: Iterations: 1
|
||||
# ALL-NEXT: Instructions: 1
|
||||
|
||||
# CUSTOM-NEXT: Total Cycles: 53
|
||||
# DEFAULT-NEXT: Total Cycles: 103
|
||||
|
||||
# ALL-NEXT: Total uOps: 1
|
||||
|
||||
# ALL: Dispatch Width: 2
|
||||
|
||||
# CUSTOM-NEXT: uOps Per Cycle: 0.02
|
||||
# CUSTOM-NEXT: IPC: 0.02
|
||||
|
||||
# DEFAULT-NEXT: uOps Per Cycle: 0.01
|
||||
# DEFAULT-NEXT: IPC: 0.01
|
||||
|
||||
# ALL-NEXT: Block RThroughput: 0.5
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
# ALL-NEXT: [2]: Latency
|
||||
# ALL-NEXT: [3]: RThroughput
|
||||
# ALL-NEXT: [4]: MayLoad
|
||||
# ALL-NEXT: [5]: MayStore
|
||||
# ALL-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# ALL-NEXT: 1 1 0.50 callq printf
|
||||
|
||||
# ALL: Resources:
|
||||
# ALL-NEXT: [0] - JALU0
|
||||
# ALL-NEXT: [1] - JALU1
|
||||
# ALL-NEXT: [2] - JDiv
|
||||
# ALL-NEXT: [3] - JFPA
|
||||
# ALL-NEXT: [4] - JFPM
|
||||
# ALL-NEXT: [5] - JFPU0
|
||||
# ALL-NEXT: [6] - JFPU1
|
||||
# ALL-NEXT: [7] - JLAGU
|
||||
# ALL-NEXT: [8] - JMul
|
||||
# ALL-NEXT: [9] - JSAGU
|
||||
# ALL-NEXT: [10] - JSTC
|
||||
# ALL-NEXT: [11] - JVALU0
|
||||
# ALL-NEXT: [12] - JVALU1
|
||||
# ALL-NEXT: [13] - JVIMUL
|
||||
|
||||
# ALL: Resource pressure per iteration:
|
||||
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# ALL-NEXT: - 1.00 - - - - - - - - - - - -
|
||||
|
||||
# ALL: Resource pressure by instruction:
|
||||
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# ALL-NEXT: - 1.00 - - - - - - - - - - - - callq printf
|
||||
@@ -135,6 +135,11 @@ static cl::opt<unsigned>
|
||||
"(instructions per cycle)"),
|
||||
cl::cat(ToolOptions), cl::init(0));
|
||||
|
||||
static cl::opt<unsigned>
|
||||
CallLatency("call-latency", cl::Hidden,
|
||||
cl::desc("Number of cycles to assume for a call instruction"),
|
||||
cl::cat(ToolOptions), cl::init(100U));
|
||||
|
||||
enum class SkipType { NONE, LACK_SCHED, PARSE_FAILURE, ANY_FAILURE };
|
||||
|
||||
static cl::opt<enum SkipType> SkipUnsupportedInstructions(
|
||||
@@ -568,7 +573,7 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
|
||||
// Create an instruction builder.
|
||||
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM);
|
||||
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, CallLatency);
|
||||
|
||||
// Create a context to control ownership of the pipeline hardware.
|
||||
mca::Context MCA(*MRI, *STI);
|
||||
|
||||
@@ -66,7 +66,7 @@ Error MCATestBase::runBaselineMCA(json::Object &Result, ArrayRef<MCInst> Insts,
|
||||
|
||||
// Default InstrumentManager
|
||||
auto IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII);
|
||||
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM);
|
||||
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100);
|
||||
|
||||
const SmallVector<mca::Instrument *> Instruments;
|
||||
SmallVector<std::unique_ptr<mca::Instruction>> LoweredInsts;
|
||||
|
||||
@@ -33,7 +33,7 @@ TEST_F(X86TestBase, TestResumablePipeline) {
|
||||
P->addEventListener(SV.get());
|
||||
|
||||
auto IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII);
|
||||
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM);
|
||||
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100);
|
||||
|
||||
const SmallVector<mca::Instrument *> Instruments;
|
||||
// Tile size = 7
|
||||
@@ -124,7 +124,7 @@ TEST_F(X86TestBase, TestInstructionRecycling) {
|
||||
// Default InstrumentManager
|
||||
auto IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII);
|
||||
|
||||
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM);
|
||||
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100);
|
||||
IB.setInstRecycleCallback(GetRecycledInst);
|
||||
|
||||
const SmallVector<mca::Instrument *> Instruments;
|
||||
|
||||
Reference in New Issue
Block a user