From f2351d9e7f2e13883d15915ded79a0e931679fde Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 12 May 2025 17:33:30 -0700 Subject: [PATCH] [BOLT][heatmap] Use parsed basic/branch events (#136531) Remove duplicate profile parsing in heatmap construction, switching to using parsed profile. #138798 adds support for using pre-aggregated profile for heatmap construction. Test Plan: added heatmap.test in https://github.com/rafaelauler/bolt-tests/commit/0868850a159903ec4dd3bbacae9c8b1726b9e60e --- bolt/include/bolt/Profile/Heatmap.h | 4 +- bolt/lib/Profile/DataAggregator.cpp | 82 ++++++++++------------------- 2 files changed, 29 insertions(+), 57 deletions(-) diff --git a/bolt/include/bolt/Profile/Heatmap.h b/bolt/include/bolt/Profile/Heatmap.h index 74d7eedc3078..fc1e2cd30011 100644 --- a/bolt/include/bolt/Profile/Heatmap.h +++ b/bolt/include/bolt/Profile/Heatmap.h @@ -57,9 +57,9 @@ public: } /// Register a single sample at \p Address. - void registerAddress(uint64_t Address) { + void registerAddress(uint64_t Address, uint64_t Count) { if (!ignoreAddress(Address)) - ++Map[Address / BucketSize]; + Map[Address / BucketSize] += Count; } /// Register \p Count samples at [\p StartAddress, \p EndAddress ]. 
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 9453784d6557..a3671a40c550 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -497,6 +497,10 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { filterBinaryMMapInfo(); prepareToParse("events", MainEventsPPI, ErrorCallback); + if ((!opts::BasicAggregation && parseBranchEvents()) || + (opts::BasicAggregation && parseBasicEvents())) + errs() << "PERF2BOLT: failed to parse samples\n"; + if (opts::HeatmapMode) { if (std::error_code EC = printLBRHeatMap()) { errs() << "ERROR: failed to print heat map: " << EC.message() << '\n'; @@ -505,10 +509,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { exit(0); } - if ((!opts::BasicAggregation && parseBranchEvents()) || - (opts::BasicAggregation && parseBasicEvents())) - errs() << "PERF2BOLT: failed to parse samples\n"; - // Special handling for memory events if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback)) return Error::success(); @@ -1307,53 +1307,6 @@ std::error_code DataAggregator::printLBRHeatMap() { } Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress, opts::HeatmapMaxAddress, getTextSections(BC)); - uint64_t NumTotalSamples = 0; - - if (opts::BasicAggregation) { - while (hasData()) { - ErrorOr<PerfBasicSample> SampleRes = parseBasicSample(); - if (std::error_code EC = SampleRes.getError()) { - if (EC == errc::no_such_process) - continue; - return EC; - } - PerfBasicSample &Sample = SampleRes.get(); - HM.registerAddress(Sample.PC); - NumTotalSamples++; - } - outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n"; - } else { - while (hasData()) { - ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); - if (std::error_code EC = SampleRes.getError()) { - if (EC == errc::no_such_process) - continue; - return EC; - } - - PerfBranchSample &Sample = SampleRes.get(); - - // LBRs are stored in reverse execution order. 
NextLBR refers to the next - // executed branch record. - const LBREntry *NextLBR = nullptr; - for (const LBREntry &LBR : Sample.LBR) { - if (NextLBR) { - // Record fall-through trace. - const uint64_t TraceFrom = LBR.To; - const uint64_t TraceTo = NextLBR->From; - ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount; - } - NextLBR = &LBR; - } - if (!Sample.LBR.empty()) { - HM.registerAddress(Sample.LBR.front().To); - HM.registerAddress(Sample.LBR.back().From); - } - NumTotalSamples += Sample.LBR.size(); - } - outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n"; - outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n"; - } if (!NumTotalSamples) { if (opts::BasicAggregation) { @@ -1369,10 +1322,14 @@ std::error_code DataAggregator::printLBRHeatMap() { outs() << "HEATMAP: building heat map...\n"; + // Register basic samples and perf LBR addresses not covered by fallthroughs. + for (const auto &[PC, Hits] : BasicSamples) + HM.registerAddress(PC, Hits); for (const auto &LBR : FallthroughLBRs) { const Trace &Trace = LBR.first; const FTInfo &Info = LBR.second; - HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount); + HM.registerAddressRange(Trace.From, Trace.To, + Info.InternCount + Info.ExternCount); } if (HM.getNumInvalidRanges()) @@ -1418,7 +1375,10 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample, const uint64_t TraceTo = NextLBR->From; const BinaryFunction *TraceBF = getBinaryFunctionContainingAddress(TraceFrom); - if (TraceBF && TraceBF->containsAddress(TraceTo)) { + if (opts::HeatmapMode) { + FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)]; + ++Info.InternCount; + } else if (TraceBF && TraceBF->containsAddress(TraceTo)) { FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)]; if (TraceBF->containsAddress(LBR.From)) ++Info.InternCount; @@ -1452,6 +1412,12 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample, } NextLBR = &LBR; + // Record branches outside binary functions 
for heatmap. + if (opts::HeatmapMode) { + TakenBranchInfo &Info = BranchLBRs[Trace(LBR.From, LBR.To)]; + ++Info.TakenCount; + continue; + } uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0; uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0; if (!From && !To) @@ -1460,6 +1426,12 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample, ++Info.TakenCount; Info.MispredCount += LBR.Mispred; } + // Record LBR addresses not covered by fallthroughs (bottom-of-stack source + // and top-of-stack target) as basic samples for heatmap. + if (opts::HeatmapMode && !Sample.LBR.empty()) { + ++BasicSamples[Sample.LBR.front().To]; + ++BasicSamples[Sample.LBR.back().From]; + } } void DataAggregator::printColdSamplesDiagnostic() const { @@ -1636,6 +1608,7 @@ std::error_code DataAggregator::parseBasicEvents() { if (!Sample->PC) continue; + ++NumTotalSamples; if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) BF->setHasProfileAvailable(); @@ -1643,6 +1616,7 @@ std::error_code DataAggregator::parseBasicEvents() { ++BasicSamples[Sample->PC]; EventNames.insert(Sample->EventName); } + outs() << "PERF2BOLT: read " << NumTotalSamples << " basic samples\n"; return std::error_code(); } @@ -1655,7 +1629,6 @@ void DataAggregator::processBasicEvents() { for (auto &Sample : BasicSamples) { const uint64_t PC = Sample.first; const uint64_t HitCount = Sample.second; - NumTotalSamples += HitCount; BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); if (!Func) { OutOfRangeSamples += HitCount; @@ -1664,7 +1637,6 @@ void DataAggregator::processBasicEvents() { doBasicSample(*Func, PC, HitCount); } - outs() << "PERF2BOLT: read " << NumTotalSamples << " samples\n"; printBasicSamplesDiagnostics(OutOfRangeSamples); }