[BOLT][heatmap] Use parsed basic/branch events (#136531)

Remove duplicate profile parsing in heatmap construction, switching to
using the already-parsed profile instead. #138798 adds support for using
a pre-aggregated profile for heatmap construction.

Test Plan: added heatmap.test in
0868850a15
This commit is contained in:
Amir Ayupov
2025-05-12 17:33:30 -07:00
committed by GitHub
parent e953487380
commit f2351d9e7f
2 changed files with 29 additions and 57 deletions

View File

@@ -497,6 +497,10 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
filterBinaryMMapInfo();
prepareToParse("events", MainEventsPPI, ErrorCallback);
if ((!opts::BasicAggregation && parseBranchEvents()) ||
(opts::BasicAggregation && parseBasicEvents()))
errs() << "PERF2BOLT: failed to parse samples\n";
if (opts::HeatmapMode) {
if (std::error_code EC = printLBRHeatMap()) {
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
@@ -505,10 +509,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
exit(0);
}
if ((!opts::BasicAggregation && parseBranchEvents()) ||
(opts::BasicAggregation && parseBasicEvents()))
errs() << "PERF2BOLT: failed to parse samples\n";
// Special handling for memory events
if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
return Error::success();
@@ -1307,53 +1307,6 @@ std::error_code DataAggregator::printLBRHeatMap() {
}
Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
opts::HeatmapMaxAddress, getTextSections(BC));
uint64_t NumTotalSamples = 0;
if (opts::BasicAggregation) {
while (hasData()) {
ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
if (std::error_code EC = SampleRes.getError()) {
if (EC == errc::no_such_process)
continue;
return EC;
}
PerfBasicSample &Sample = SampleRes.get();
HM.registerAddress(Sample.PC);
NumTotalSamples++;
}
outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
} else {
while (hasData()) {
ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
if (std::error_code EC = SampleRes.getError()) {
if (EC == errc::no_such_process)
continue;
return EC;
}
PerfBranchSample &Sample = SampleRes.get();
// LBRs are stored in reverse execution order. NextLBR refers to the next
// executed branch record.
const LBREntry *NextLBR = nullptr;
for (const LBREntry &LBR : Sample.LBR) {
if (NextLBR) {
// Record fall-through trace.
const uint64_t TraceFrom = LBR.To;
const uint64_t TraceTo = NextLBR->From;
++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
}
NextLBR = &LBR;
}
if (!Sample.LBR.empty()) {
HM.registerAddress(Sample.LBR.front().To);
HM.registerAddress(Sample.LBR.back().From);
}
NumTotalSamples += Sample.LBR.size();
}
outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
}
if (!NumTotalSamples) {
if (opts::BasicAggregation) {
@@ -1369,10 +1322,14 @@ std::error_code DataAggregator::printLBRHeatMap() {
outs() << "HEATMAP: building heat map...\n";
// Register basic samples and perf LBR addresses not covered by fallthroughs.
for (const auto &[PC, Hits] : BasicSamples)
HM.registerAddress(PC, Hits);
for (const auto &LBR : FallthroughLBRs) {
const Trace &Trace = LBR.first;
const FTInfo &Info = LBR.second;
HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
HM.registerAddressRange(Trace.From, Trace.To,
Info.InternCount + Info.ExternCount);
}
if (HM.getNumInvalidRanges())
@@ -1418,7 +1375,10 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
const uint64_t TraceTo = NextLBR->From;
const BinaryFunction *TraceBF =
getBinaryFunctionContainingAddress(TraceFrom);
if (TraceBF && TraceBF->containsAddress(TraceTo)) {
if (opts::HeatmapMode) {
FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
++Info.InternCount;
} else if (TraceBF && TraceBF->containsAddress(TraceTo)) {
FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
if (TraceBF->containsAddress(LBR.From))
++Info.InternCount;
@@ -1452,6 +1412,12 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
}
NextLBR = &LBR;
// Record branches outside binary functions for heatmap.
if (opts::HeatmapMode) {
TakenBranchInfo &Info = BranchLBRs[Trace(LBR.From, LBR.To)];
++Info.TakenCount;
continue;
}
uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
if (!From && !To)
@@ -1460,6 +1426,12 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
++Info.TakenCount;
Info.MispredCount += LBR.Mispred;
}
// Record LBR addresses not covered by fallthroughs (bottom-of-stack source
// and top-of-stack target) as basic samples for heatmap.
if (opts::HeatmapMode && !Sample.LBR.empty()) {
++BasicSamples[Sample.LBR.front().To];
++BasicSamples[Sample.LBR.back().From];
}
}
void DataAggregator::printColdSamplesDiagnostic() const {
@@ -1636,6 +1608,7 @@ std::error_code DataAggregator::parseBasicEvents() {
if (!Sample->PC)
continue;
++NumTotalSamples;
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
BF->setHasProfileAvailable();
@@ -1643,6 +1616,7 @@ std::error_code DataAggregator::parseBasicEvents() {
++BasicSamples[Sample->PC];
EventNames.insert(Sample->EventName);
}
outs() << "PERF2BOLT: read " << NumTotalSamples << " basic samples\n";
return std::error_code();
}
@@ -1655,7 +1629,6 @@ void DataAggregator::processBasicEvents() {
for (auto &Sample : BasicSamples) {
const uint64_t PC = Sample.first;
const uint64_t HitCount = Sample.second;
NumTotalSamples += HitCount;
BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
if (!Func) {
OutOfRangeSamples += HitCount;
@@ -1664,7 +1637,6 @@ void DataAggregator::processBasicEvents() {
doBasicSample(*Func, PC, HitCount);
}
outs() << "PERF2BOLT: read " << NumTotalSamples << " samples\n";
printBasicSamplesDiagnostics(OutOfRangeSamples);
}