diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 5eddd85ab7a4..ebf7aee5a7f6 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -92,16 +92,6 @@ private: uint64_t Addr; }; - /// Used for parsing specific pre-aggregated input files. - struct AggregatedLBREntry { - enum Type : char { BRANCH = 0, FT, FT_EXTERNAL_ORIGIN, TRACE }; - Location From; - Location To; - uint64_t Count; - uint64_t Mispreds; - Type EntryType; - }; - struct Trace { uint64_t From; uint64_t To; @@ -131,7 +121,6 @@ private: /// and use them later for processing and assigning profile. std::unordered_map BranchLBRs; std::unordered_map FallthroughLBRs; - std::vector AggregatedLBRs; std::unordered_map BasicSamples; std::vector MemSamples; @@ -423,9 +412,6 @@ private: /// an external tool. std::error_code parsePreAggregatedLBRSamples(); - /// Process parsed pre-aggregated data. - void processPreAggregated(); - /// If \p Address falls into the binary address space based on memory /// mapping info \p MMI, then adjust it for further processing by subtracting /// the base load address. External addresses, i.e. 
addresses that do not diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index a3671a40c550..2ae1f66ee404 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -444,19 +444,6 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process, Error DataAggregator::preprocessProfile(BinaryContext &BC) { this->BC = &BC; - if (opts::ReadPreAggregated) { - parsePreAggregated(); - return Error::success(); - } - - if (std::optional FileBuildID = BC.getFileBuildID()) { - outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; - processFileBuildID(*FileBuildID); - } else { - errs() << "BOLT-WARNING: build-id will not be checked because we could " - "not read one from input binary\n"; - } - auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) { errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf; exit(1); @@ -469,6 +456,19 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { ErrorCallback(ReturnCode, ErrBuf); }; + if (opts::ReadPreAggregated) { + parsePreAggregated(); + goto heatmap; + } + + if (std::optional FileBuildID = BC.getFileBuildID()) { + outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; + processFileBuildID(*FileBuildID); + } else { + errs() << "BOLT-WARNING: build-id will not be checked because we could " + "not read one from input binary\n"; + } + if (BC.IsLinuxKernel) { // Current MMap parsing logic does not work with linux kernel. 
// MMap entries for linux kernel uses PERF_RECORD_MMAP @@ -501,14 +501,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { (opts::BasicAggregation && parseBasicEvents())) errs() << "PERF2BOLT: failed to parse samples\n"; - if (opts::HeatmapMode) { - if (std::error_code EC = printLBRHeatMap()) { - errs() << "ERROR: failed to print heat map: " << EC.message() << '\n'; - exit(1); - } - exit(0); - } - // Special handling for memory events if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback)) return Error::success(); @@ -519,6 +511,14 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { deleteTempFiles(); +heatmap: + if (opts::HeatmapMode) { + if (std::error_code EC = printLBRHeatMap()) { + errs() << "ERROR: failed to print heat map: " << EC.message() << '\n'; + exit(1); + } + exit(0); + } return Error::success(); } @@ -555,9 +555,7 @@ bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) { } void DataAggregator::processProfile(BinaryContext &BC) { - if (opts::ReadPreAggregated) - processPreAggregated(); - else if (opts::BasicAggregation) + if (opts::BasicAggregation) processBasicEvents(); else processBranchEvents(); @@ -586,7 +584,6 @@ void DataAggregator::processProfile(BinaryContext &BC) { // Release intermediate storage. 
clear(BranchLBRs); clear(FallthroughLBRs); - clear(AggregatedLBRs); clear(BasicSamples); clear(MemSamples); } @@ -1215,15 +1212,14 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator); if (std::error_code EC = TypeOrErr.getError()) return EC; - auto Type = AggregatedLBREntry::TRACE; - if (LLVM_LIKELY(TypeOrErr.get() == "T")) { - } else if (TypeOrErr.get() == "B") { - Type = AggregatedLBREntry::BRANCH; - } else if (TypeOrErr.get() == "F") { - Type = AggregatedLBREntry::FT; - } else if (TypeOrErr.get() == "f") { - Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN; - } else { + enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID }; + auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get()) + .Case("T", TRACE) + .Case("B", BRANCH) + .Case("F", FT) + .Case("f", FT_EXTERNAL_ORIGIN) + .Default(INVALID); + if (Type == INVALID) { reportError("expected T, B, F or f"); return make_error_code(llvm::errc::io_error); } @@ -1279,13 +1275,28 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { BF->setHasProfileAvailable(); uint64_t Count = static_cast<uint64_t>(Frequency.get()); - AggregatedLBREntry Entry{From.get(), To.get(), Count, Mispreds, Type}; - AggregatedLBRs.emplace_back(Entry); - if (Type == AggregatedLBREntry::TRACE) { - auto FtType = (FromFunc == ToFunc) ? AggregatedLBREntry::FT - : AggregatedLBREntry::FT_EXTERNAL_ORIGIN; - AggregatedLBREntry TraceFt{To.get(), TraceFtEnd.get(), Count, 0, FtType}; - AggregatedLBRs.emplace_back(TraceFt); + + Trace Trace(From->Offset, To->Offset); + // Taken trace + if (Type == TRACE || Type == BRANCH) { + TakenBranchInfo &Info = BranchLBRs[Trace]; + Info.TakenCount += Count; + Info.MispredCount += Mispreds; + + NumTotalSamples += Count; + } + // Construct fallthrough part of the trace + if (Type == TRACE) { + Trace.From = To->Offset; + Trace.To = TraceFtEnd->Offset; + Type = FromFunc == ToFunc ?
FT : FT_EXTERNAL_ORIGIN; + } + // Add fallthrough trace + if (Type != BRANCH) { + FTInfo &Info = FallthroughLBRs[Trace]; + (Type == FT ? Info.InternCount : Info.ExternCount) += Count; + + NumTraces += Count; } return std::error_code(); @@ -1567,7 +1578,6 @@ std::error_code DataAggregator::parseBranchEvents() { printBranchStacksDiagnostics(NumTotalSamples - NumSamples); } } - printBranchSamplesDiagnostics(); return std::error_code(); } @@ -1595,6 +1605,7 @@ void DataAggregator::processBranchEvents() { const TakenBranchInfo &Info = AggrLBR.second; doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount); } + printBranchSamplesDiagnostics(); } std::error_code DataAggregator::parseBasicEvents() { @@ -1704,43 +1715,16 @@ std::error_code DataAggregator::parsePreAggregatedLBRSamples() { outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); - while (hasData()) + size_t AggregatedLBRs = 0; + while (hasData()) { if (std::error_code EC = parseAggregatedLBREntry()) return EC; - - return std::error_code(); -} - -void DataAggregator::processPreAggregated() { - outs() << "PERF2BOLT: processing pre-aggregated profile...\n"; - NamedRegionTimer T("processAggregated", "Processing aggregated branch events", - TimerGroupName, TimerGroupDesc, opts::TimeAggregator); - - for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { - switch (AggrEntry.EntryType) { - case AggregatedLBREntry::BRANCH: - case AggregatedLBREntry::TRACE: - doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, - AggrEntry.Mispreds); - NumTotalSamples += AggrEntry.Count; - break; - case AggregatedLBREntry::FT: - case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { - LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT - ? 
AggrEntry.From.Offset - : 0, - AggrEntry.From.Offset, false}; - LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false}; - doTrace(First, Second, AggrEntry.Count); - NumTraces += AggrEntry.Count; - break; - } - } + ++AggregatedLBRs; } - outs() << "PERF2BOLT: read " << AggregatedLBRs.size() - << " aggregated LBR entries\n"; - printBranchSamplesDiagnostics(); + outs() << "PERF2BOLT: read " << AggregatedLBRs << " aggregated LBR entries\n"; + + return std::error_code(); } std::optional DataAggregator::parseCommExecEvent() { diff --git a/bolt/test/X86/heatmap-preagg.test b/bolt/test/X86/heatmap-preagg.test new file mode 100644 index 000000000000..00d4d521b1ad --- /dev/null +++ b/bolt/test/X86/heatmap-preagg.test @@ -0,0 +1,33 @@ +## Test heatmap with pre-aggregated profile + +RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe +## Non-BOLTed input binary +RUN: llvm-bolt-heatmap %t.exe -o %t --pa -p %p/Inputs/blarge_new.preagg.txt \ +RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP %s +RUN: FileCheck %s --check-prefix CHECK-SEC-HOT --input-file %t-section-hotness.csv + +## BOLTed input binary +RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt \ +RUN: --reorder-blocks=ext-tsp --split-functions --split-strategy=cdsplit \ +RUN: --reorder-functions=cdsort --enable-bat --dyno-stats --skip-funcs=main +RUN: llvm-bolt-heatmap %t.out -o %t2 --pa -p %p/Inputs/blarge_new_bat.preagg.txt \ +RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP-BAT %s +RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT --input-file %t2-section-hotness.csv + +CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries +CHECK-HEATMAP: HEATMAP: invalid traces: 1 + +CHECK-SEC-HOT: .init, 0x401000, 0x40101b, 16.8545 +CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583 +CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872 +CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000 + +CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries +CHECK-HEATMAP-BAT: HEATMAP: invalid 
traces: 2 + +CHECK-SEC-HOT-BAT: .init, 0x401000, 0x40101b, 17.2888 +CHECK-SEC-HOT-BAT-NEXT: .plt, 0x401020, 0x4010b0, 5.6132 +CHECK-SEC-HOT-BAT-NEXT: .bolt.org.text, 0x4010b0, 0x401c25, 38.3385 +CHECK-SEC-HOT-BAT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000 +CHECK-SEC-HOT-BAT-NEXT: .text, 0x800000, 0x8002cc, 38.7595 +CHECK-SEC-HOT-BAT-NEXT: .text.cold, 0x800300, 0x800415, 0.0000