[BOLT] Add pre-aggregated trace support (#127125)

Traces are triplets of branch source, target, and fall-through end (next
branch).

Traces simplify differentiation of fall-throughs into local- and
external-origin, which improves performance over a profile with
undifferentiated fall-throughs by eliminating profile discontinuity in
call-to-continuation fall-throughs. This makes it possible to avoid
converting the return profile into a call-to-continuation profile, which
may introduce statistical biases.

The existing format makes provisions for local- (F) and external- (f)
origin fall-throughs, but to choose between them the profile producer
needs to know function boundaries. BOLT has that information readily
available, so providing the origin branch of a fall-through is a
functional replacement for the fall-through kind (f or F). This also has
the effect of combining branches and fall-throughs into a single record.

As traces subsume the other pre-aggregated profile kinds, BOLT may soon
drop support for those older kinds. Users of the pre-aggregated profile
format are advised to migrate to the trace format.

Test Plan: Updated callcont-fallthru.s
This commit is contained in:
Amir Ayupov
2025-02-13 15:14:56 -08:00
committed by GitHub
parent 050933b41f
commit 61acfb07e8
4 changed files with 91 additions and 47 deletions

View File

@@ -711,7 +711,7 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
}
bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
uint64_t Mispreds, bool IsPreagg) {
uint64_t Mispreds) {
// Returns whether \p Offset in \p Func contains a return instruction.
auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) {
auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
@@ -772,7 +772,8 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
return false;
// Record call to continuation trace.
if (IsPreagg && FromFunc != ToFunc && (IsReturn || IsCallCont)) {
if (NeedsConvertRetProfileToCallCont && FromFunc != ToFunc &&
(IsReturn || IsCallCont)) {
LBREntry First{ToOrig - 1, ToOrig - 1, false};
LBREntry Second{ToOrig, ToOrig, false};
return doTrace(First, Second, Count);
@@ -1216,23 +1217,30 @@ ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
return Location(true, BuildID.get(), Offset.get());
}
ErrorOr<DataAggregator::AggregatedLBREntry>
DataAggregator::parseAggregatedLBREntry() {
std::error_code DataAggregator::parseAggregatedLBREntry() {
while (checkAndConsumeFS()) {
}
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
if (std::error_code EC = TypeOrErr.getError())
return EC;
// Pre-aggregated profile with branches and fallthroughs needs to convert
// return profile into call to continuation fall-through.
auto Type = AggregatedLBREntry::BRANCH;
if (TypeOrErr.get() == "B") {
NeedsConvertRetProfileToCallCont = true;
Type = AggregatedLBREntry::BRANCH;
} else if (TypeOrErr.get() == "F") {
NeedsConvertRetProfileToCallCont = true;
Type = AggregatedLBREntry::FT;
} else if (TypeOrErr.get() == "f") {
NeedsConvertRetProfileToCallCont = true;
Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
} else if (TypeOrErr.get() == "T") {
// Trace is expanded into B and [Ff]
Type = AggregatedLBREntry::TRACE;
} else {
reportError("expected B, F or f");
reportError("expected T, B, F or f");
return make_error_code(llvm::errc::io_error);
}
@@ -1248,6 +1256,15 @@ DataAggregator::parseAggregatedLBREntry() {
if (std::error_code EC = To.getError())
return EC;
ErrorOr<Location> TraceFtEnd = std::error_code();
if (Type == AggregatedLBREntry::TRACE) {
while (checkAndConsumeFS()) {
}
TraceFtEnd = parseLocationOrOffset();
if (std::error_code EC = TraceFtEnd.getError())
return EC;
}
while (checkAndConsumeFS()) {
}
ErrorOr<int64_t> Frequency =
@@ -1270,9 +1287,24 @@ DataAggregator::parseAggregatedLBREntry() {
return make_error_code(llvm::errc::io_error);
}
return AggregatedLBREntry{From.get(), To.get(),
static_cast<uint64_t>(Frequency.get()), Mispreds,
Type};
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From->Offset);
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To->Offset);
for (BinaryFunction *BF : {FromFunc, ToFunc})
if (BF)
BF->setHasProfileAvailable();
uint64_t Count = static_cast<uint64_t>(Frequency.get());
AggregatedLBREntry Entry{From.get(), To.get(), Count, Mispreds, Type};
AggregatedLBRs.emplace_back(Entry);
if (Type == AggregatedLBREntry::TRACE) {
auto FtType = (FromFunc == ToFunc) ? AggregatedLBREntry::FT
: AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
AggregatedLBREntry TraceFt{To.get(), TraceFtEnd.get(), Count, 0, FtType};
AggregatedLBRs.emplace_back(TraceFt);
}
return std::error_code();
}
bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
@@ -1585,8 +1617,7 @@ void DataAggregator::processBranchEvents() {
for (const auto &AggrLBR : BranchLBRs) {
const Trace &Loc = AggrLBR.first;
const TakenBranchInfo &Info = AggrLBR.second;
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount,
/*IsPreagg*/ false);
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
}
}
@@ -1722,18 +1753,10 @@ std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
while (hasData()) {
ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
if (std::error_code EC = AggrEntry.getError())
while (hasData())
if (std::error_code EC = parseAggregatedLBREntry())
return EC;
for (const uint64_t Addr : {AggrEntry->From.Offset, AggrEntry->To.Offset})
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
BF->setHasProfileAvailable();
AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
}
return std::error_code();
}
@@ -1746,8 +1769,9 @@ void DataAggregator::processPreAggregated() {
for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
switch (AggrEntry.EntryType) {
case AggregatedLBREntry::BRANCH:
case AggregatedLBREntry::TRACE:
doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
AggrEntry.Mispreds, /*IsPreagg*/ true);
AggrEntry.Mispreds);
break;
case AggregatedLBREntry::FT:
case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {