[BOLT] Add pre-aggregated trace support (#127125)
Traces are triplets of branch source, branch target, and fall-through end (the next branch). Traces simplify differentiation of fall-throughs into local- and external-origin, which improves performance over a profile with undifferentiated fall-throughs by eliminating profile discontinuity in call-to-continuation fall-throughs. This makes it possible to avoid converting return profile into call-to-continuation profile, which may introduce statistical biases. The existing format makes provisions for local- (F) and external- (f) origin fall-throughs, but the profile producer needs to know function boundaries. BOLT has that information readily available, so providing the origin branch of a fall-through is a functional replacement for the fall-through kind (f or F). This also has the effect of combining branches and fall-throughs into a single record. As traces subsume other pre-aggregated profile kinds, BOLT may drop support for them soon. Users of the pre-aggregated profile format are advised to migrate to the trace format. Test Plan: Updated callcont-fallthru.s
This commit is contained in:
@@ -711,7 +711,7 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
|
||||
}
|
||||
|
||||
bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
|
||||
uint64_t Mispreds, bool IsPreagg) {
|
||||
uint64_t Mispreds) {
|
||||
// Returns whether \p Offset in \p Func contains a return instruction.
|
||||
auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) {
|
||||
auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
|
||||
@@ -772,7 +772,8 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
|
||||
return false;
|
||||
|
||||
// Record call to continuation trace.
|
||||
if (IsPreagg && FromFunc != ToFunc && (IsReturn || IsCallCont)) {
|
||||
if (NeedsConvertRetProfileToCallCont && FromFunc != ToFunc &&
|
||||
(IsReturn || IsCallCont)) {
|
||||
LBREntry First{ToOrig - 1, ToOrig - 1, false};
|
||||
LBREntry Second{ToOrig, ToOrig, false};
|
||||
return doTrace(First, Second, Count);
|
||||
@@ -1216,23 +1217,30 @@ ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
|
||||
return Location(true, BuildID.get(), Offset.get());
|
||||
}
|
||||
|
||||
ErrorOr<DataAggregator::AggregatedLBREntry>
|
||||
DataAggregator::parseAggregatedLBREntry() {
|
||||
std::error_code DataAggregator::parseAggregatedLBREntry() {
|
||||
while (checkAndConsumeFS()) {
|
||||
}
|
||||
|
||||
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
|
||||
if (std::error_code EC = TypeOrErr.getError())
|
||||
return EC;
|
||||
// Pre-aggregated profile with branches and fallthroughs needs to convert
|
||||
// return profile into call to continuation fall-through.
|
||||
auto Type = AggregatedLBREntry::BRANCH;
|
||||
if (TypeOrErr.get() == "B") {
|
||||
NeedsConvertRetProfileToCallCont = true;
|
||||
Type = AggregatedLBREntry::BRANCH;
|
||||
} else if (TypeOrErr.get() == "F") {
|
||||
NeedsConvertRetProfileToCallCont = true;
|
||||
Type = AggregatedLBREntry::FT;
|
||||
} else if (TypeOrErr.get() == "f") {
|
||||
NeedsConvertRetProfileToCallCont = true;
|
||||
Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
|
||||
} else if (TypeOrErr.get() == "T") {
|
||||
// Trace is expanded into B and [Ff]
|
||||
Type = AggregatedLBREntry::TRACE;
|
||||
} else {
|
||||
reportError("expected B, F or f");
|
||||
reportError("expected T, B, F or f");
|
||||
return make_error_code(llvm::errc::io_error);
|
||||
}
|
||||
|
||||
@@ -1248,6 +1256,15 @@ DataAggregator::parseAggregatedLBREntry() {
|
||||
if (std::error_code EC = To.getError())
|
||||
return EC;
|
||||
|
||||
ErrorOr<Location> TraceFtEnd = std::error_code();
|
||||
if (Type == AggregatedLBREntry::TRACE) {
|
||||
while (checkAndConsumeFS()) {
|
||||
}
|
||||
TraceFtEnd = parseLocationOrOffset();
|
||||
if (std::error_code EC = TraceFtEnd.getError())
|
||||
return EC;
|
||||
}
|
||||
|
||||
while (checkAndConsumeFS()) {
|
||||
}
|
||||
ErrorOr<int64_t> Frequency =
|
||||
@@ -1270,9 +1287,24 @@ DataAggregator::parseAggregatedLBREntry() {
|
||||
return make_error_code(llvm::errc::io_error);
|
||||
}
|
||||
|
||||
return AggregatedLBREntry{From.get(), To.get(),
|
||||
static_cast<uint64_t>(Frequency.get()), Mispreds,
|
||||
Type};
|
||||
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From->Offset);
|
||||
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To->Offset);
|
||||
|
||||
for (BinaryFunction *BF : {FromFunc, ToFunc})
|
||||
if (BF)
|
||||
BF->setHasProfileAvailable();
|
||||
|
||||
uint64_t Count = static_cast<uint64_t>(Frequency.get());
|
||||
AggregatedLBREntry Entry{From.get(), To.get(), Count, Mispreds, Type};
|
||||
AggregatedLBRs.emplace_back(Entry);
|
||||
if (Type == AggregatedLBREntry::TRACE) {
|
||||
auto FtType = (FromFunc == ToFunc) ? AggregatedLBREntry::FT
|
||||
: AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
|
||||
AggregatedLBREntry TraceFt{To.get(), TraceFtEnd.get(), Count, 0, FtType};
|
||||
AggregatedLBRs.emplace_back(TraceFt);
|
||||
}
|
||||
|
||||
return std::error_code();
|
||||
}
|
||||
|
||||
bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
|
||||
@@ -1585,8 +1617,7 @@ void DataAggregator::processBranchEvents() {
|
||||
for (const auto &AggrLBR : BranchLBRs) {
|
||||
const Trace &Loc = AggrLBR.first;
|
||||
const TakenBranchInfo &Info = AggrLBR.second;
|
||||
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount,
|
||||
/*IsPreagg*/ false);
|
||||
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1722,18 +1753,10 @@ std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
|
||||
outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
|
||||
NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
|
||||
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
|
||||
while (hasData()) {
|
||||
ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
|
||||
if (std::error_code EC = AggrEntry.getError())
|
||||
while (hasData())
|
||||
if (std::error_code EC = parseAggregatedLBREntry())
|
||||
return EC;
|
||||
|
||||
for (const uint64_t Addr : {AggrEntry->From.Offset, AggrEntry->To.Offset})
|
||||
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
|
||||
BF->setHasProfileAvailable();
|
||||
|
||||
AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
|
||||
}
|
||||
|
||||
return std::error_code();
|
||||
}
|
||||
|
||||
@@ -1746,8 +1769,9 @@ void DataAggregator::processPreAggregated() {
|
||||
for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
|
||||
switch (AggrEntry.EntryType) {
|
||||
case AggregatedLBREntry::BRANCH:
|
||||
case AggregatedLBREntry::TRACE:
|
||||
doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
|
||||
AggrEntry.Mispreds, /*IsPreagg*/ true);
|
||||
AggrEntry.Mispreds);
|
||||
break;
|
||||
case AggregatedLBREntry::FT:
|
||||
case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
|
||||
|
||||
Reference in New Issue
Block a user