Files
clang-p2996/bolt/lib/Profile/YAMLProfileReader.cpp
spupyrev 44268271f6 [BOLT] stale profile matching [part 1 out of 2]
BOLT often has to deal with profiles collected on binaries built from several
revisions behind release. As a result, a certain percentage of functions is
considered stale and not optimized. This diff adds the ability to match profiles
to functions that are not 100% binary identical, which increases the
optimization coverage and boosts the performance of applications.

The algorithm consists of two phases, matching and inference:
- At the matching phase, we try to "guess" as many block and jump counts from
  the stale profile as possible. To this end, the content of each basic block
  is hashed and stored in the (yaml) profile. When BOLT optimizes a binary,
  it computes block hashes and identifies the corresponding entries in the
  stale profile. It yields a partial profile for every CFG in the binary.
- At the inference phase, we employ a network flow-based algorithm (profi) to
  reconstruct "realistic" block and jump counts from the partial profile
  generated at the first stage. In practice, we don't always produce proper
  profile data but the majority (e.g., >90%) of CFGs get the correct counts.

This is the first part of the change; the next stacked diff extends the block hashing
and provides performance evaluation numbers.

Reviewed By: maksfb

Differential Revision: https://reviews.llvm.org/D144500
2023-06-06 12:13:52 -07:00

437 lines
14 KiB
C++

//===- bolt/Profile/YAMLProfileReader.cpp - YAML profile de-serializer ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "bolt/Profile/YAMLProfileReader.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Profile/ProfileYAMLMapping.h"
#include "bolt/Utils/Utils.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
namespace opts {
// Options declared here with `extern`; their definitions are not in this
// file. In this file, Verbosity gates the BOLT-WARNING/BOLT-INFO diagnostics
// and InferStaleProfile enables the inferStaleProfile() fallback in
// parseFunctionProfile().
extern cl::opt<unsigned> Verbosity;
extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> InferStaleProfile;
// Hidden option "profile-ignore-hash", local to this file. When set,
// parseFunctionProfile() skips the function hash comparison and readProfile()
// compares basic block counts instead of hashes.
static llvm::cl::opt<bool>
IgnoreHash("profile-ignore-hash",
cl::desc("ignore hash while reading function profile"),
cl::Hidden, cl::cat(BoltOptCategory));
}
namespace llvm {
namespace bolt {
bool YAMLProfileReader::isYAML(const StringRef Filename) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(Filename);
if (std::error_code EC = MB.getError())
report_error(Filename, EC);
StringRef Buffer = MB.get()->getBuffer();
if (Buffer.startswith("---\n"))
return true;
return false;
}
void YAMLProfileReader::buildNameMaps(
std::map<uint64_t, BinaryFunction> &Functions) {
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
StringRef Name = YamlBF.Name;
const size_t Pos = Name.find("(*");
if (Pos != StringRef::npos)
Name = Name.substr(0, Pos);
ProfileNameToProfile[Name] = &YamlBF;
if (const std::optional<StringRef> CommonName = getLTOCommonName(Name))
LTOCommonNameMap[*CommonName].push_back(&YamlBF);
}
for (auto &BFI : Functions) {
const BinaryFunction &Function = BFI.second;
for (StringRef Name : Function.getNames())
if (const std::optional<StringRef> CommonName = getLTOCommonName(Name))
LTOCommonNameFunctionMap[*CommonName].insert(&Function);
}
}
bool YAMLProfileReader::hasLocalsWithFileName() const {
for (const StringMapEntry<yaml::bolt::BinaryFunctionProfile *> &KV :
ProfileNameToProfile) {
const StringRef &FuncName = KV.getKey();
if (FuncName.count('/') == 2 && FuncName[0] != '/')
return true;
}
return false;
}
/// Apply the YAML profile record \p YamlBF to the binary function \p BF.
///
/// Sets the function execution count and raw branch count, the execution
/// count of every basic block, call-site annotations on call / indirect
/// branch instructions, and the counts on CFG edges. Mismatches between the
/// profile and the function (hash, number of blocks, block indices, call-site
/// offsets, successor edges) are counted and reported according to
/// opts::Verbosity; they do not stop processing.
///
/// \returns true if the profile matched without any mismatch, or if
/// opts::InferStaleProfile is set and inferStaleProfile() succeeded. The
/// function is marked as profiled only in those cases.
bool YAMLProfileReader::parseFunctionProfile(
BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
BinaryContext &BC = BF.getBinaryContext();
bool ProfileMatched = true;
uint64_t MismatchedBlocks = 0;
uint64_t MismatchedCalls = 0;
uint64_t MismatchedEdges = 0;
// Accumulated from entry-point blocks; only used for sample profiles.
uint64_t FunctionExecutionCount = 0;
BF.setExecutionCount(YamlBF.ExecCount);
// The raw branch count is the sum of all successor counts in the profile
// record, regardless of whether the edges match the function's CFG.
uint64_t FuncRawBranchCount = 0;
for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors)
FuncRawBranchCount += YamlSI.Count;
BF.setRawBranchCount(FuncRawBranchCount);
// A hash mismatch (unless hashes are ignored) or a different number of basic
// blocks marks the profile as not matched, but the data is still applied
// below on a best-effort basis.
if (!opts::IgnoreHash && YamlBF.Hash != BF.computeHash(/*UseDFS=*/true)) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: function hash mismatch\n";
ProfileMatched = false;
}
if (YamlBF.NumBasicBlocks != BF.size()) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: number of basic blocks mismatch\n";
ProfileMatched = false;
}
// Block indices stored in the profile are positions in the DFS order of the
// function's basic blocks.
BinaryFunction::BasicBlockOrderType DFSOrder = BF.dfs();
for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
if (YamlBB.Index >= DFSOrder.size()) {
if (opts::Verbosity >= 2)
errs() << "BOLT-WARNING: index " << YamlBB.Index
<< " is out of bounds\n";
++MismatchedBlocks;
continue;
}
BinaryBasicBlock &BB = *DFSOrder[YamlBB.Index];
// Basic samples profile (without LBR) does not have branches information
// and needs a special processing.
if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) {
if (!YamlBB.EventCount) {
BB.setExecutionCount(0);
continue;
}
// Scale the event count by 1000, then normalize it either by the number
// of non-pseudo instructions in the block (when that number is non-zero)
// or by the number of calls in the block plus one, depending on the flags
// set in readProfile().
uint64_t NumSamples = YamlBB.EventCount * 1000;
if (NormalizeByInsnCount && BB.getNumNonPseudos())
NumSamples /= BB.getNumNonPseudos();
else if (NormalizeByCalls)
NumSamples /= BB.getNumCalls() + 1;
BB.setExecutionCount(NumSamples);
if (BB.isEntryPoint())
FunctionExecutionCount += NumSamples;
// Call sites and successors are not processed for sample profiles.
continue;
}
BB.setExecutionCount(YamlBB.ExecCount);
for (const yaml::bolt::CallSiteInfo &YamlCSI : YamlBB.CallSites) {
// Resolve the callee through the profile-id-to-function table; ids outside
// the table or without a matched function yield a null callee symbol.
BinaryFunction *Callee = YamlCSI.DestId < YamlProfileToFunction.size()
? YamlProfileToFunction[YamlCSI.DestId]
: nullptr;
bool IsFunction = Callee ? true : false;
MCSymbol *CalleeSymbol = nullptr;
if (IsFunction)
CalleeSymbol = Callee->getSymbolForEntryID(YamlCSI.EntryDiscriminator);
// The call site is appended to the function-level list before the offset
// is validated, so it is recorded even when one of the checks below fails.
BF.getAllCallSites().emplace_back(CalleeSymbol, YamlCSI.Count,
YamlCSI.Mispreds, YamlCSI.Offset);
// The call-site offset is relative to the start of the block.
if (YamlCSI.Offset >= BB.getOriginalSize()) {
if (opts::Verbosity >= 2)
errs() << "BOLT-WARNING: offset " << YamlCSI.Offset
<< " out of bounds in block " << BB.getName() << '\n';
++MismatchedCalls;
continue;
}
MCInst *Instr =
BF.getInstructionAtOffset(BB.getInputOffset() + YamlCSI.Offset);
if (!Instr) {
if (opts::Verbosity >= 2)
errs() << "BOLT-WARNING: no instruction at offset " << YamlCSI.Offset
<< " in block " << BB.getName() << '\n';
++MismatchedCalls;
continue;
}
// Only calls and indirect branches may carry call-site profile data.
if (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)) {
if (opts::Verbosity >= 2)
errs() << "BOLT-WARNING: expected call at offset " << YamlCSI.Offset
<< " in block " << BB.getName() << '\n';
++MismatchedCalls;
continue;
}
// Attach annotation Name with value Count to the instruction unless an
// annotation with that name is already present; duplicates are ignored
// (with a warning at verbosity >= 1) and do not count as a mismatch.
auto setAnnotation = [&](StringRef Name, uint64_t Count) {
if (BC.MIB->hasAnnotation(*Instr, Name)) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: ignoring duplicate " << Name
<< " info for offset 0x" << Twine::utohexstr(YamlCSI.Offset)
<< " in function " << BF << '\n';
return;
}
BC.MIB->addAnnotation(*Instr, Name, Count);
};
// Indirect calls/branches accumulate one entry per profiled target in the
// "CallProfile" annotation; conditional tail calls get taken and
// mispredicted counts; every other call gets a plain "Count".
if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
auto &CSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
*Instr, "CallProfile");
CSP.emplace_back(CalleeSymbol, YamlCSI.Count, YamlCSI.Mispreds);
} else if (BC.MIB->getConditionalTailCall(*Instr)) {
setAnnotation("CTCTakenCount", YamlCSI.Count);
setAnnotation("CTCMispredCount", YamlCSI.Mispreds);
} else {
setAnnotation("Count", YamlCSI.Count);
}
}
for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
if (YamlSI.Index >= DFSOrder.size()) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: index out of bounds for profiled block\n";
++MismatchedEdges;
continue;
}
BinaryBasicBlock &SuccessorBB = *DFSOrder[YamlSI.Index];
// The profiled edge must exist in the function's CFG.
if (!BB.getSuccessor(SuccessorBB.getLabel())) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: no successor for block " << BB.getName()
<< " that matches index " << YamlSI.Index << " or block "
<< SuccessorBB.getName() << '\n';
++MismatchedEdges;
continue;
}
// Counts are accumulated, so several profile entries for the same edge add
// up.
BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(SuccessorBB);
BI.Count += YamlSI.Count;
BI.MispredictedCount += YamlSI.Mispreds;
}
}
// If basic block profile wasn't read it should be 0.
for (BinaryBasicBlock &BB : BF)
if (BB.getExecutionCount() == BinaryBasicBlock::COUNT_NO_PROFILE)
BB.setExecutionCount(0);
// For sample profiles the function count is the sum over entry-point blocks
// computed above, and estimateEdgeCounts() is invoked on the function.
if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) {
BF.setExecutionCount(FunctionExecutionCount);
estimateEdgeCounts(BF);
}
ProfileMatched &= !MismatchedBlocks && !MismatchedCalls && !MismatchedEdges;
if (ProfileMatched)
BF.markProfiled(YamlBP.Header.Flags);
if (!ProfileMatched && opts::Verbosity >= 1)
errs() << "BOLT-WARNING: " << MismatchedBlocks << " blocks, "
<< MismatchedCalls << " calls, and " << MismatchedEdges
<< " edges in profile did not match function " << BF << '\n';
// Fallback for stale profiles: when enabled, let inferStaleProfile() try to
// apply the profile, and treat the function as matched if it succeeds.
if (!ProfileMatched && opts::InferStaleProfile) {
if (opts::Verbosity >= 1)
outs() << "BOLT-INFO: applying profile inference for "
<< "\"" << BF.getPrintName() << "\"\n";
if (inferStaleProfile(BF, YamlBF)) {
ProfileMatched = true;
BF.markProfiled(YamlBP.Header.Flags);
}
}
return ProfileMatched;
}
/// Load the YAML profile from Filename into YamlBP, validate its header,
/// build the name lookup maps, and give every binary function a preliminary
/// execution count taken from the first of its names found in the profile.
///
/// \returns an error if the input cannot be opened, if YAML parsing fails, if
/// the header version is not 1, or if the header event names contain a ','
/// (i.e. more than one event); Error::success() otherwise.
Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
      MemoryBuffer::getFileOrSTDIN(Filename);
  if (std::error_code OpenEC = BufferOrErr.getError()) {
    errs() << "ERROR: cannot open " << Filename << ": " << OpenEC.message()
           << "\n";
    return errorCodeToError(OpenEC);
  }

  // Deserialize the whole document into YamlBP.
  yaml::Input Reader(BufferOrErr.get()->getBuffer());
  Reader >> YamlBP;
  if (Reader.error()) {
    errs() << "BOLT-ERROR: syntax error parsing profile in " << Filename
           << " : " << Reader.error().message() << '\n';
    return errorCodeToError(Reader.error());
  }

  // Header validation: only version 1 with a single event is accepted.
  const bool UnsupportedVersion = YamlBP.Header.Version != 1;
  if (UnsupportedVersion)
    return make_error<StringError>(
        Twine("cannot read profile : unsupported version"),
        inconvertibleErrorCode());

  const bool HasMultipleEvents =
      YamlBP.Header.EventNames.find(',') != StringRef::npos;
  if (HasMultipleEvents)
    return make_error<StringError>(
        Twine("multiple events in profile are not supported"),
        inconvertibleErrorCode());

  // Profiles are paired with functions by name.
  buildNameMaps(BC.getBinaryFunctions());

  // Seed each function's execution count from the first name that has a
  // profile record.
  for (auto &Entry : BC.getBinaryFunctions()) {
    BinaryFunction &Function = Entry.second;
    for (const StringRef Alias : Function.getNames()) {
      auto ProfileIt = ProfileNameToProfile.find(Alias);
      if (ProfileIt == ProfileNameToProfile.end())
        continue;
      Function.setExecutionCount(ProfileIt->getValue()->ExecCount);
      break;
    }
  }

  return Error::success();
}
bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
for (StringRef Name : BF.getNames()) {
if (ProfileNameToProfile.contains(Name))
return true;
if (const std::optional<StringRef> CommonName = getLTOCommonName(Name)) {
if (LTOCommonNameMap.contains(*CommonName))
return true;
}
}
return false;
}
/// Pair the profile records in YamlBP with the binary functions of \p BC and
/// apply every paired record through parseFunctionProfile().
///
/// Pairing is done in two passes over the functions (see the comment in the
/// body). Profile records left unpaired are reported at verbosity >= 1, and
/// their number is stored in \p BC via setNumUnusedProfiledObjects().
///
/// \returns Error::success() unconditionally.
Error YAMLProfileReader::readProfile(BinaryContext &BC) {
  YamlProfileToFunction.resize(YamlBP.Functions.size() + 1);

  // With hashes ignored, a profile matches a function when the number of
  // basic blocks agrees; otherwise the hashes have to be equal.
  auto profileMatches = [](const yaml::bolt::BinaryFunctionProfile &Profile,
                           BinaryFunction &BF) {
    if (opts::IgnoreHash)
      return Profile.NumBasicBlocks == BF.size();
    return Profile.Hash == static_cast<uint64_t>(BF.getHash());
  };

  // Two passes are needed because LTO makes function names ambiguous. Pass
  // one pairs profiles that agree with a function both by exact name and by
  // the match criterion above. Pass two tolerates name ambiguity for LTO
  // private functions.
  for (auto &Entry : BC.getBinaryFunctions()) {
    BinaryFunction &BF = Entry.second;

    // Drop the preliminary count that pre-processing may have assigned.
    BF.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);

    // The hash is recomputed once per function.
    if (!opts::IgnoreHash)
      BF.computeHash(/*UseDFS=*/true);

    for (const StringRef Alias : BF.getNames()) {
      auto ProfileIt = ProfileNameToProfile.find(Alias);
      if (ProfileIt != ProfileNameToProfile.end() &&
          profileMatches(*ProfileIt->getValue(), BF))
        matchProfileToFunction(*ProfileIt->getValue(), BF);
    }
  }

  for (auto &Entry : BC.getBinaryFunctions()) {
    BinaryFunction &BF = Entry.second;
    if (ProfiledFunctions.count(&BF))
      continue;

    for (const StringRef Alias : BF.getNames()) {
      const std::optional<StringRef> LTOName = getLTOCommonName(Alias);

      if (!LTOName) {
        // Regular name: take the profile registered under it if it has not
        // been used yet, without checking the match criterion.
        auto ProfileIt = ProfileNameToProfile.find(Alias);
        if (ProfileIt == ProfileNameToProfile.end())
          continue;
        yaml::bolt::BinaryFunctionProfile &Candidate = *ProfileIt->getValue();
        if (Candidate.Used)
          continue;
        matchProfileToFunction(Candidate, BF);
        break;
      }

      auto GroupIt = LTOCommonNameMap.find(*LTOName);
      if (GroupIt == LTOCommonNameMap.end())
        continue;
      std::vector<yaml::bolt::BinaryFunctionProfile *> &Group =
          GroupIt->getValue();

      // Take the first unused profile in the group that satisfies the match
      // criterion.
      bool FoundMatch = false;
      for (yaml::bolt::BinaryFunctionProfile *Candidate : Group) {
        if (Candidate->Used)
          continue;
        if (!profileMatches(*Candidate, BF))
          continue;
        matchProfileToFunction(*Candidate, BF);
        FoundMatch = true;
        break;
      }
      if (FoundMatch)
        break;

      // If there's only one function with a given name, try to match it
      // partially.
      if (Group.size() == 1 &&
          LTOCommonNameFunctionMap[*LTOName].size() == 1 &&
          !Group.front()->Used) {
        matchProfileToFunction(*Group.front(), BF);
        break;
      }
    }
  }

  // Report the profile records that were not paired with any function.
  if (opts::Verbosity >= 1) {
    for (const yaml::bolt::BinaryFunctionProfile &Profile : YamlBP.Functions) {
      if (Profile.Used)
        continue;
      errs() << "BOLT-WARNING: profile ignored for function " << Profile.Name
             << '\n';
    }
  }

  // Set for parseFunctionProfile().
  NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
  NormalizeByCalls = usesEvent("branches");

  uint64_t UnusedProfiles = 0;
  for (yaml::bolt::BinaryFunctionProfile &Profile : YamlBP.Functions) {
    // An id outside of the table means the profile was ignored; a null slot
    // means no function was paired with it.
    BinaryFunction *const Target =
        Profile.Id < YamlProfileToFunction.size()
            ? YamlProfileToFunction[Profile.Id]
            : nullptr;
    if (!Target) {
      ++UnusedProfiles;
      continue;
    }
    parseFunctionProfile(*Target, Profile);
  }
  BC.setNumUnusedProfiledObjects(UnusedProfiles);

  return Error::success();
}
/// Return true if \p Name occurs as a substring of the event names recorded
/// in the profile header.
bool YAMLProfileReader::usesEvent(StringRef Name) const {
  const StringRef RecordedEvents(YamlBP.Header.EventNames);
  const size_t Position = RecordedEvents.find(Name);
  return Position != StringRef::npos;
}
} // end namespace bolt
} // end namespace llvm