[BOLT] New profile format
Summary: A new profile format that is more resilient to minor binary modifications. BranchData is eliminated. For calls, the data is converted into instruction annotations if the profile matches a function. If a profile cannot be matched, AllCallSites should hold the call site profiles. The new profile format is YAML, which is quite verbose. It still takes less space than the older format because we avoid function name repetition. The plan is to get rid of the old profile format eventually. merge-fdata does not work with the new format yet. (cherry picked from FBD6753747)
This commit is contained in:
@@ -342,12 +342,17 @@ public:
|
||||
/// an unconditional branch) and thus has 2 successors, return a successor
|
||||
/// corresponding to a jump condition which could be true or false.
|
||||
/// Return nullptr if the basic block does not have a conditional jump.
|
||||
const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const {
|
||||
/// Mutable overload. Returns the successor selected by \p Condition, or
/// nullptr when the block does not have exactly two successors.
BinaryBasicBlock *getConditionalSuccessor(bool Condition) {
  const bool HasTwoSuccessors = succ_size() == 2;
  if (!HasTwoSuccessors)
    return nullptr;

  // The successor for a true condition is stored first, the one for a
  // false condition second.
  return Condition ? Successors[0] : Successors[1];
}
|
||||
|
||||
/// Const overload. Forwards to the non-const getConditionalSuccessor() and
/// hands the result back as a pointer-to-const.
const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const {
  // The non-const overload only reads the successor list, so dropping
  // constness for the duration of the call does not modify this block.
  auto *MutableThis = const_cast<BinaryBasicBlock *>(this);
  return MutableThis->getConditionalSuccessor(Condition);
}
|
||||
|
||||
/// Find the fallthrough successor for a block, or nullptr if there is
|
||||
/// none.
|
||||
const BinaryBasicBlock* getFallthrough() const {
|
||||
|
||||
@@ -1466,7 +1466,7 @@ bool BinaryFunction::buildCFG() {
|
||||
|
||||
for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
|
||||
const auto Offset = I->first;
|
||||
const auto &Instr = I->second;
|
||||
auto &Instr = I->second;
|
||||
|
||||
auto LI = Labels.find(Offset);
|
||||
if (LI != Labels.end()) {
|
||||
@@ -1819,6 +1819,11 @@ uint64_t BinaryFunction::getFunctionScore() {
|
||||
if (FunctionScore != -1)
|
||||
return FunctionScore;
|
||||
|
||||
if (!isSimple() || !hasValidProfile()) {
|
||||
FunctionScore = 0;
|
||||
return FunctionScore;
|
||||
}
|
||||
|
||||
uint64_t TotalScore = 0ULL;
|
||||
for (auto BB : layout()) {
|
||||
uint64_t BBExecCount = BB->getExecutionCount();
|
||||
@@ -2620,6 +2625,41 @@ void BinaryFunction::postProcessBranches() {
|
||||
assert(validateCFG() && "invalid CFG");
|
||||
}
|
||||
|
||||
/// Map an entry ID to its MC symbol.
///
/// Entry 0 always resolves to the function symbol. Any other ID is resolved
/// by counting entry-point basic blocks in BasicBlocks order (the first
/// entry-point block has index 0). Returns nullptr if the function is not
/// multi-entry or if no entry-point block has the requested index.
const MCSymbol *BinaryFunction::getSymbolForEntry(uint64_t EntryNum) const {
  if (EntryNum == 0)
    return getSymbol();

  // Non-zero entry IDs only exist for multiple-entry functions.
  if (!isMultiEntry())
    return nullptr;

  uint64_t EntryIndex = 0;
  for (auto *BB : BasicBlocks) {
    if (BB->isEntryPoint()) {
      if (EntryIndex == EntryNum)
        return BB->getLabel();
      ++EntryIndex;
    }
  }

  // Ran out of entry points before reaching the requested index.
  return nullptr;
}
|
||||
|
||||
/// Map an entry symbol to its entry ID.
///
/// The function symbol maps to 0. Otherwise the ID is the index of the
/// entry-point basic block labelled with \p EntrySymbol, counting only
/// entry-point blocks in BasicBlocks order. A symbol that matches no entry
/// point is treated as unreachable.
uint64_t BinaryFunction::getEntryForSymbol(const MCSymbol *EntrySymbol) const {
  if (EntrySymbol == getSymbol())
    return 0;

  uint64_t EntryIndex = 0;
  for (const auto *BB : BasicBlocks) {
    if (BB->isEntryPoint()) {
      if (EntrySymbol == BB->getLabel())
        return EntryIndex;
      ++EntryIndex;
    }
  }

  llvm_unreachable("no entry for symbol");
}
|
||||
|
||||
BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const {
|
||||
BasicBlockOrderType DFS;
|
||||
unsigned Index = 0;
|
||||
@@ -2649,8 +2689,24 @@ BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const {
|
||||
Stack.push(SuccBB);
|
||||
}
|
||||
|
||||
for (auto *SuccBB : BB->successors()) {
|
||||
Stack.push(SuccBB);
|
||||
const MCSymbol *TBB = nullptr;
|
||||
const MCSymbol *FBB = nullptr;
|
||||
MCInst *CondBranch = nullptr;
|
||||
MCInst *UncondBranch = nullptr;
|
||||
if (BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch) &&
|
||||
CondBranch && BB->succ_size() == 2) {
|
||||
if (BC.MIA->getCanonicalBranchOpcode(CondBranch->getOpcode()) ==
|
||||
CondBranch->getOpcode()) {
|
||||
Stack.push(BB->getConditionalSuccessor(true));
|
||||
Stack.push(BB->getConditionalSuccessor(false));
|
||||
} else {
|
||||
Stack.push(BB->getConditionalSuccessor(false));
|
||||
Stack.push(BB->getConditionalSuccessor(true));
|
||||
}
|
||||
} else {
|
||||
for (auto *SuccBB : BB->successors()) {
|
||||
Stack.push(SuccBB);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2826,6 +2882,9 @@ bool BinaryFunction::equalJumpTables(const JumpTable *JumpTableA,
|
||||
}
|
||||
|
||||
std::size_t BinaryFunction::hash(bool Recompute, bool UseDFS) const {
|
||||
if (size() == 0)
|
||||
return 0;
|
||||
|
||||
assert(hasCFG() && "function is expected to have CFG");
|
||||
|
||||
if (!Recompute)
|
||||
@@ -3687,13 +3746,14 @@ DynoStats BinaryFunction::getDynoStats() const {
|
||||
Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
|
||||
} else if (const auto *CallSymbol = BC.MIA->getTargetSymbol(Instr)) {
|
||||
const auto *BF = BC.getFunctionForSymbol(CallSymbol);
|
||||
if (BF && BF->isPLTFunction())
|
||||
if (BF && BF->isPLTFunction()) {
|
||||
Stats[DynoStats::PLT_CALLS] += CallFreq;
|
||||
|
||||
// We don't process PLT functions and hence have to adjust
|
||||
// relevant dynostats here.
|
||||
Stats[DynoStats::LOADS] += CallFreq;
|
||||
Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -164,6 +164,40 @@ enum IndirectCallPromotionType : char {
|
||||
ICP_ALL /// Perform ICP on calls and jump tables.
|
||||
};
|
||||
|
||||
/// Information on a single indirect call to a particular callee.
|
||||
struct IndirectCallProfile {
|
||||
bool IsFunction;
|
||||
uint32_t Offset;
|
||||
StringRef Name;
|
||||
uint64_t Count;
|
||||
uint64_t Mispreds;
|
||||
|
||||
IndirectCallProfile(bool IsFunction, StringRef Name, uint64_t Count,
|
||||
uint64_t Mispreds, uint32_t Offset = 0)
|
||||
: IsFunction(IsFunction), Offset(Offset), Name(Name), Count(Count),
|
||||
Mispreds(Mispreds) {}
|
||||
|
||||
bool operator==(const IndirectCallProfile &Other) const {
|
||||
return IsFunction == Other.IsFunction &&
|
||||
Name == Other.Name &&
|
||||
Offset == Other.Offset;
|
||||
}
|
||||
};
|
||||
|
||||
/// Aggregated information for an indirect call site.
|
||||
using IndirectCallSiteProfile = SmallVector<IndirectCallProfile, 4>;
|
||||
|
||||
/// Print every entry of \p ICSP as "{ <name>: <count> (<mispreds> misses) }",
/// with entries separated by ", ". Entries that are not functions are
/// printed with the name "<unknown>".
inline raw_ostream &operator<<(raw_ostream &OS,
                               const bolt::IndirectCallSiteProfile &ICSP) {
  bool IsFirst = true;
  for (const auto &CSP : ICSP) {
    // Separator goes between entries only, never before the first one.
    if (!IsFirst)
      OS << ", ";
    IsFirst = false;

    OS << "{ ";
    if (CSP.IsFunction)
      OS << CSP.Name;
    else
      OS << "<unknown>";
    OS << ": " << CSP.Count << " (" << CSP.Mispreds << " misses) }";
  }
  return OS;
}
|
||||
|
||||
/// BinaryFunction is a representation of machine-level function.
|
||||
///
|
||||
/// We use the term "Binary" as "Machine" was already taken.
|
||||
@@ -294,6 +328,14 @@ private:
|
||||
/// Profile match ratio for BranchData.
|
||||
float ProfileMatchRatio{0.0f};
|
||||
|
||||
/// Indicates if function profile was collected using LBRs.
|
||||
bool HasLBRProfile{true};
|
||||
|
||||
/// For functions with mismatched profile we store all call profile
|
||||
/// information at a function level (as opposed to tying it to
|
||||
/// specific call sites).
|
||||
IndirectCallSiteProfile AllCallSites;
|
||||
|
||||
/// Score of the function (estimated number of instructions executed,
|
||||
/// according to profile data). -1 if the score has not been calculated yet.
|
||||
int64_t FunctionScore{-1};
|
||||
@@ -511,11 +553,11 @@ private:
|
||||
/// function and that apply before the entry basic block).
|
||||
CFIInstrMapType CIEFrameInstructions;
|
||||
|
||||
public:
|
||||
/// Representation of a jump table.
|
||||
///
|
||||
/// The jump table may include other jump tables that are referenced by
|
||||
/// a different label at a different offset in this jump table.
|
||||
public:
|
||||
struct JumpTable {
|
||||
enum JumpTableType : char {
|
||||
JTT_NORMAL,
|
||||
@@ -745,10 +787,6 @@ private:
|
||||
Instructions.emplace(Offset, std::forward<MCInst>(Instruction));
|
||||
}
|
||||
|
||||
/// Return instruction at a given offset in the function. Valid before
|
||||
/// CFG is constructed or while instruction offsets are available in CFG.
|
||||
MCInst *getInstructionAtOffset(uint64_t Offset);
|
||||
|
||||
/// Analyze and process indirect branch \p Instruction before it is
|
||||
/// added to Instructions list.
|
||||
IndirectBranchType processIndirectBranch(MCInst &Instruction,
|
||||
@@ -978,6 +1016,10 @@ public:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Return instruction at a given offset in the function. Valid before
|
||||
/// CFG is constructed or while instruction offsets are available in CFG.
|
||||
MCInst *getInstructionAtOffset(uint64_t Offset);
|
||||
|
||||
/// Return the name of the function as extracted from the binary file.
|
||||
/// If the function has multiple names - return the last one
|
||||
/// followed by "(*#<numnames>)".
|
||||
@@ -1102,6 +1144,13 @@ public:
|
||||
return OutputSymbol;
|
||||
}
|
||||
|
||||
/// Return MC symbol corresponding to an enumerated entry for multiple-entry
|
||||
/// functions.
|
||||
const MCSymbol *getSymbolForEntry(uint64_t EntryNum) const;
|
||||
|
||||
/// Return an entry ID corresponding to a symbol.
|
||||
uint64_t getEntryForSymbol(const MCSymbol *EntrySymbol) const;
|
||||
|
||||
MCSymbol *getColdSymbol() {
|
||||
if (ColdSymbol)
|
||||
return ColdSymbol;
|
||||
@@ -1895,6 +1944,15 @@ public:
|
||||
MemData = Data;
|
||||
}
|
||||
|
||||
/// Return all call site profile info for this function.
|
||||
IndirectCallSiteProfile &getAllCallSites() {
|
||||
return AllCallSites;
|
||||
}
|
||||
|
||||
/// Read-only access to AllCallSites, the call site profile data kept at
/// function level.
const IndirectCallSiteProfile &getAllCallSites() const {
  return AllCallSites;
}
|
||||
|
||||
/// Walks the list of basic blocks filling in missing information about
|
||||
/// edge frequency for fall-throughs.
|
||||
///
|
||||
@@ -2004,6 +2062,9 @@ public:
|
||||
/// isIdenticalWith.
|
||||
void mergeProfileDataInto(BinaryFunction &BF) const;
|
||||
|
||||
/// Convert function-level branch data into instruction annotations.
|
||||
void convertBranchData();
|
||||
|
||||
/// Returns true if this function has identical code and CFG with
|
||||
/// the given function \p BF.
|
||||
///
|
||||
@@ -2303,6 +2364,13 @@ template <> struct GraphTraits<Inverse<const bolt::BinaryFunction *>> :
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class MCAnnotationPrinter<bolt::IndirectCallSiteProfile> {
|
||||
public:
|
||||
void print(raw_ostream &OS, const bolt::IndirectCallSiteProfile &ICSP) const {
|
||||
OS << ICSP;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
|
||||
@@ -261,7 +261,8 @@ bool BinaryFunction::recordBranch(uint64_t From, uint64_t To,
|
||||
|
||||
if (!FromBB->getSuccessor(ToBB->getLabel())) {
|
||||
// Check if this is a recursive call or a return from a recursive call.
|
||||
if (ToBB->isEntryPoint()) {
|
||||
if (ToBB->isEntryPoint() && (BC.MIA->isCall(*FromInstruction) ||
|
||||
BC.MIA->isIndirectBranch(*FromInstruction))) {
|
||||
// Execution count is already accounted for.
|
||||
return true;
|
||||
}
|
||||
@@ -289,8 +290,18 @@ bool BinaryFunction::recordEntry(uint64_t To, bool Mispred, uint64_t Count) {
|
||||
if (!hasProfile())
|
||||
ExecutionCount = 0;
|
||||
|
||||
if (To == 0)
|
||||
BinaryBasicBlock *EntryBB = nullptr;
|
||||
if (To == 0) {
|
||||
ExecutionCount += Count;
|
||||
if (!empty())
|
||||
EntryBB = &front();
|
||||
} else if (auto *BB = getBasicBlockAtOffset(To)) {
|
||||
if (BB->isEntryPoint())
|
||||
EntryBB = BB;
|
||||
}
|
||||
|
||||
if (EntryBB)
|
||||
EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -319,8 +330,7 @@ void BinaryFunction::postProcessProfile() {
|
||||
return;
|
||||
}
|
||||
|
||||
// If we are using non-LBR sampling there's nothing left to do.
|
||||
if (!BranchData)
|
||||
if (!HasLBRProfile)
|
||||
return;
|
||||
|
||||
// Bug compatibility with previous version - double accounting for conditional
|
||||
@@ -339,7 +349,8 @@ void BinaryFunction::postProcessProfile() {
|
||||
}
|
||||
|
||||
// Pre-sort branch data.
|
||||
std::stable_sort(BranchData->Data.begin(), BranchData->Data.end());
|
||||
if (BranchData)
|
||||
std::stable_sort(BranchData->Data.begin(), BranchData->Data.end());
|
||||
|
||||
// If we have at least some branch data for the function indicate that it
|
||||
// was executed.
|
||||
@@ -347,39 +358,22 @@ void BinaryFunction::postProcessProfile() {
|
||||
ExecutionCount = 1;
|
||||
}
|
||||
|
||||
// Compute preliminary execution count for each basic block
|
||||
// Compute preliminary execution count for each basic block.
|
||||
for (auto *BB : BasicBlocks) {
|
||||
BB->ExecutionCount = 0;
|
||||
if ((!BB->isEntryPoint() && !BB->isLandingPad()) ||
|
||||
BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
|
||||
BB->ExecutionCount = 0;
|
||||
}
|
||||
for (auto *BB : BasicBlocks) {
|
||||
auto SuccBIIter = BB->branch_info_begin();
|
||||
for (auto Succ : BB->successors()) {
|
||||
if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
|
||||
if (!Succ->isEntryPoint() &&
|
||||
SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
|
||||
Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
|
||||
++SuccBIIter;
|
||||
}
|
||||
}
|
||||
|
||||
// Set entry BBs to zero, we'll update their execution count next with entry
|
||||
// data (we maintain a separate data structure for branches to function entry
|
||||
// points)
|
||||
for (auto *BB : BasicBlocks) {
|
||||
if (BB->isEntryPoint())
|
||||
BB->ExecutionCount = 0;
|
||||
}
|
||||
|
||||
// Update execution counts of landing pad blocks and entry BBs
|
||||
// There is a slight skew introduced here as branches originated from RETs
|
||||
// may be accounted for in the execution count of an entry block if the last
|
||||
// instruction in a predecessor fall-through block is a call. This situation
|
||||
// should rarely happen because there are few multiple-entry functions.
|
||||
for (const auto &I : BranchData->EntryData) {
|
||||
BinaryBasicBlock *BB = getBasicBlockAtOffset(I.To.Offset);
|
||||
if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
|
||||
BB->setExecutionCount(BB->getExecutionCount() + I.Branches);
|
||||
}
|
||||
}
|
||||
|
||||
inferFallThroughCounts();
|
||||
|
||||
// Update profile information for jump tables based on CFG branch data.
|
||||
@@ -442,6 +436,7 @@ void BinaryFunction::readProfile() {
|
||||
return;
|
||||
|
||||
if (!BC.DR.hasLBR()) {
|
||||
HasLBRProfile = false;
|
||||
readSampleData();
|
||||
return;
|
||||
}
|
||||
@@ -452,6 +447,23 @@ void BinaryFunction::readProfile() {
|
||||
if (!BranchData)
|
||||
return;
|
||||
|
||||
// Assign basic block counts to function entry points. These only include
|
||||
// counts for outside entries.
|
||||
//
|
||||
// There is a slight skew introduced here as branches originated from RETs
|
||||
// may be accounted for in the execution count of an entry block if the last
|
||||
// instruction in a predecessor fall-through block is a call. This situation
|
||||
// should rarely happen because there are few multiple-entry functions.
|
||||
for (const auto &BI : BranchData->EntryData) {
|
||||
BinaryBasicBlock *BB = getBasicBlockAtOffset(BI.To.Offset);
|
||||
if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
|
||||
auto Count = BB->getExecutionCount();
|
||||
if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
|
||||
Count = 0;
|
||||
BB->setExecutionCount(Count + BI.Branches);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t MismatchedBranches = 0;
|
||||
for (const auto &BI : BranchData->Data) {
|
||||
if (BI.From.Name != BI.To.Name) {
|
||||
@@ -466,25 +478,59 @@ void BinaryFunction::readProfile() {
|
||||
}
|
||||
}
|
||||
|
||||
// Special profile data propagation is required for conditional tail calls.
|
||||
for (auto BB : BasicBlocks) {
|
||||
auto *CTCInstr = BB->getLastNonPseudoInstr();
|
||||
if (!CTCInstr || !BC.MIA->getConditionalTailCall(*CTCInstr))
|
||||
// Convert branch data into annotations.
|
||||
convertBranchData();
|
||||
}
|
||||
|
||||
void BinaryFunction::convertBranchData() {
|
||||
if (!BranchData || empty())
|
||||
return;
|
||||
|
||||
// Profile information for calls.
|
||||
//
|
||||
// There are 3 cases that we annotate differently:
|
||||
// 1) Conditional tail calls that could be mispredicted.
|
||||
// 2) Indirect calls to multiple destinations with mispredictions.
|
||||
// Before we validate CFG we have to handle indirect branches here too.
|
||||
// 3) Regular direct calls. The count could be different from containing
|
||||
// basic block count. Keep this data in case we find it useful.
|
||||
//
|
||||
for (auto &BI : BranchData->Data) {
|
||||
// Ignore internal branches.
|
||||
if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0)
|
||||
continue;
|
||||
|
||||
auto OffsetOrErr =
|
||||
BC.MIA->tryGetAnnotationAs<uint64_t>(*CTCInstr, "Offset");
|
||||
assert(OffsetOrErr && "offset not set for conditional tail call");
|
||||
|
||||
auto BranchInfoOrErr = BranchData->getDirectCallBranch(*OffsetOrErr);
|
||||
if (!BranchInfoOrErr)
|
||||
auto *Instr = getInstructionAtOffset(BI.From.Offset);
|
||||
if (!Instr ||
|
||||
(!BC.MIA->isCall(*Instr) && !BC.MIA->isIndirectBranch(*Instr)))
|
||||
continue;
|
||||
|
||||
BC.MIA->addAnnotation(BC.Ctx.get(), *CTCInstr, "CTCTakenCount",
|
||||
BranchInfoOrErr->Branches);
|
||||
BC.MIA->addAnnotation(BC.Ctx.get(), *CTCInstr, "CTCMispredCount",
|
||||
BranchInfoOrErr->Mispreds);
|
||||
auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) {
|
||||
if (opts::Verbosity >= 1 && BC.MIA->hasAnnotation(*Instr, Name)) {
|
||||
errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x"
|
||||
<< Twine::utohexstr(BI.From.Offset)
|
||||
<< " in function " << *this << '\n';
|
||||
}
|
||||
auto &Value = BC.MIA->getOrCreateAnnotationAs<uint64_t>(BC.Ctx.get(),
|
||||
*Instr, Name);
|
||||
Value += Count;
|
||||
};
|
||||
|
||||
if (BC.MIA->isIndirectCall(*Instr) || BC.MIA->isIndirectBranch(*Instr)) {
|
||||
IndirectCallSiteProfile &CSP =
|
||||
BC.MIA->getOrCreateAnnotationAs<IndirectCallSiteProfile>(BC.Ctx.get(),
|
||||
*Instr, "CallProfile");
|
||||
CSP.emplace_back(BI.To.IsSymbol, BI.To.Name, BI.Branches,
|
||||
BI.Mispreds);
|
||||
} else if (BC.MIA->getConditionalTailCall(*Instr)) {
|
||||
setOrUpdateAnnotation("CTCTakenCount", BI.Branches);
|
||||
setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds);
|
||||
} else {
|
||||
setOrUpdateAnnotation("Count", BI.Branches);
|
||||
}
|
||||
}
|
||||
|
||||
BranchData = nullptr;
|
||||
}
|
||||
|
||||
void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
|
||||
|
||||
@@ -71,6 +71,8 @@ add_llvm_tool(llvm-bolt
|
||||
DebugData.cpp
|
||||
DWARFRewriter.cpp
|
||||
Exceptions.cpp
|
||||
ProfileReader.cpp
|
||||
ProfileWriter.cpp
|
||||
RewriteInstance.cpp
|
||||
)
|
||||
|
||||
|
||||
@@ -746,8 +746,7 @@ std::error_code DataAggregator::parseBranchEvents() {
|
||||
++NumSamples;
|
||||
NumEntries += Sample.LBR.size();
|
||||
|
||||
// Parser semantic actions
|
||||
// LBRs are stored in reverse execution order. NextLBR refers to next
|
||||
// LBRs are stored in reverse execution order. NextLBR refers to the next
|
||||
// executed branch record.
|
||||
const LBREntry *NextLBR{nullptr};
|
||||
for (const auto &LBR : Sample.LBR) {
|
||||
|
||||
@@ -134,7 +134,6 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto *BranchData = Function->getBranchData();
|
||||
const auto SrcId = lookupNode(Function);
|
||||
// Offset of the current basic block from the beginning of the function
|
||||
uint64_t Offset = 0;
|
||||
@@ -166,25 +165,6 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
|
||||
return false;
|
||||
};
|
||||
|
||||
auto getCallInfoFromBranchData = [&](const BranchInfo &BI, bool IsStale) {
|
||||
MCSymbol *DstSym = nullptr;
|
||||
uint64_t Count;
|
||||
if (BI.To.IsSymbol && (DstSym = BC.getGlobalSymbolByName(BI.To.Name))) {
|
||||
Count = BI.Branches;
|
||||
} else {
|
||||
Count = COUNT_NO_PROFILE;
|
||||
}
|
||||
// If we are using the perf data for a stale function we need to filter
|
||||
// out data which comes from branches. We'll assume that the To offset
|
||||
// is non-zero for branches.
|
||||
if (IsStale && BI.To.Offset != 0 &&
|
||||
(!DstSym || Function == BC.getFunctionForSymbol(DstSym))) {
|
||||
DstSym = nullptr;
|
||||
Count = COUNT_NO_PROFILE;
|
||||
}
|
||||
return std::make_pair(DstSym, Count);
|
||||
};
|
||||
|
||||
// Get pairs of (symbol, count) for each target at this callsite.
|
||||
// If the call is to an unknown function the symbol will be nullptr.
|
||||
// If there is no profiling data the count will be COUNT_NO_PROFILE.
|
||||
@@ -193,12 +173,15 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
|
||||
const auto *DstSym = BC.MIA->getTargetSymbol(Inst);
|
||||
|
||||
// If this is an indirect call use perf data directly.
|
||||
if (!DstSym && BranchData &&
|
||||
BC.MIA->hasAnnotation(Inst, "Offset")) {
|
||||
const auto InstrOffset =
|
||||
BC.MIA->getAnnotationAs<uint64_t>(Inst, "Offset");
|
||||
for (const auto &BI : BranchData->getBranchRange(InstrOffset)) {
|
||||
Counts.push_back(getCallInfoFromBranchData(BI, false));
|
||||
if (!DstSym && BC.MIA->hasAnnotation(Inst, "CallProfile")) {
|
||||
const auto &ICSP =
|
||||
BC.MIA->getAnnotationAs<IndirectCallSiteProfile>(Inst, "CallProfile");
|
||||
for (const auto &CSI : ICSP) {
|
||||
if (!CSI.IsFunction)
|
||||
continue;
|
||||
if (auto DstSym = BC.getGlobalSymbolByName(CSI.Name)) {
|
||||
Counts.push_back(std::make_pair(DstSym, CSI.Count));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const auto Count = BB->getExecutionCount();
|
||||
@@ -211,23 +194,29 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
|
||||
// If the function has an invalid profile, try to use the perf data
|
||||
// directly (if requested). If there is no perf data for this function,
|
||||
// fall back to the CFG walker which attempts to handle missing data.
|
||||
if (!Function->hasValidProfile() && CgFromPerfData && BranchData) {
|
||||
if (!Function->hasValidProfile() && CgFromPerfData &&
|
||||
!Function->getAllCallSites().empty()) {
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
|
||||
<< " for " << *Function << "\n");
|
||||
++NumFallbacks;
|
||||
const auto Size = functionSize(Function);
|
||||
for (const auto &BI : BranchData->Data) {
|
||||
Offset = BI.From.Offset;
|
||||
for (const auto &CSI : Function->getAllCallSites()) {
|
||||
++TotalCallsites;
|
||||
|
||||
if (!CSI.IsFunction)
|
||||
continue;
|
||||
|
||||
auto *DstSym = BC.getGlobalSymbolByName(CSI.Name);
|
||||
if (!DstSym)
|
||||
continue;
|
||||
|
||||
// The computed offset may exceed the hot part of the function; hence,
|
||||
// bound it the size
|
||||
// bound it by the size.
|
||||
Offset = CSI.Offset;
|
||||
if (Offset > Size)
|
||||
Offset = Size;
|
||||
|
||||
const auto CI = getCallInfoFromBranchData(BI, true);
|
||||
if (!CI.first && CI.second == COUNT_NO_PROFILE) // probably a branch
|
||||
continue;
|
||||
++TotalCallsites;
|
||||
if (!recordCall(CI.first, CI.second)) {
|
||||
if (!recordCall(DstSym, CSI.Count)) {
|
||||
++NotProcessed;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,21 +142,13 @@ namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
IndirectCallPromotion::Callsite::Callsite(BinaryFunction &BF,
|
||||
const BranchInfo &BI)
|
||||
: From(BF.getSymbol()),
|
||||
To(uint64_t(BI.To.Offset)),
|
||||
Mispreds{uint64_t(BI.Mispreds)},
|
||||
Branches{uint64_t(BI.Branches)},
|
||||
Histories{BI.Histories} {
|
||||
if (BI.To.IsSymbol) {
|
||||
auto &BC = BF.getBinaryContext();
|
||||
auto Itr = BC.GlobalSymbols.find(BI.To.Name);
|
||||
if (Itr != BC.GlobalSymbols.end()) {
|
||||
To.IsSymbol = true;
|
||||
To.Sym = BC.getOrCreateGlobalSymbol(Itr->second, "FUNCat");
|
||||
To.Addr = 0;
|
||||
assert(To.Sym);
|
||||
}
|
||||
const IndirectCallProfile &ICP)
|
||||
: From(BF.getSymbol()),
|
||||
To(ICP.Offset),
|
||||
Mispreds(ICP.Mispreds),
|
||||
Branches(ICP.Count) {
|
||||
if (ICP.IsFunction) {
|
||||
To.Sym = BF.getBinaryContext().getGlobalSymbolByName(ICP.Name);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -192,20 +184,18 @@ IndirectCallPromotion::getCallTargets(
|
||||
Entry == BF.getFunctionColdEndLabel())
|
||||
continue;
|
||||
const Location To(Entry);
|
||||
Callsite CS{
|
||||
From, To, JI->Mispreds, JI->Count, BranchHistories(),
|
||||
I - Range.first};
|
||||
Targets.emplace_back(CS);
|
||||
Targets.emplace_back(
|
||||
From, To, JI->Mispreds, JI->Count, I - Range.first);
|
||||
}
|
||||
|
||||
// Sort by symbol then addr.
|
||||
std::sort(Targets.begin(), Targets.end(),
|
||||
[](const Callsite &A, const Callsite &B) {
|
||||
if (A.To.IsSymbol && B.To.IsSymbol)
|
||||
if (A.To.Sym && B.To.Sym)
|
||||
return A.To.Sym < B.To.Sym;
|
||||
else if (A.To.IsSymbol && !B.To.IsSymbol)
|
||||
else if (A.To.Sym && !B.To.Sym)
|
||||
return true;
|
||||
else if (!A.To.IsSymbol && B.To.IsSymbol)
|
||||
else if (!A.To.Sym && B.To.Sym)
|
||||
return false;
|
||||
else
|
||||
return A.To.Addr < B.To.Addr;
|
||||
@@ -221,7 +211,7 @@ IndirectCallPromotion::getCallTargets(
|
||||
while (++First != Last) {
|
||||
auto &A = *Result;
|
||||
const auto &B = *First;
|
||||
if (A.To.IsSymbol && B.To.IsSymbol && A.To.Sym == B.To.Sym) {
|
||||
if (A.To.Sym && B.To.Sym && A.To.Sym == B.To.Sym) {
|
||||
A.JTIndex.insert(A.JTIndex.end(), B.JTIndex.begin(), B.JTIndex.end());
|
||||
} else {
|
||||
*(++Result) = *First;
|
||||
@@ -241,13 +231,13 @@ IndirectCallPromotion::getCallTargets(
|
||||
Inst.getOperand(0).getReg() == BC.MRI->getProgramCounter()) {
|
||||
return Targets;
|
||||
}
|
||||
const auto *BranchData = BF.getBranchData();
|
||||
assert(BranchData && "expected initialized branch data");
|
||||
auto Offset = BC.MIA->getAnnotationAs<uint64_t>(Inst, "Offset");
|
||||
for (const auto &BI : BranchData->getBranchRange(Offset)) {
|
||||
Callsite Site(BF, BI);
|
||||
if (Site.isValid()) {
|
||||
Targets.emplace_back(std::move(Site));
|
||||
auto ICSP =
|
||||
BC.MIA->tryGetAnnotationAs<IndirectCallSiteProfile>(Inst, "CallProfile");
|
||||
if (ICSP) {
|
||||
for (const auto &CSP : ICSP.get()) {
|
||||
Callsite Site(BF, CSP);
|
||||
if (Site.isValid())
|
||||
Targets.emplace_back(std::move(Site));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -262,7 +252,7 @@ IndirectCallPromotion::getCallTargets(
|
||||
auto Last = std::remove_if(Targets.begin(),
|
||||
Targets.end(),
|
||||
[](const Callsite &CS) {
|
||||
return !CS.To.IsSymbol;
|
||||
return !CS.To.Sym;
|
||||
});
|
||||
Targets.erase(Last, Targets.end());
|
||||
|
||||
@@ -540,7 +530,7 @@ IndirectCallPromotion::findCallTargetSymbols(
|
||||
|
||||
for (size_t I = 0, TgtIdx = 0; I < N; ++TgtIdx) {
|
||||
auto &Target = Targets[TgtIdx];
|
||||
assert(Target.To.IsSymbol && "All ICP targets must be to known symbols");
|
||||
assert(Target.To.Sym && "All ICP targets must be to known symbols");
|
||||
assert(!Target.JTIndex.empty() && "Jump tables must have indices");
|
||||
for (auto Idx : Target.JTIndex) {
|
||||
SymTargets.push_back(std::make_pair(Target.To.Sym, Idx));
|
||||
@@ -549,7 +539,7 @@ IndirectCallPromotion::findCallTargetSymbols(
|
||||
}
|
||||
} else {
|
||||
for (size_t I = 0; I < N; ++I) {
|
||||
assert(Targets[I].To.IsSymbol &&
|
||||
assert(Targets[I].To.Sym &&
|
||||
"All ICP targets must be to known symbols");
|
||||
assert(Targets[I].JTIndex.empty() &&
|
||||
"Can't have jump table indices for non-jump tables");
|
||||
@@ -725,7 +715,7 @@ IndirectCallPromotion::rewriteCall(
|
||||
auto TBB = Function.createBasicBlock(0, Sym);
|
||||
for (auto &Inst : Insts) { // sanitize new instructions.
|
||||
if (BC.MIA->isCall(Inst))
|
||||
BC.MIA->removeAnnotation(Inst, "Offset");
|
||||
BC.MIA->removeAnnotation(Inst, "CallProfile");
|
||||
}
|
||||
TBB->addInstructions(Insts.begin(), Insts.end());
|
||||
NewBBs.emplace_back(std::move(TBB));
|
||||
@@ -822,7 +812,7 @@ BinaryBasicBlock *IndirectCallPromotion::fixCFG(
|
||||
|
||||
std::vector<MCSymbol*> SymTargets;
|
||||
for (size_t I = 0; I < Targets.size(); ++I) {
|
||||
assert(Targets[I].To.IsSymbol);
|
||||
assert(Targets[I].To.Sym);
|
||||
if (Targets[I].JTIndex.empty())
|
||||
SymTargets.push_back(Targets[I].To.Sym);
|
||||
else {
|
||||
@@ -1089,7 +1079,7 @@ IndirectCallPromotion::printCallsiteInfo(const BinaryBasicBlock *BB,
|
||||
const auto Frequency = 100.0 * Targets[I].Branches / NumCalls;
|
||||
const auto MisFrequency = 100.0 * Targets[I].Mispreds / NumCalls;
|
||||
outs() << "BOLT-INFO: ";
|
||||
if (Targets[I].To.IsSymbol)
|
||||
if (Targets[I].To.Sym)
|
||||
outs() << Targets[I].To.Sym->getName();
|
||||
else
|
||||
outs() << Targets[I].To.Addr;
|
||||
@@ -1188,7 +1178,7 @@ void IndirectCallPromotion::runOnFunctions(
|
||||
|
||||
if (!Function.isSimple() ||
|
||||
!opts::shouldProcess(Function) ||
|
||||
!Function.getBranchData())
|
||||
!Function.hasProfile())
|
||||
continue;
|
||||
|
||||
const bool HasLayout = !Function.layout_empty();
|
||||
@@ -1199,12 +1189,13 @@ void IndirectCallPromotion::runOnFunctions(
|
||||
|
||||
for (auto &Inst : BB) {
|
||||
const bool IsJumpTable = Function.getJumpTable(Inst);
|
||||
const bool HasBranchData = BC.MIA->hasAnnotation(Inst, "Offset");
|
||||
const bool HasIndirectCallProfile =
|
||||
BC.MIA->hasAnnotation(Inst, "CallProfile");
|
||||
const bool IsDirectCall = (BC.MIA->isCall(Inst) &&
|
||||
BC.MIA->getTargetSymbol(Inst, 0));
|
||||
|
||||
if (!IsDirectCall &&
|
||||
((HasBranchData && !IsJumpTable && OptimizeCalls) ||
|
||||
((HasIndirectCallProfile && !IsJumpTable && OptimizeCalls) ||
|
||||
(IsJumpTable && OptimizeJumpTables))) {
|
||||
uint64_t NumCalls = 0;
|
||||
for (const auto &BInfo : getCallTargets(Function, Inst)) {
|
||||
@@ -1233,8 +1224,8 @@ void IndirectCallPromotion::runOnFunctions(
|
||||
++Num;
|
||||
}
|
||||
outs() << "BOLT-INFO: ICP Total indirect calls = " << TotalIndirectCalls
|
||||
<< ", " << Num << " callsites cover " << opts::ICPTopCallsites << "% "
|
||||
<< "of all indirect calls\n";
|
||||
<< ", " << Num << " callsites cover " << opts::ICPTopCallsites
|
||||
<< "% of all indirect calls\n";
|
||||
|
||||
// Mark sites to optimize with "DoICP" annotation.
|
||||
for (size_t I = 0; I < Num; ++I) {
|
||||
@@ -1249,8 +1240,7 @@ void IndirectCallPromotion::runOnFunctions(
|
||||
if (!Function.isSimple() || !opts::shouldProcess(Function))
|
||||
continue;
|
||||
|
||||
const auto *BranchData = Function.getBranchData();
|
||||
if (!BranchData)
|
||||
if (!Function.hasProfile())
|
||||
continue;
|
||||
|
||||
const bool HasLayout = !Function.layout_empty();
|
||||
@@ -1279,15 +1269,15 @@ void IndirectCallPromotion::runOnFunctions(
|
||||
auto &Inst = BB->getInstructionAtIndex(Idx);
|
||||
const auto InstIdx = &Inst - &(*BB->begin());
|
||||
const bool IsTailCall = BC.MIA->isTailCall(Inst);
|
||||
const bool HasBranchData = Function.getBranchData() &&
|
||||
BC.MIA->hasAnnotation(Inst, "Offset");
|
||||
const bool HasIndirectCallProfile =
|
||||
BC.MIA->hasAnnotation(Inst, "CallProfile");
|
||||
const bool IsJumpTable = Function.getJumpTable(Inst);
|
||||
|
||||
if (BC.MIA->isCall(Inst)) {
|
||||
TotalCalls += BB->getKnownExecutionCount();
|
||||
}
|
||||
|
||||
if (!((HasBranchData && !IsJumpTable && OptimizeCalls) ||
|
||||
if (!((HasIndirectCallProfile && !IsJumpTable && OptimizeCalls) ||
|
||||
(IsJumpTable && OptimizeJumpTables)))
|
||||
continue;
|
||||
|
||||
@@ -1458,7 +1448,7 @@ void IndirectCallPromotion::runOnFunctions(
|
||||
TotalIndirectJmps += FuncTotalIndirectJmps;
|
||||
}
|
||||
|
||||
outs() << "BOLT-INFO: ICP total indirect callsites = "
|
||||
outs() << "BOLT-INFO: ICP total indirect callsites with profile = "
|
||||
<< TotalIndirectCallsites
|
||||
<< "\n"
|
||||
<< "BOLT-INFO: ICP total jump table callsites = "
|
||||
@@ -1475,7 +1465,8 @@ void IndirectCallPromotion::runOnFunctions(
|
||||
<< format("%.1f", (100.0 * TotalNumFrequentCalls) /
|
||||
std::max(TotalIndirectCalls, 1ul))
|
||||
<< "%\n"
|
||||
<< "BOLT-INFO: ICP percentage of indirect calls that are optimized = "
|
||||
<< "BOLT-INFO: ICP percentage of indirect callsites that are "
|
||||
"optimized = "
|
||||
<< format("%.1f", (100.0 * TotalOptimizedIndirectCallsites) /
|
||||
std::max(TotalIndirectCallsites, 1ul))
|
||||
<< "%\n"
|
||||
|
||||
@@ -22,7 +22,7 @@ namespace bolt {
|
||||
|
||||
/// Optimize indirect calls.
|
||||
/// The indirect call promotion pass visits each indirect call and
|
||||
/// examines the BranchData for each. If the most frequent targets
|
||||
/// examines a branch profile for each. If the most frequent targets
|
||||
/// from that callsite exceed the specified threshold (default 90%),
|
||||
/// the call is promoted. Otherwise, it is ignored. By default,
|
||||
/// only one target is considered at each callsite.
|
||||
@@ -103,14 +103,13 @@ class IndirectCallPromotion : public BinaryFunctionPass {
|
||||
using JumpTableInfoType = std::vector<std::pair<uint64_t, uint64_t>>;
|
||||
using SymTargetsType = std::vector<std::pair<MCSymbol *, uint64_t>>;
|
||||
struct Location {
|
||||
bool IsSymbol{false};
|
||||
MCSymbol *Sym{nullptr};
|
||||
uint64_t Addr{0};
|
||||
bool isValid() const {
|
||||
return (IsSymbol && Sym) || (!IsSymbol && Addr != 0);
|
||||
return Sym || (!Sym && Addr != 0);
|
||||
}
|
||||
Location() { }
|
||||
explicit Location(MCSymbol *Sym) : IsSymbol(true), Sym(Sym) { }
|
||||
explicit Location(MCSymbol *Sym) : Sym(Sym) { }
|
||||
explicit Location(uint64_t Addr) : Addr(Addr) { }
|
||||
};
|
||||
|
||||
@@ -119,18 +118,17 @@ class IndirectCallPromotion : public BinaryFunctionPass {
|
||||
Location To;
|
||||
uint64_t Mispreds{0};
|
||||
uint64_t Branches{0};
|
||||
BranchHistories Histories;
|
||||
// Indices in the jmp table (jt only)
|
||||
std::vector<uint64_t> JTIndex;
|
||||
bool isValid() const {
|
||||
return From.isValid() && To.isValid();
|
||||
}
|
||||
Callsite(BinaryFunction &BF, const BranchInfo &BI);
|
||||
Callsite(BinaryFunction &BF, const IndirectCallProfile &ICP);
|
||||
Callsite(const Location &From, const Location &To,
|
||||
uint64_t Mispreds, uint64_t Branches,
|
||||
const BranchHistories &Histories, uint64_t JTIndex)
|
||||
uint64_t JTIndex)
|
||||
: From(From), To(To), Mispreds(Mispreds), Branches(Branches),
|
||||
Histories(Histories), JTIndex(1, JTIndex) { }
|
||||
JTIndex(1, JTIndex) { }
|
||||
};
|
||||
|
||||
std::unordered_set<const BinaryFunction *> Modified;
|
||||
|
||||
@@ -85,7 +85,7 @@ void PLTCall::runOnFunctions(
|
||||
if (NumCallsOptimized) {
|
||||
BC.RequiresZNow = true;
|
||||
outs() << "BOLT-INFO: " << NumCallsOptimized
|
||||
<< " PLT calls in the binary were opitmized.\n";
|
||||
<< " PLT calls in the binary were optimized.\n";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
265
bolt/ProfileReader.cpp
Normal file
265
bolt/ProfileReader.cpp
Normal file
@@ -0,0 +1,265 @@
|
||||
//===-- ProfileReader.cpp - BOLT profile de-serializer ----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "BinaryBasicBlock.h"
|
||||
#include "BinaryFunction.h"
|
||||
#include "ProfileReader.h"
|
||||
#include "ProfileYAMLMapping.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
namespace opts {
|
||||
extern llvm::cl::opt<unsigned> Verbosity;
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
void
|
||||
ProfileReader::buildNameMaps(std::map<uint64_t, BinaryFunction> &Functions) {
|
||||
for (auto &YamlBF : YamlBFs) {
|
||||
StringRef Name = YamlBF.Name;
|
||||
const auto Pos = Name.find("(*");
|
||||
if (Pos != StringRef::npos)
|
||||
Name = Name.substr(0, Pos);
|
||||
ProfileNameToProfile[Name] = &YamlBF;
|
||||
if (const auto CommonName = getLTOCommonName(Name)) {
|
||||
LTOCommonNameMap[*CommonName].push_back(&YamlBF);
|
||||
}
|
||||
}
|
||||
for (auto &BFI : Functions) {
|
||||
const auto &Function = BFI.second;
|
||||
for (auto &Name : Function.getNames()) {
|
||||
if (const auto CommonName = getLTOCommonName(Name)) {
|
||||
LTOCommonNameFunctionMap[*CommonName].insert(&Function);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
ProfileReader::parseFunctionProfile(BinaryFunction &BF,
|
||||
const yaml::bolt::BinaryFunctionProfile &YamlBF) {
|
||||
auto &BC = BF.getBinaryContext();
|
||||
|
||||
bool ProfileMatched = true;
|
||||
uint64_t MismatchedBlocks = 0;
|
||||
uint64_t MismatchedCalls = 0;
|
||||
uint64_t MismatchedEdges = 0;
|
||||
|
||||
BF.setExecutionCount(YamlBF.ExecCount);
|
||||
|
||||
if (YamlBF.Hash != BF.hash(true, true)) {
|
||||
if (opts::Verbosity >= 1)
|
||||
errs() << "BOLT-WARNING: hash mismatch\n";
|
||||
ProfileMatched = false;
|
||||
}
|
||||
|
||||
if (YamlBF.NumBasicBlocks != BF.size()) {
|
||||
if (opts::Verbosity >= 1)
|
||||
errs() << "BOLT-WARNING: number of basic blocks mismatch\n";
|
||||
ProfileMatched = false;
|
||||
}
|
||||
|
||||
auto DFSOrder = BF.dfs();
|
||||
|
||||
for (const auto &YamlBB : YamlBF.Blocks) {
|
||||
if (YamlBB.Index >= DFSOrder.size()) {
|
||||
if (opts::Verbosity >= 2)
|
||||
errs() << "BOLT-WARNING: index " << YamlBB.Index
|
||||
<< " is out of bounds\n";
|
||||
++MismatchedBlocks;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto &BB = *DFSOrder[YamlBB.Index];
|
||||
BB.setExecutionCount(YamlBB.ExecCount);
|
||||
|
||||
for (const auto &YamlCSI: YamlBB.CallSites) {
|
||||
auto *Callee = YamlCSI.DestId < YamlProfileToFunction.size() ?
|
||||
YamlProfileToFunction[YamlCSI.DestId] : nullptr;
|
||||
bool IsFunction = Callee ? true : false;
|
||||
const MCSymbol *CalleeSymbol = nullptr;
|
||||
if (IsFunction) {
|
||||
CalleeSymbol = Callee->getSymbolForEntry(YamlCSI.EntryDiscriminator);
|
||||
}
|
||||
StringRef Name = CalleeSymbol ? CalleeSymbol->getName() : "<unknown>";
|
||||
BF.getAllCallSites().emplace_back(
|
||||
IsFunction, Name, YamlCSI.Count, YamlCSI.Mispreds, YamlCSI.Offset);
|
||||
|
||||
if (YamlCSI.Offset >= BB.getOriginalSize()) {
|
||||
if (opts::Verbosity >= 2)
|
||||
errs() << "BOLT-WARNING: offset " << YamlCSI.Offset
|
||||
<< " out of bounds in block " << BB.getName() << '\n';
|
||||
++MismatchedCalls;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto *Instr =
|
||||
BF.getInstructionAtOffset(BB.getInputOffset() + YamlCSI.Offset);
|
||||
if (!Instr) {
|
||||
if (opts::Verbosity >= 2)
|
||||
errs() << "BOLT-WARNING: no instruction at offset " << YamlCSI.Offset
|
||||
<< " in block " << BB.getName() << '\n';
|
||||
++MismatchedCalls;
|
||||
continue;
|
||||
}
|
||||
if (!BC.MIA->isCall(*Instr) && !BC.MIA->isIndirectBranch(*Instr)) {
|
||||
if (opts::Verbosity >= 2)
|
||||
errs() << "BOLT-WARNING: expected call at offset " << YamlCSI.Offset
|
||||
<< " in block " << BB.getName() << '\n';
|
||||
++MismatchedCalls;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto setAnnotation = [&](StringRef Name, uint64_t Count) {
|
||||
if (BC.MIA->hasAnnotation(*Instr, Name)) {
|
||||
if (opts::Verbosity >= 1)
|
||||
errs() << "BOLT-WARNING: ignoring duplicate " << Name
|
||||
<< " info for offset 0x" << Twine::utohexstr(YamlCSI.Offset)
|
||||
<< " in function " << BF << '\n';
|
||||
return;
|
||||
}
|
||||
BC.MIA->addAnnotation(BC.Ctx.get(), *Instr, Name, Count);
|
||||
};
|
||||
|
||||
if (BC.MIA->isIndirectCall(*Instr) || BC.MIA->isIndirectBranch(*Instr)) {
|
||||
IndirectCallSiteProfile &CSP =
|
||||
BC.MIA->getOrCreateAnnotationAs<IndirectCallSiteProfile>(BC.Ctx.get(),
|
||||
*Instr, "CallProfile");
|
||||
CSP.emplace_back(IsFunction, Name, YamlCSI.Count, YamlCSI.Mispreds);
|
||||
} else if (BC.MIA->getConditionalTailCall(*Instr)) {
|
||||
setAnnotation("CTCTakenCount", YamlCSI.Count);
|
||||
setAnnotation("CTCMispredCount", YamlCSI.Mispreds);
|
||||
} else {
|
||||
setAnnotation("Count", YamlCSI.Count);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &YamlSI : YamlBB.Successors) {
|
||||
if (YamlSI.Index >= DFSOrder.size()) {
|
||||
if (opts::Verbosity >= 1)
|
||||
errs() << "BOLT-WARNING: index out of bounds for profiled block\n";
|
||||
++MismatchedEdges;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto &SuccessorBB = *DFSOrder[YamlSI.Index];
|
||||
if (!BB.getSuccessor(SuccessorBB.getLabel())) {
|
||||
if (opts::Verbosity >= 1)
|
||||
errs() << "BOLT-WARNING: no successor for block " << BB.getName()
|
||||
<< " that matches index " << YamlSI.Index << " or block "
|
||||
<< SuccessorBB.getName() << '\n';
|
||||
++MismatchedEdges;
|
||||
continue;
|
||||
}
|
||||
|
||||
BB.setSuccessorBranchInfo(SuccessorBB, YamlSI.Count, YamlSI.Mispreds);
|
||||
}
|
||||
}
|
||||
|
||||
ProfileMatched &= !MismatchedBlocks && !MismatchedCalls && !MismatchedEdges;
|
||||
|
||||
if (ProfileMatched)
|
||||
BF.markProfiled();
|
||||
|
||||
if (!ProfileMatched && opts::Verbosity >= 1) {
|
||||
errs() << "BOLT-WARNING: " << MismatchedBlocks << " blocks, "
|
||||
<< MismatchedCalls << " calls, and " << MismatchedEdges
|
||||
<< " edges in profile did not match function " << BF << '\n';
|
||||
}
|
||||
|
||||
return ProfileMatched;
|
||||
}
|
||||
|
||||
std::error_code
|
||||
ProfileReader::readProfile(const std::string &FileName,
|
||||
std::map<uint64_t, BinaryFunction> &Functions) {
|
||||
auto MB = MemoryBuffer::getFileOrSTDIN(FileName);
|
||||
if (std::error_code EC = MB.getError()) {
|
||||
errs() << "ERROR: cannot open " << FileName << ": " << EC.message() << "\n";
|
||||
return EC;
|
||||
}
|
||||
|
||||
yaml::Input YamlInput(MB.get()->getBuffer());
|
||||
YamlInput >> YamlBFs;
|
||||
if (YamlInput.error()) {
|
||||
errs() << "BOLT-ERROR: syntax error parsing " << FileName << " : "
|
||||
<< YamlInput.error().message() << '\n';
|
||||
return YamlInput.error();
|
||||
}
|
||||
|
||||
buildNameMaps(Functions);
|
||||
|
||||
YamlProfileToFunction.resize(YamlBFs.size() + 1);
|
||||
for (auto &BFI : Functions) {
|
||||
auto &Function = BFI.second;
|
||||
auto Hash = Function.hash(true, true);
|
||||
for (auto &FunctionName : Function.getNames()) {
|
||||
const auto CommonName = getLTOCommonName(FunctionName);
|
||||
if (CommonName) {
|
||||
auto I = LTOCommonNameMap.find(*CommonName);
|
||||
if (I == LTOCommonNameMap.end())
|
||||
continue;
|
||||
|
||||
bool ProfileMatched{false};
|
||||
auto <OProfiles = I->getValue();
|
||||
for (auto *YamlBF : LTOProfiles) {
|
||||
if (YamlBF->Used)
|
||||
continue;
|
||||
if (YamlBF->Hash == Hash) {
|
||||
matchProfileToFunction(*YamlBF, Function);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ProfileMatched)
|
||||
break;
|
||||
|
||||
// If there's only one function with a given name, try to
|
||||
// match it partially.
|
||||
if (LTOProfiles.size() == 1 &&
|
||||
LTOCommonNameFunctionMap[*CommonName].size() == 1 &&
|
||||
!LTOProfiles.front()->Used) {
|
||||
matchProfileToFunction(*LTOProfiles.front(), Function);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
auto PI = ProfileNameToProfile.find(FunctionName);
|
||||
if (PI == ProfileNameToProfile.end())
|
||||
continue;
|
||||
|
||||
auto &YamlBF = *PI->getValue();
|
||||
matchProfileToFunction(YamlBF, Function);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto &YamlBF : YamlBFs) {
|
||||
if (!YamlBF.Used) {
|
||||
errs() << "BOLT-WARNING: profile ignored for function "
|
||||
<< YamlBF.Name << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &YamlBF : YamlBFs) {
|
||||
if (YamlBF.Id >= YamlProfileToFunction.size()) {
|
||||
// Such profile was ignored.
|
||||
continue;
|
||||
}
|
||||
if (auto *BF = YamlProfileToFunction[YamlBF.Id]) {
|
||||
parseFunctionProfile(*BF, YamlBF);
|
||||
}
|
||||
}
|
||||
|
||||
return YamlInput.error();
|
||||
}
|
||||
|
||||
} // end namespace bolt
|
||||
} // end namespace llvm
|
||||
68
bolt/ProfileReader.h
Normal file
68
bolt/ProfileReader.h
Normal file
@@ -0,0 +1,68 @@
|
||||
//===-- ProfileReader.h - BOLT profile deserializer -------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_BOLT_PROFILEREADER_H
|
||||
#define LLVM_TOOLS_LLVM_BOLT_PROFILEREADER_H
|
||||
|
||||
#include "BinaryFunction.h"
|
||||
#include "ProfileYAMLMapping.h"
|
||||
#include <unordered_set>
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
class ProfileReader {
|
||||
/// Number of function profiles that were unused by the reader.
|
||||
uint64_t NumUnusedProfiles{0};
|
||||
|
||||
/// Map a function ID from a profile to a BinaryFunction object.
|
||||
std::vector<BinaryFunction *> YamlProfileToFunction;
|
||||
|
||||
void reportError(StringRef Message);
|
||||
|
||||
bool parseFunctionProfile(BinaryFunction &Function,
|
||||
const yaml::bolt::BinaryFunctionProfile &YamlBF);
|
||||
|
||||
/// Profile for binary functions.
|
||||
std::vector<yaml::bolt::BinaryFunctionProfile> YamlBFs;
|
||||
|
||||
/// For LTO symbol resolution.
|
||||
/// Map a common LTO prefix to a list of profiles matching the prefix.
|
||||
StringMap<std::vector<yaml::bolt::BinaryFunctionProfile *>> LTOCommonNameMap;
|
||||
|
||||
/// Map a common LTO prefix to a set of binary functions.
|
||||
StringMap<std::unordered_set<const BinaryFunction *>>
|
||||
LTOCommonNameFunctionMap;
|
||||
|
||||
StringMap<yaml::bolt::BinaryFunctionProfile *> ProfileNameToProfile;
|
||||
|
||||
void buildNameMaps(std::map<uint64_t, BinaryFunction> &Functions);
|
||||
|
||||
/// Update matched YAML -> BinaryFunction pair.
|
||||
void matchProfileToFunction(yaml::bolt::BinaryFunctionProfile &YamlBF,
|
||||
BinaryFunction &BF) {
|
||||
if (YamlBF.Id >= YamlProfileToFunction.size())
|
||||
YamlProfileToFunction.resize(YamlBF.Id + 1);
|
||||
YamlProfileToFunction[YamlBF.Id] = &BF;
|
||||
YamlBF.Used = true;
|
||||
}
|
||||
|
||||
public:
|
||||
/// Read profile from a file and associate with a set of functions.
|
||||
std::error_code readProfile(const std::string &FileName,
|
||||
std::map<uint64_t, BinaryFunction> &Functions);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
174
bolt/ProfileWriter.cpp
Normal file
174
bolt/ProfileWriter.cpp
Normal file
@@ -0,0 +1,174 @@
|
||||
//===-- ProfileWriter.cpp - Serialize profiling data ------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "BinaryBasicBlock.h"
|
||||
#include "BinaryFunction.h"
|
||||
#include "ProfileWriter.h"
|
||||
#include "ProfileYAMLMapping.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#undef DEBUG_TYPE
|
||||
#define DEBUG_TYPE "bolt-prof"
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
std::error_code
|
||||
ProfileWriter::writeProfile(std::map<uint64_t, BinaryFunction> &Functions) {
|
||||
std::error_code EC;
|
||||
OS = make_unique<raw_fd_ostream>(FileName, EC, sys::fs::F_None);
|
||||
if (EC) {
|
||||
errs() << "BOLT-WARNING: " << EC.message() << " : unable to open "
|
||||
<< FileName << " for output.\n";
|
||||
return EC;
|
||||
}
|
||||
|
||||
printBinaryFunctionsProfile(Functions);
|
||||
|
||||
return std::error_code();
|
||||
}
|
||||
|
||||
namespace {
|
||||
void
|
||||
convert(const BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF) {
|
||||
auto &BC = BF.getBinaryContext();
|
||||
|
||||
YamlBF.Name = BF.getPrintName();
|
||||
YamlBF.Id = BF.getFunctionNumber();
|
||||
YamlBF.Hash = BF.hash(true, true);
|
||||
YamlBF.ExecCount = BF.getKnownExecutionCount();
|
||||
YamlBF.NumBasicBlocks = BF.size();
|
||||
|
||||
for (const auto *BB : BF.dfs()) {
|
||||
yaml::bolt::BinaryBasicBlockProfile YamlBB;
|
||||
YamlBB.Index = BB->getLayoutIndex();
|
||||
YamlBB.NumInstructions = BB->getNumNonPseudos();
|
||||
YamlBB.ExecCount = BB->getKnownExecutionCount();
|
||||
|
||||
for (const auto &Instr : *BB) {
|
||||
if (!BC.MIA->isCall(Instr) && !BC.MIA->isIndirectBranch(Instr))
|
||||
continue;
|
||||
|
||||
yaml::bolt::CallSiteInfo CSI;
|
||||
auto Offset = BC.MIA->tryGetAnnotationAs<uint64_t>(Instr, "Offset");
|
||||
if (!Offset || Offset.get() < BB->getInputOffset())
|
||||
continue;
|
||||
CSI.Offset = Offset.get() - BB->getInputOffset();
|
||||
|
||||
if (BC.MIA->isIndirectCall(Instr) || BC.MIA->isIndirectBranch(Instr)) {
|
||||
auto ICSP =
|
||||
BC.MIA->tryGetAnnotationAs<IndirectCallSiteProfile>(Instr,
|
||||
"CallProfile");
|
||||
if (!ICSP)
|
||||
continue;
|
||||
for (auto &CSP : ICSP.get()) {
|
||||
CSI.DestId = 0; // designated for unknown functions
|
||||
CSI.EntryDiscriminator = 0;
|
||||
if (CSP.IsFunction) {
|
||||
const auto *CalleeSymbol = BC.getGlobalSymbolByName(CSP.Name);
|
||||
if (CalleeSymbol) {
|
||||
const auto *Callee = BC.getFunctionForSymbol(CalleeSymbol);
|
||||
if (Callee) {
|
||||
CSI.DestId = Callee->getFunctionNumber();
|
||||
}
|
||||
}
|
||||
}
|
||||
CSI.Count = CSP.Count;
|
||||
CSI.Mispreds = CSP.Mispreds;
|
||||
YamlBB.CallSites.push_back(CSI);
|
||||
}
|
||||
} else { // direct call or a tail call
|
||||
const auto *CalleeSymbol = BC.MIA->getTargetSymbol(Instr);
|
||||
const auto Callee = BC.getFunctionForSymbol(CalleeSymbol);
|
||||
if (Callee) {
|
||||
CSI.DestId = Callee->getFunctionNumber();;
|
||||
CSI.EntryDiscriminator = Callee->getEntryForSymbol(CalleeSymbol);
|
||||
}
|
||||
|
||||
if (BC.MIA->getConditionalTailCall(Instr)) {
|
||||
auto CTCCount =
|
||||
BC.MIA->tryGetAnnotationAs<uint64_t>(Instr, "CTCTakenCount");
|
||||
if (CTCCount) {
|
||||
CSI.Count = *CTCCount;
|
||||
auto CTCMispreds =
|
||||
BC.MIA->tryGetAnnotationAs<uint64_t>(Instr, "CTCMispredCount");
|
||||
if (CTCMispreds)
|
||||
CSI.Mispreds = *CTCMispreds;
|
||||
}
|
||||
} else {
|
||||
auto Count = BC.MIA->tryGetAnnotationAs<uint64_t>(Instr, "Count");
|
||||
if (Count)
|
||||
CSI.Count = *Count;
|
||||
}
|
||||
|
||||
if (CSI.Count)
|
||||
YamlBB.CallSites.emplace_back(CSI);
|
||||
}
|
||||
}
|
||||
|
||||
// Skip printing if there's no profile data for non-entry basic block.
|
||||
if (YamlBB.CallSites.empty() && !BB->isEntryPoint()) {
|
||||
uint64_t SuccessorExecCount = 0;
|
||||
for (auto &BranchInfo : BB->branch_info()) {
|
||||
SuccessorExecCount += BranchInfo.Count;
|
||||
}
|
||||
if (!SuccessorExecCount)
|
||||
continue;
|
||||
}
|
||||
|
||||
auto BranchInfo = BB->branch_info_begin();
|
||||
for (const auto *Successor : BB->successors()) {
|
||||
yaml::bolt::SuccessorInfo YamlSI;
|
||||
YamlSI.Index = Successor->getLayoutIndex();
|
||||
YamlSI.Count = BranchInfo->Count;
|
||||
YamlSI.Mispreds = BranchInfo->MispredictedCount;
|
||||
|
||||
YamlBB.Successors.emplace_back(YamlSI);
|
||||
|
||||
++BranchInfo;
|
||||
}
|
||||
|
||||
YamlBF.Blocks.emplace_back(YamlBB);
|
||||
}
|
||||
}
|
||||
} // end anonymous namespace
|
||||
|
||||
void ProfileWriter::printBinaryFunctionProfile(const BinaryFunction &BF) {
|
||||
yaml::bolt::BinaryFunctionProfile YamlBF;
|
||||
convert(BF, YamlBF);
|
||||
|
||||
yaml::Output Out(*OS);
|
||||
Out << YamlBF;
|
||||
}
|
||||
|
||||
void ProfileWriter::printBinaryFunctionsProfile(
|
||||
std::map<uint64_t, BinaryFunction> &BFs) {
|
||||
std::vector<yaml::bolt::BinaryFunctionProfile> YamlBFs;
|
||||
for (auto &BFI : BFs) {
|
||||
const auto &BF = BFI.second;
|
||||
if (BF.hasProfile()) {
|
||||
yaml::bolt::BinaryFunctionProfile YamlBF;
|
||||
convert(BF, YamlBF);
|
||||
YamlBFs.emplace_back(YamlBF);
|
||||
}
|
||||
}
|
||||
|
||||
yaml::Output Out(*OS);
|
||||
Out << YamlBFs;
|
||||
}
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
53
bolt/ProfileWriter.h
Normal file
53
bolt/ProfileWriter.h
Normal file
@@ -0,0 +1,53 @@
|
||||
//===-- ProfileWriter.h - serialize profiling data --------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_BOLT_PROFILE_WRITER_H
|
||||
#define LLVM_TOOLS_LLVM_BOLT_PROFILE_WRITER_H
|
||||
|
||||
#include "BinaryBasicBlock.h"
|
||||
#include "BinaryContext.h"
|
||||
#include "BinaryFunction.h"
|
||||
#include "ProfileYAMLMapping.h"
|
||||
#include "llvm/Support/ErrorOr.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <system_error>
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
class ProfileWriter {
|
||||
ProfileWriter() = delete;
|
||||
|
||||
std::string FileName;
|
||||
|
||||
std::error_code write(BinaryFunction &BF);
|
||||
|
||||
std::unique_ptr<raw_fd_ostream> OS;
|
||||
|
||||
void printBinaryFunctionProfile(const BinaryFunction &BF);
|
||||
|
||||
void printBinaryFunctionsProfile(std::map<uint64_t, BinaryFunction> &BFs);
|
||||
|
||||
public:
|
||||
explicit ProfileWriter(const std::string &FileName)
|
||||
: FileName(FileName) {
|
||||
}
|
||||
|
||||
/// Write profile for functions.
|
||||
std::error_code writeProfile(std::map<uint64_t, BinaryFunction> &Functions);
|
||||
};
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
||||
#endif // LLVM_TOOLS_LLVM_BOLT_PROFILE_WRITER_H
|
||||
147
bolt/ProfileYAMLMapping.h
Normal file
147
bolt/ProfileYAMLMapping.h
Normal file
@@ -0,0 +1,147 @@
|
||||
//===-- ProfileYAMLMapping.h - mappings for BOLT profile --------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Implement mapping between binary function profile and YAML representation.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_BOLT_PROFILEYAMLMAPPING_H
|
||||
#define LLVM_TOOLS_LLVM_BOLT_PROFILEYAMLMAPPING_H
|
||||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/YAMLTraits.h"
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
namespace yaml {
|
||||
|
||||
namespace bolt {
|
||||
struct CallSiteInfo {
|
||||
llvm::yaml::Hex32 Offset{0};
|
||||
uint32_t DestId{0};
|
||||
uint32_t EntryDiscriminator{0}; // multiple entry discriminator
|
||||
uint64_t Count{0};
|
||||
uint64_t Mispreds{0};
|
||||
|
||||
bool operator==(const CallSiteInfo &Other) const {
|
||||
return Offset == Other.Offset &&
|
||||
DestId == Other.DestId &&
|
||||
EntryDiscriminator == Other.EntryDiscriminator;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
template <> struct MappingTraits<bolt::CallSiteInfo> {
|
||||
static void mapping(IO &YamlIO, bolt::CallSiteInfo &CSI) {
|
||||
YamlIO.mapRequired("off", CSI.Offset);
|
||||
YamlIO.mapRequired("fid", CSI.DestId);
|
||||
YamlIO.mapOptional("disc", CSI.EntryDiscriminator, (uint32_t)0);
|
||||
YamlIO.mapRequired("cnt", CSI.Count);
|
||||
YamlIO.mapOptional("mis", CSI.Mispreds, (uint64_t)0);
|
||||
}
|
||||
|
||||
static const bool flow = true;
|
||||
};
|
||||
|
||||
namespace bolt {
|
||||
/// YAML record describing one profiled outgoing edge of a basic block.
struct SuccessorInfo {
  uint32_t Index{0};
  uint64_t Count{0};
  uint64_t Mispreds{0};

  /// Records are compared by successor index only; the counters do not
  /// take part.
  bool operator==(const SuccessorInfo &Other) const {
    return !(Index != Other.Index);
  }
};
|
||||
}
|
||||
|
||||
template <> struct MappingTraits<bolt::SuccessorInfo> {
|
||||
static void mapping(IO &YamlIO, bolt::SuccessorInfo &SI) {
|
||||
YamlIO.mapRequired("bid", SI.Index);
|
||||
YamlIO.mapRequired("cnt", SI.Count);
|
||||
YamlIO.mapOptional("mis", SI.Mispreds, (uint64_t)0);
|
||||
}
|
||||
|
||||
static const bool flow = true;
|
||||
};
|
||||
|
||||
} // end namespace yaml
|
||||
} // end namespace llvm
|
||||
|
||||
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::CallSiteInfo)
|
||||
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::SuccessorInfo)
|
||||
|
||||
namespace llvm {
|
||||
namespace yaml {
|
||||
|
||||
namespace bolt {
|
||||
struct BinaryBasicBlockProfile {
|
||||
uint32_t Index{0};
|
||||
uint32_t NumInstructions{0};
|
||||
llvm::yaml::Hex64 Hash{0};
|
||||
uint64_t ExecCount{0};
|
||||
std::vector<CallSiteInfo> CallSites;
|
||||
std::vector<SuccessorInfo> Successors;
|
||||
|
||||
bool operator==(const BinaryBasicBlockProfile &Other) const {
|
||||
return Index == Other.Index;
|
||||
}
|
||||
};
|
||||
} // namespace bolt
|
||||
|
||||
template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
|
||||
static void mapping(IO &YamlIO, bolt::BinaryBasicBlockProfile &BBP) {
|
||||
YamlIO.mapRequired("bid", BBP.Index);
|
||||
YamlIO.mapRequired("insns", BBP.NumInstructions);
|
||||
YamlIO.mapOptional("exec", BBP.ExecCount, (uint64_t)0);
|
||||
YamlIO.mapOptional("calls", BBP.CallSites,
|
||||
std::vector<bolt::CallSiteInfo>());
|
||||
YamlIO.mapOptional("succ", BBP.Successors,
|
||||
std::vector<bolt::SuccessorInfo>());
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace yaml
|
||||
} // end namespace llvm
|
||||
|
||||
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryBasicBlockProfile)
|
||||
|
||||
namespace llvm {
|
||||
namespace yaml {
|
||||
|
||||
namespace bolt {
|
||||
struct BinaryFunctionProfile {
|
||||
std::string Name;
|
||||
uint32_t NumBasicBlocks;
|
||||
uint32_t Id;
|
||||
llvm::yaml::Hex64 Hash;
|
||||
uint64_t ExecCount;
|
||||
std::vector<BinaryBasicBlockProfile> Blocks;
|
||||
bool Used{false};
|
||||
};
|
||||
}
|
||||
|
||||
template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
|
||||
static void mapping(IO &YamlIO, bolt::BinaryFunctionProfile &BFP) {
|
||||
YamlIO.mapRequired("name", BFP.Name);
|
||||
YamlIO.mapRequired("fid", BFP.Id);
|
||||
YamlIO.mapRequired("hash", BFP.Hash);
|
||||
YamlIO.mapRequired("exec", BFP.ExecCount);
|
||||
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
|
||||
YamlIO.mapOptional("blocks", BFP.Blocks,
|
||||
std::vector<bolt::BinaryBasicBlockProfile>());
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace yaml
|
||||
} // end namespace llvm
|
||||
|
||||
LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(llvm::yaml::bolt::BinaryFunctionProfile)
|
||||
|
||||
#endif
|
||||
@@ -18,6 +18,8 @@
|
||||
#include "DataAggregator.h"
|
||||
#include "DataReader.h"
|
||||
#include "Exceptions.h"
|
||||
#include "ProfileReader.h"
|
||||
#include "ProfileWriter.h"
|
||||
#include "RewriteInstance.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
@@ -97,6 +99,11 @@ AllowStripped("allow-stripped",
|
||||
cl::Hidden,
|
||||
cl::cat(BoltCategory));
|
||||
|
||||
static cl::opt<std::string>
|
||||
BoltProfile("b",
|
||||
cl::desc("<bolt profile>"),
|
||||
cl::cat(BoltCategory));
|
||||
|
||||
cl::opt<bool>
|
||||
BoostMacroops("boost-macroops",
|
||||
cl::desc("try to boost macro-op fusions by avoiding the cache-line boundary"),
|
||||
@@ -217,6 +224,11 @@ RelocationMode("relocs",
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltCategory));
|
||||
|
||||
static cl::opt<std::string>
|
||||
SaveProfile("w",
|
||||
cl::desc("save recorded profile to a file"),
|
||||
cl::cat(BoltOutputCategory));
|
||||
|
||||
static cl::list<std::string>
|
||||
SkipFunctionNames("skip-funcs",
|
||||
cl::CommaSeparated,
|
||||
@@ -873,7 +885,7 @@ void RewriteInstance::run() {
|
||||
discoverFileObjects();
|
||||
readDebugInfo();
|
||||
disassembleFunctions();
|
||||
readProfileData();
|
||||
processProfileData();
|
||||
if (opts::AggregateOnly)
|
||||
return;
|
||||
postProcessFunctions();
|
||||
@@ -1901,39 +1913,56 @@ void RewriteInstance::readDebugInfo() {
|
||||
BC->preprocessDebugInfo(BinaryFunctions);
|
||||
}
|
||||
|
||||
void RewriteInstance::readProfileData() {
|
||||
void RewriteInstance::processProfileData() {
|
||||
if (DA.started()) {
|
||||
NamedRegionTimer T("aggregate data", TimerGroupName, opts::TimeRewrite);
|
||||
DA.aggregate(*BC.get(), BinaryFunctions);
|
||||
|
||||
for (auto &BFI : BinaryFunctions) {
|
||||
auto &Function = BFI.second;
|
||||
Function.convertBranchData();
|
||||
}
|
||||
|
||||
if (opts::AggregateOnly) {
|
||||
if (std::error_code EC = DA.writeAggregatedFile()) {
|
||||
check_error(EC, "cannot create output data file");
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
NamedRegionTimer T("read profile data", TimerGroupName, opts::TimeRewrite);
|
||||
|
||||
if (!opts::BoltProfile.empty()) {
|
||||
ProfileReader PR;
|
||||
PR.readProfile(opts::BoltProfile, BinaryFunctions);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Preliminary match profile data to functions.
|
||||
if (!BC->DR.getAllFuncsData().empty()) {
|
||||
for (auto &BFI : BinaryFunctions) {
|
||||
auto &Function = BFI.second;
|
||||
if (auto *MemData = BC->DR.getFuncMemData(Function.getNames())) {
|
||||
Function.MemData = MemData;
|
||||
MemData->Used = true;
|
||||
}
|
||||
if (auto *FuncData = BC->DR.getFuncBranchData(Function.getNames())) {
|
||||
Function.BranchData = FuncData;
|
||||
Function.ExecutionCount = FuncData->ExecutionCount;
|
||||
FuncData->Used = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
NamedRegionTimer T("read profile data", TimerGroupName, opts::TimeRewrite);
|
||||
// Preliminary match profile data to functions.
|
||||
if (!BC->DR.getAllFuncsData().empty()) {
|
||||
for (auto &BFI : BinaryFunctions) {
|
||||
auto &Function = BFI.second;
|
||||
if (auto *MemData = BC->DR.getFuncMemData(Function.getNames())) {
|
||||
Function.MemData = MemData;
|
||||
MemData->Used = true;
|
||||
}
|
||||
if (auto *FuncData = BC->DR.getFuncBranchData(Function.getNames())) {
|
||||
Function.BranchData = FuncData;
|
||||
Function.ExecutionCount = FuncData->ExecutionCount;
|
||||
FuncData->Used = true;
|
||||
}
|
||||
Function.readProfile();
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &BFI : BinaryFunctions) {
|
||||
auto &Function = BFI.second;
|
||||
Function.readProfile();
|
||||
if (!opts::SaveProfile.empty()) {
|
||||
ProfileWriter PW(opts::SaveProfile);
|
||||
PW.writeProfile(BinaryFunctions);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -178,8 +178,8 @@ public:
|
||||
/// Read information from debug sections.
|
||||
void readDebugInfo();
|
||||
|
||||
/// Associate profile data with functions.
|
||||
void readProfileData();
|
||||
/// Associate profile data with binary objects.
|
||||
void processProfileData();
|
||||
|
||||
/// Disassemble each function in the binary and associate it with a
|
||||
/// BinaryFunction object, preparing all information necessary for binary
|
||||
|
||||
Reference in New Issue
Block a user