Summary:
Profile reading was tightly coupled with CFG construction. Since I plan to move to a new profile format that will be associated with the CFG, it is critical to decouple the two phases.

We now read the profile right after the CFG is constructed, but before it is "canonicalized", i.e. conditional tail calls (CTCs) will still be there. After reading the profile, we run a post-processing pass that fixes the CFG and does some post-processing for debug info, such as inference of fall-throughs, which is still required with the current format.

Another good reason for decoupling is that we can use the profile together with the CFG to more accurately record fall-through branches during aggregation.

At the moment we use "Offset" annotations to locate the instructions corresponding to profile entries. This might not be super efficient. However, once we switch to the new profile format the offsets will no longer be needed. We might keep them for the aggregator, but if we have to trust the LBR data they might not be strictly necessary.

I've tried to make the changes while keeping backwards compatibility. This makes it easier to verify the correctness of the changes, but it also means that we lose some accuracy in the profile. Some refactoring is included.

The flag "-prof-compat-mode" (on by default) is used for bug-level backwards compatibility. Disable it for more accurate tracing.

(cherry picked from FBD6506156)
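For context, a minimal usage sketch of the flag mentioned above. This is an illustration only: the input binary and profile file names are hypothetical, and it assumes the usual llvm-bolt -data/-o options together with the -prof-compat-mode option defined in this file.

  # Hypothetical invocation: disable bug-level compatibility for more accurate tracing.
  llvm-bolt app -data=perf.fdata -o app.bolt -prof-compat-mode=false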
//===--- BinaryFunctionProfile.cpp ----------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#include "DataReader.h"
#include "Passes/MCF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

using namespace llvm;
using namespace bolt;

namespace opts {

extern cl::OptionCategory AggregatorCategory;
extern cl::OptionCategory BoltOptCategory;

extern cl::opt<unsigned> Verbosity;
extern cl::opt<IndirectCallPromotionType> IndirectCallPromotion;
extern cl::opt<JumpTableSupportLevel> JumpTables;

static cl::opt<bool>
CompatMode("prof-compat-mode",
  cl::desc("maintain bug-level compatibility with old profile"),
  cl::init(true),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<MCFCostFunction>
DoMCF("mcf",
  cl::desc("solve a min cost flow problem on the CFG to fix edge counts "
           "(default=disable)"),
  cl::init(MCF_DISABLE),
  cl::values(
    clEnumValN(MCF_DISABLE, "none",
               "disable MCF"),
    clEnumValN(MCF_LINEAR, "linear",
               "cost function is inversely proportional to edge count"),
    clEnumValN(MCF_QUADRATIC, "quadratic",
               "cost function is inversely proportional to edge count squared"),
    clEnumValN(MCF_LOG, "log",
               "cost function is inversely proportional to log of edge count"),
    clEnumValN(MCF_BLAMEFTS, "blamefts",
               "tune cost to blame fall-through edges for surplus flow"),
    clEnumValEnd),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
FixFuncCounts("fix-func-counts",
  cl::desc("adjust function counts based on basic blocks execution count"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

} // namespace opts

namespace llvm {
namespace bolt {

bool BinaryFunction::recordTrace(
    const LBREntry &First,
    const LBREntry &Second,
    uint64_t Count,
    SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) {
  if (!isSimple())
    return false;

  assert(CurrentState == State::CFG && "can only record traces in CFG state");

  // Offsets of the trace within this function.
  const auto From = First.To - getAddress();
  const auto To = Second.From - getAddress();

  if (From > To)
    return false;

  auto *FromBB = getBasicBlockContainingOffset(From);
  auto *ToBB = getBasicBlockContainingOffset(To);

  if (!FromBB || !ToBB)
    return false;

  // Fill out information for fall-through edges. The From and To could be
  // within the same basic block, e.g. when two call instructions are in the
  // same block. In this case we skip the processing.
  if (FromBB == ToBB) {
    if (opts::CompatMode)
      return true;

    // If the previous block ended with a call, the destination of a return
    // would be in ToBB basic block. And if the ToBB starts with a control
    // transfer instruction, we will have a 0-length trace that we have to
    // account for as a fall-through edge.
    if (To == ToBB->getOffset()) {
      // External entry point.
      if (ToBB->isEntryPoint() || ToBB->isLandingPad())
        return true;

      // Check that the origin LBR of a trace starts in another function.
      // Otherwise it's an internal branch that was accounted for.
      if (containsAddress(First.From))
        return true;

      auto *PrevBB = BasicBlocksLayout[ToBB->getIndex() - 1];

      // This could be a bad trace.
      if (!PrevBB->getSuccessor(ToBB->getLabel())) {
        DEBUG(dbgs() << "invalid LBR sequence:\n"
                     << " " << First << '\n'
                     << " " << Second << '\n');
        return false;
      }

      auto &BI = PrevBB->getBranchInfo(*ToBB);
      BI.Count += Count;
      if (Branches) {
        const auto *Instr = PrevBB->getLastNonPseudoInstr();
        const auto Offset =
            BC.MIA->getAnnotationWithDefault<uint64_t>(*Instr, "Offset");
        Branches->push_back(std::make_pair(Offset, ToBB->getOffset()));
      }
    }

    return true;
  }

  // Process blocks in the original layout order.
  auto *BB = BasicBlocksLayout[FromBB->getIndex()];
  assert(BB == FromBB && "index mismatch");
  while (BB != ToBB) {
    auto *NextBB = BasicBlocksLayout[BB->getIndex() + 1];
    assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");

    // Check for bad LBRs.
    if (!BB->getSuccessor(NextBB->getLabel())) {
      DEBUG(dbgs() << "no fall-through for the trace:\n"
                   << " " << First << '\n'
                   << " " << Second << '\n');
      return false;
    }

    // To keep backwards compatibility we skip recording fall-throughs that
    // are not a result of a conditional jump.
    if (!opts::CompatMode ||
        (BB->succ_size() == 2 &&
         BB->getConditionalSuccessor(false) == NextBB)) {
      auto &BI = BB->getBranchInfo(*NextBB);
      BI.Count += Count;

      if (Branches) {
        const auto *Instr = BB->getLastNonPseudoInstr();
        // Note: real offset for conditional jump instruction shouldn't be 0.
        const auto Offset =
            BC.MIA->getAnnotationWithDefault<uint64_t>(*Instr, "Offset");
        if (Offset) {
          Branches->push_back(std::make_pair(Offset, NextBB->getOffset()));
        }
      }
    }

    BB = NextBB;
  }

  return true;
}

bool BinaryFunction::recordBranch(uint64_t From, uint64_t To,
                                  uint64_t Count, uint64_t Mispreds) {
  auto *FromBB = getBasicBlockContainingOffset(From);
  auto *ToBB = getBasicBlockContainingOffset(To);

  if (!FromBB || !ToBB) {
    DEBUG(dbgs() << "failed to get block for recorded branch\n");
    return false;
  }

  // Could be bad LBR data. Ignore, or report as a bad profile for backwards
  // compatibility.
  if (From == To) {
    if (!opts::CompatMode)
      return true;
    auto *Instr = getInstructionAtOffset(0);
    if (Instr && BC.MIA->isCall(*Instr))
      return true;
    return false;
  }

  if (FromBB->succ_size() == 0) {
    // Return from a tail call.
    return true;
  }

  // Very rarely we will see ignored branches. Do a linear check.
  for (auto &Branch : IgnoredBranches) {
    if (Branch == std::make_pair(static_cast<uint32_t>(From),
                                 static_cast<uint32_t>(To)))
      return true;
  }

  if (To != ToBB->getOffset()) {
    // "To" could be referring to nop instructions in between 2 basic blocks.
    // While building the CFG we make sure these nops are attributed to the
    // previous basic block, thus we check if the destination belongs to the
    // gap past the last instruction.
    const auto *LastInstr = ToBB->getLastNonPseudoInstr();
    if (LastInstr) {
      const auto LastInstrOffset =
          BC.MIA->getAnnotationWithDefault<uint64_t>(*LastInstr, "Offset");

      // With old .fdata we are getting FT branches for "jcc,jmp" sequences.
      if (To == LastInstrOffset && BC.MIA->isUnconditionalBranch(*LastInstr)) {
        return true;
      }

      if (To <= LastInstrOffset) {
        DEBUG(dbgs() << "branch recorded into the middle of the block" << " in "
                     << *this << " : " << From << " -> " << To << '\n');
        return false;
      }
    }

    // The real destination is the layout successor of the detected ToBB.
    if (ToBB == BasicBlocksLayout.back())
      return false;
    auto *NextBB = BasicBlocksLayout[ToBB->getIndex() + 1];
    assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout");
    ToBB = NextBB;
  }

  // If there's no corresponding instruction for 'From', we have probably
  // discarded it as a FT from __builtin_unreachable.
  auto *FromInstruction = getInstructionAtOffset(From);
  if (!FromInstruction) {
    DEBUG(dbgs() << "no instruction for offset " << From << " in "
                 << *this << '\n');
    return false;
  }

  if (FromBB == ToBB) {
    // Check for a return from a recursive call.
    // Otherwise it's a simple loop.
  }

  if (!FromBB->getSuccessor(ToBB->getLabel())) {
    // Check if this is a recursive call or a return from a recursive call.
    if (ToBB->isEntryPoint()) {
      // Execution count is already accounted for.
      return true;
    }

    DEBUG(dbgs() << "invalid branch in " << *this << '\n'
                 << Twine::utohexstr(From) << " -> "
                 << Twine::utohexstr(To) << '\n');
    return false;
  }

  auto &BI = FromBB->getBranchInfo(*ToBB);
  BI.Count += Count;
  // Only update the mispredicted count if the branch count was real.
  if (Count) {
    BI.MispredictedCount += Mispreds;
  }

  return true;
}

bool BinaryFunction::recordEntry(uint64_t To, bool Mispred, uint64_t Count) {
  if (To > getSize())
    return false;

  if (!hasProfile())
    ExecutionCount = 0;

  if (To == 0)
    ExecutionCount += Count;

  return true;
}

bool BinaryFunction::recordExit(uint64_t From, bool Mispred, uint64_t Count) {
  if (!isSimple())
    return false;
  assert(From <= getSize() && "wrong From address");

  if (!hasProfile())
    ExecutionCount = 0;

  return true;
}

void BinaryFunction::postProcessProfile() {
  if (!hasValidProfile()) {
    clearProfile();
    return;
  }

  // Check if MCF post-processing was requested.
  if (opts::DoMCF != MCF_DISABLE) {
    removeTagsFromProfile();
    solveMCF(*this, opts::DoMCF);
    return;
  }

  // If we are using non-LBR sampling there's nothing left to do.
  if (!BranchData)
    return;

  // Bug compatibility with previous version - double accounting for
  // conditional jump into a fall-through block.
  if (opts::CompatMode) {
    for (auto *BB : BasicBlocks) {
      if (BB->succ_size() == 2 &&
          BB->getConditionalSuccessor(false) ==
              BB->getConditionalSuccessor(true)) {
        auto &TakenBI = *BB->branch_info_begin();
        auto &FallThroughBI = *BB->branch_info_rbegin();
        FallThroughBI.Count = TakenBI.Count;
        FallThroughBI.MispredictedCount = 0;
      }
    }
  }

  // Pre-sort branch data.
  std::stable_sort(BranchData->Data.begin(), BranchData->Data.end());

  // If we have at least some branch data for the function, indicate that it
  // was executed.
  if (opts::FixFuncCounts && ExecutionCount == 0) {
    ExecutionCount = 1;
  }

  // Compute preliminary execution count for each basic block.
  for (auto *BB : BasicBlocks) {
    BB->ExecutionCount = 0;
  }
  for (auto *BB : BasicBlocks) {
    auto SuccBIIter = BB->branch_info_begin();
    for (auto Succ : BB->successors()) {
      if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
        Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
      ++SuccBIIter;
    }
  }

  // Set entry BBs to zero; we'll update their execution count next with entry
  // data (we maintain a separate data structure for branches to function entry
  // points).
  for (auto *BB : BasicBlocks) {
    if (BB->isEntryPoint())
      BB->ExecutionCount = 0;
  }

  // Update execution counts of landing pad blocks and entry BBs.
  // There is a slight skew introduced here as branches originating from RETs
  // may be accounted for in the execution count of an entry block if the last
  // instruction in a predecessor fall-through block is a call. This situation
  // should rarely happen because there are few multiple-entry functions.
  for (const auto &I : BranchData->EntryData) {
    BinaryBasicBlock *BB = getBasicBlockAtOffset(I.To.Offset);
    if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
      BB->setExecutionCount(BB->getExecutionCount() + I.Branches);
    }
  }

  inferFallThroughCounts();

  // Update profile information for jump tables based on CFG branch data.
  for (auto *BB : BasicBlocks) {
    const auto *LastInstr = BB->getLastNonPseudoInstr();
    if (!LastInstr)
      continue;
    const auto JTAddress = BC.MIA->getJumpTable(*LastInstr);
    if (!JTAddress)
      continue;
    auto *JT = getJumpTableContainingAddress(JTAddress);
    if (!JT)
      continue;

    uint64_t TotalBranchCount = 0;
    for (const auto &BranchInfo : BB->branch_info()) {
      TotalBranchCount += BranchInfo.Count;
    }
    JT->Count += TotalBranchCount;

    if (opts::IndirectCallPromotion < ICP_JUMP_TABLES &&
        opts::JumpTables < JTS_AGGRESSIVE)
      continue;

    if (JT->Counts.empty())
      JT->Counts.resize(JT->Entries.size());
    auto EI = JT->Entries.begin();
    auto Delta = (JTAddress - JT->Address) / JT->EntrySize;
    EI += Delta;
    while (EI != JT->Entries.end()) {
      const auto *TargetBB = getBasicBlockForLabel(*EI);
      if (TargetBB) {
        const auto &BranchInfo = BB->getBranchInfo(*TargetBB);
        assert(Delta < JT->Counts.size());
        JT->Counts[Delta].Count += BranchInfo.Count;
        JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
      }
      ++Delta;
      ++EI;
      // A label marks the start of another jump table.
      if (JT->Labels.count(Delta * JT->EntrySize))
        break;
    }
  }
}

Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
BinaryFunction::getFallthroughsInTrace(const LBREntry &First,
                                       const LBREntry &Second) {
  SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;

  if (!recordTrace(First, Second, 1, &Res))
    return NoneType();

  return Res;
}

void BinaryFunction::readProfile() {
  if (empty())
    return;

  if (!BC.DR.hasLBR()) {
    readSampleData();
    return;
  }

  // Possibly assign/re-assign branch profile data.
  matchProfileData();

  if (!BranchData)
    return;

  uint64_t MismatchedBranches = 0;
  for (const auto &BI : BranchData->Data) {
    if (BI.From.Name != BI.To.Name) {
      continue;
    }

    if (!recordBranch(BI.From.Offset, BI.To.Offset,
                      BI.Branches, BI.Mispreds)) {
      DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
                   << BI.To.Offset << '\n');
      ++MismatchedBranches;
    }
  }

  // Special profile data propagation is required for conditional tail calls.
  for (auto BB : BasicBlocks) {
    auto *CTCInstr = BB->getLastNonPseudoInstr();
    if (!CTCInstr || !BC.MIA->getConditionalTailCall(*CTCInstr))
      continue;

    auto OffsetOrErr =
        BC.MIA->tryGetAnnotationAs<uint64_t>(*CTCInstr, "Offset");
    assert(OffsetOrErr && "offset not set for conditional tail call");

    auto BranchInfoOrErr = BranchData->getDirectCallBranch(*OffsetOrErr);
    if (!BranchInfoOrErr)
      continue;

    BC.MIA->addAnnotation(BC.Ctx.get(), *CTCInstr, "CTCTakenCount",
                          BranchInfoOrErr->Branches);
    BC.MIA->addAnnotation(BC.Ctx.get(), *CTCInstr, "CTCMispredCount",
                          BranchInfoOrErr->Mispreds);
  }
}

void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
  // No reason to merge invalid or empty profiles into BF.
  if (!hasValidProfile())
    return;

  // Update function execution count.
  if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) {
    BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());
  }

  // Since we are merging a valid profile, the new profile should be valid too.
  // It has either already been valid, or it has been cleaned up.
  BF.ProfileMatchRatio = 1.0f;

  // Update basic block and edge counts.
  auto BBMergeI = BF.begin();
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BinaryBasicBlock *BBMerge = &*BBMergeI;
    assert(getIndex(BB) == BF.getIndex(BBMerge));

    // Update basic block count.
    if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
      BBMerge->setExecutionCount(
          BBMerge->getKnownExecutionCount() + BB->getExecutionCount());
    }

    // Update edge count for successors of this basic block.
    auto BBMergeSI = BBMerge->succ_begin();
    auto BIMergeI = BBMerge->branch_info_begin();
    auto BII = BB->branch_info_begin();
    for (const auto *BBSucc : BB->successors()) {
      (void)BBSucc;
      assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));

      // At this point no branch count should be set to COUNT_NO_PROFILE.
      assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");
      assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");

      BIMergeI->Count += BII->Count;

      // When we merge inferred and real fall-through branch data, the merged
      // data is considered inferred.
      if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
          BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        BIMergeI->MispredictedCount += BII->MispredictedCount;
      } else {
        BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
      }

      ++BBMergeSI;
      ++BII;
      ++BIMergeI;
    }
    assert(BBMergeSI == BBMerge->succ_end());

    ++BBMergeI;
  }
  assert(BBMergeI == BF.end());
}

void BinaryFunction::readSampleData() {
  auto SampleDataOrErr = BC.DR.getFuncSampleData(getNames());

  if (!SampleDataOrErr)
    return;

  // Non-LBR mode territory.
  // First step is to assign BB execution count based on samples from perf.
  ProfileMatchRatio = 1.0f;
  removeTagsFromProfile();
  bool NormalizeByInsnCount =
      BC.DR.usesEvent("cycles") || BC.DR.usesEvent("instructions");
  bool NormalizeByCalls = BC.DR.usesEvent("branches");
  static bool NagUser{true};
  if (NagUser) {
    outs() << "BOLT-INFO: operating with non-LBR profiling data.\n";
    if (NormalizeByInsnCount) {
      outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
    } else if (NormalizeByCalls) {
      outs() << "BOLT-INFO: normalizing samples by branches.\n";
    }
    NagUser = false;
  }
  uint64_t LastOffset = getSize();
  uint64_t TotalEntryCount{0};
  for (auto I = BasicBlockOffsets.rbegin(), E = BasicBlockOffsets.rend();
       I != E; ++I) {
    uint64_t CurOffset = I->first;
    // Always work with samples multiplied by 1000 to avoid losing them if we
    // later need to normalize numbers.
    uint64_t NumSamples =
        SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000;
    if (NormalizeByInsnCount && I->second->getNumNonPseudos())
      NumSamples /= I->second->getNumNonPseudos();
    else if (NormalizeByCalls) {
      uint32_t NumCalls = I->second->getNumCalls();
      NumSamples /= NumCalls + 1;
    }
    I->second->setExecutionCount(NumSamples);
    if (I->second->isEntryPoint())
      TotalEntryCount += NumSamples;
    LastOffset = CurOffset;
  }
  ExecutionCount = TotalEntryCount;

  estimateEdgeCounts(BC, *this);

  if (opts::DoMCF != MCF_DISABLE)
    solveMCF(*this, opts::DoMCF);
}

void BinaryFunction::inferFallThroughCounts() {
  // Work on a basic block at a time, propagating frequency information
  // forwards.
  // It is important to walk in the layout order.
  for (auto *BB : BasicBlocks) {
    const uint64_t BBExecCount = BB->getExecutionCount();

    // Propagate this information to successors, filling in fall-through edges
    // with frequency information.
    if (BB->succ_size() == 0)
      continue;

    // Calculate frequency of outgoing branches from this node according to
    // LBR data.
    uint64_t ReportedBranches = 0;
    for (const auto &SuccBI : BB->branch_info()) {
      if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
        ReportedBranches += SuccBI.Count;
    }

    // Get taken count of conditional tail call if the block ends with one.
    uint64_t CTCTakenCount = 0;
    const auto CTCInstr = BB->getLastNonPseudoInstr();
    if (CTCInstr && BC.MIA->getConditionalTailCall(*CTCInstr)) {
      CTCTakenCount =
          BC.MIA->getAnnotationWithDefault<uint64_t>(*CTCInstr, "CTCTakenCount");
    }

    // Calculate frequency of throws from this node according to LBR data
    // for branching into associated landing pads. Since it is possible
    // for a landing pad to be associated with more than one basic block,
    // we may overestimate the frequency of throws for such blocks.
    uint64_t ReportedThrows = 0;
    for (const auto *LP : BB->landing_pads()) {
      ReportedThrows += LP->getExecutionCount();
    }

    const uint64_t TotalReportedJumps =
        ReportedBranches + CTCTakenCount + ReportedThrows;

    // Infer the frequency of the fall-through edge, representing not taking
    // the branch.
    uint64_t Inferred = 0;
    if (BBExecCount > TotalReportedJumps)
      Inferred = BBExecCount - TotalReportedJumps;

    DEBUG(
      if (BBExecCount < TotalReportedJumps)
        dbgs()
            << "Fall-through inference is slightly inconsistent. "
               "exec frequency is less than the outgoing edges frequency ("
            << BBExecCount << " < " << ReportedBranches
            << ") for BB at offset 0x"
            << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';
    );

    if (BB->succ_size() <= 2) {
      // Skip if the last instruction is an unconditional jump.
      const auto *LastInstr = BB->getLastNonPseudoInstr();
      if (LastInstr &&
          (BC.MIA->isUnconditionalBranch(*LastInstr) ||
           BC.MIA->isIndirectBranch(*LastInstr)))
        continue;
      // If there is an FT it will be the last successor.
      auto &SuccBI = *BB->branch_info_rbegin();
      auto &Succ = *BB->succ_rbegin();
      if (SuccBI.Count == 0) {
        SuccBI.Count = Inferred;
        SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
        Succ->ExecutionCount += Inferred;
      }
    }
  }

  return;
}

bool BinaryFunction::fetchProfileForOtherEntryPoints() {
  if (!BranchData)
    return false;

  // Check if we are missing profiling data for secondary entry points.
  bool First{true};
  bool Updated{false};
  for (auto BB : BasicBlocks) {
    if (First) {
      First = false;
      continue;
    }
    if (BB->isEntryPoint()) {
      uint64_t EntryAddress = BB->getOffset() + getAddress();
      // Look for branch data associated with this entry point.
      std::vector<std::string> Names;
      std::multimap<uint64_t, std::string>::iterator I, E;
      for (std::tie(I, E) = BC.GlobalAddresses.equal_range(EntryAddress);
           I != E; ++I) {
        Names.push_back(I->second);
      }
      if (!Names.empty()) {
        if (FuncBranchData *Data = BC.DR.getFuncBranchData(Names)) {
          BranchData->appendFrom(*Data, BB->getOffset());
          Data->Used = true;
          Updated = true;
        }
      }
    }
  }
  return Updated;
}

void BinaryFunction::matchProfileMemData() {
  const auto AllMemData = BC.DR.getFuncMemDataRegex(getNames());
  for (auto *NewMemData : AllMemData) {
    // Prevent functions from sharing the same profile.
    if (NewMemData->Used)
      continue;

    if (MemData)
      MemData->Used = false;

    // Update function profile data with the new set.
    MemData = NewMemData;
    MemData->Used = true;
    break;
  }
}

void BinaryFunction::matchProfileData() {
  // This functionality is available for LBR-mode only.
  // TODO: Implement evaluateProfileData() for samples, checking whether
  // sample addresses match instruction addresses in the function.
  if (!BC.DR.hasLBR())
    return;

  if (BranchData) {
    ProfileMatchRatio = evaluateProfileData(*BranchData);
    if (ProfileMatchRatio == 1.0f) {
      if (fetchProfileForOtherEntryPoints()) {
        ProfileMatchRatio = evaluateProfileData(*BranchData);
        ExecutionCount = BranchData->ExecutionCount;
      }
      return;
    }
  }

  // Check if the function name can fluctuate between several compilations
  // possibly triggered by minor unrelated code changes in the source code
  // of the input binary.
  const auto HasVolatileName = [this]() {
    for (const auto Name : getNames()) {
      if (getLTOCommonName(Name))
        return true;
    }
    return false;
  }();
  if (!HasVolatileName)
    return;

  // Check for a profile that matches with 100% confidence.
  const auto AllBranchData = BC.DR.getFuncBranchDataRegex(getNames());
  for (auto *NewBranchData : AllBranchData) {
    // Prevent functions from sharing the same profile.
    if (NewBranchData->Used)
      continue;

    if (evaluateProfileData(*NewBranchData) != 1.0f)
      continue;

    if (BranchData)
      BranchData->Used = false;

    // Update function profile data with the new set.
    BranchData = NewBranchData;
    ExecutionCount = NewBranchData->ExecutionCount;
    ProfileMatchRatio = 1.0f;
    BranchData->Used = true;
    break;
  }
}

float BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
  // Until we define a minimal profile, we consider an empty branch data to be
  // a valid profile. It could happen to a function without branches when we
  // still have an EntryData for execution count.
  if (BranchData.Data.empty()) {
    return 1.0f;
  }

  uint64_t NumMatchedBranches = 0;
  for (const auto &BI : BranchData.Data) {
    bool IsValid = false;
    if (BI.From.Name == BI.To.Name) {
      // Try to record information with 0 count.
      IsValid = recordBranch(BI.From.Offset, BI.To.Offset, 0);
    } else {
      // The branch has to originate from this function.
      // Check for calls, tail calls, rets and indirect branches.
      // When matching profiling info, we did not reach the stage
      // when we identify tail calls, so they are still represented
      // by regular branch instructions and we need isBranch() here.
      auto *Instr = getInstructionAtOffset(BI.From.Offset);
      // If it's a prefix - skip it.
      if (Instr && BC.MIA->isPrefix(*Instr))
        Instr = getInstructionAtOffset(BI.From.Offset + 1);
      if (Instr &&
          (BC.MIA->isCall(*Instr) ||
           BC.MIA->isBranch(*Instr) ||
           BC.MIA->isReturn(*Instr))) {
        IsValid = true;
      }
    }

    if (IsValid) {
      ++NumMatchedBranches;
      continue;
    }

    DEBUG(dbgs()
          << "\tinvalid branch in " << *this << " : 0x"
          << Twine::utohexstr(BI.From.Offset) << " -> ";
          if (BI.From.Name == BI.To.Name)
            dbgs() << "0x" << Twine::utohexstr(BI.To.Offset) << '\n';
          else
            dbgs() << "<outbounds>\n";
    );
  }

  const auto MatchRatio = (float) NumMatchedBranches / BranchData.Data.size();
  if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size()) {
    errs() << "BOLT-WARNING: profile branches match only "
           << format("%.1f%%", MatchRatio * 100.0f) << " ("
           << NumMatchedBranches << '/' << BranchData.Data.size()
           << ") for function " << *this << '\n';
  }

  return MatchRatio;
}

void BinaryFunction::clearProfile() {
  // Keep function execution profile the same. Only clear basic block and edge
  // counts.
  for (auto *BB : BasicBlocks) {
    BB->ExecutionCount = 0;
    for (auto &BI : BB->branch_info()) {
      BI.Count = 0;
      BI.MispredictedCount = 0;
    }
  }
}

} // namespace bolt
} // namespace llvm