[BOLT] Expose external entry count for functions (#141674)
Record the number of function invocations from external code, i.e. code outside the binary, which may include JIT code and DSOs. Accounting for external entry counts improves the fidelity of call graph flow conservation analysis. Test Plan: updated shrinkwrapping.test
This commit is contained in:
@@ -388,6 +388,10 @@ private:
|
||||
/// The profile data for the number of times the function was executed.
|
||||
uint64_t ExecutionCount{COUNT_NO_PROFILE};
|
||||
|
||||
/// Profile data for the number of times this function was entered from
|
||||
/// external code (DSO, JIT, etc.).
|
||||
uint64_t ExternEntryCount{0};
|
||||
|
||||
/// Profile match ratio.
|
||||
float ProfileMatchRatio{0.0f};
|
||||
|
||||
@@ -1877,6 +1881,10 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Set the profile data for the number of times the function was entered from
|
||||
/// external code (DSO/JIT).
|
||||
void setExternEntryCount(uint64_t Count) {
  // Plain overwrite: a previously stored external entry count is replaced,
  // not accumulated.
  ExternEntryCount = Count;
}
|
||||
|
||||
/// Adjust execution count for the function by a given \p Count. The value
|
||||
/// \p Count will be subtracted from the current function count.
|
||||
///
|
||||
@@ -1904,6 +1912,10 @@ public:
|
||||
/// Return COUNT_NO_PROFILE if there's no profile info.
|
||||
uint64_t getExecutionCount() const {
  // Read-only accessor: the stored execution counter is returned unchanged.
  return ExecutionCount;
}
|
||||
|
||||
/// Return the profile information about the number of times the function was
|
||||
/// entered from external code (DSO/JIT).
|
||||
uint64_t getExternEntryCount() const {
  // Read-only accessor: the stored external entry counter is returned
  // unchanged.
  return ExternEntryCount;
}
|
||||
|
||||
/// Return the raw profile information about the number of branch
|
||||
/// executions corresponding to this function.
|
||||
uint64_t getRawSampleCount() const {
  // Read-only accessor: the stored raw sample counter is returned unchanged.
  return RawSampleCount;
}
|
||||
|
||||
@@ -97,6 +97,9 @@ struct FuncBranchData {
|
||||
/// Total execution count for the function.
|
||||
int64_t ExecutionCount{0};
|
||||
|
||||
/// Total entry count from external code for the function.
|
||||
uint64_t ExternEntryCount{0};
|
||||
|
||||
/// Indicate if the data was used.
|
||||
bool Used{false};
|
||||
|
||||
|
||||
@@ -206,6 +206,7 @@ struct BinaryFunctionProfile {
|
||||
uint32_t Id{0};
|
||||
llvm::yaml::Hex64 Hash{0};
|
||||
uint64_t ExecCount{0};
|
||||
uint64_t ExternEntryCount{0};
|
||||
std::vector<BinaryBasicBlockProfile> Blocks;
|
||||
std::vector<InlineTreeNode> InlineTree;
|
||||
bool Used{false};
|
||||
@@ -218,6 +219,7 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
|
||||
YamlIO.mapRequired("fid", BFP.Id);
|
||||
YamlIO.mapRequired("hash", BFP.Hash);
|
||||
YamlIO.mapRequired("exec", BFP.ExecCount);
|
||||
YamlIO.mapOptional("extern", BFP.ExternEntryCount, 0);
|
||||
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
|
||||
YamlIO.mapOptional("blocks", BFP.Blocks,
|
||||
std::vector<bolt::BinaryBasicBlockProfile>());
|
||||
|
||||
@@ -471,6 +471,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
|
||||
OS << "\n Sample Count: " << RawSampleCount;
|
||||
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
|
||||
}
|
||||
if (ExternEntryCount)
|
||||
OS << "\n Extern Entry Count: " << ExternEntryCount;
|
||||
|
||||
if (opts::PrintDynoStats && !getLayout().block_empty()) {
|
||||
OS << '\n';
|
||||
|
||||
@@ -532,6 +532,9 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
|
||||
std::vector<uint64_t> &MaxCountMap = TotalMaxCountMaps[FunctionNum];
|
||||
std::vector<uint64_t> &MinCountMap = TotalMinCountMaps[FunctionNum];
|
||||
|
||||
// Record external entry count into CallGraphIncomingFlows
|
||||
CallGraphIncomingFlows[FunctionNum] += Function->getExternEntryCount();
|
||||
|
||||
// Update MaxCountMap, MinCountMap, and CallGraphIncomingFlows
|
||||
auto recordCall = [&](const BinaryBasicBlock *SourceBB,
|
||||
const MCSymbol *DestSymbol, uint64_t Count,
|
||||
|
||||
@@ -2255,6 +2255,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
|
||||
YamlBF.Id = BF->getFunctionNumber();
|
||||
YamlBF.Hash = BAT->getBFHash(FuncAddress);
|
||||
YamlBF.ExecCount = BF->getKnownExecutionCount();
|
||||
YamlBF.ExternEntryCount = BF->getExternEntryCount();
|
||||
YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
|
||||
const BoltAddressTranslation::BBHashMapTy &BlockMap =
|
||||
BAT->getBBHashMap(FuncAddress);
|
||||
|
||||
@@ -85,6 +85,7 @@ void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
|
||||
}
|
||||
llvm::stable_sort(Data);
|
||||
ExecutionCount += FBD.ExecutionCount;
|
||||
ExternEntryCount += FBD.ExternEntryCount;
|
||||
for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
|
||||
assert(I->To.Name == FBD.Name);
|
||||
auto NewElmt = EntryData.insert(EntryData.end(), *I);
|
||||
@@ -269,6 +270,7 @@ Error DataReader::preprocessProfile(BinaryContext &BC) {
|
||||
if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
|
||||
setBranchData(Function, FuncData);
|
||||
Function.ExecutionCount = FuncData->ExecutionCount;
|
||||
Function.ExternEntryCount = FuncData->ExternEntryCount;
|
||||
FuncData->Used = true;
|
||||
}
|
||||
}
|
||||
@@ -419,6 +421,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
|
||||
if (fetchProfileForOtherEntryPoints(BF)) {
|
||||
BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
|
||||
BF.ExecutionCount = FBD->ExecutionCount;
|
||||
BF.ExternEntryCount = FBD->ExternEntryCount;
|
||||
BF.RawSampleCount = FBD->getNumExecutedBranches();
|
||||
}
|
||||
return;
|
||||
@@ -449,6 +452,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
|
||||
setBranchData(BF, NewBranchData);
|
||||
NewBranchData->Used = true;
|
||||
BF.ExecutionCount = NewBranchData->ExecutionCount;
|
||||
BF.ExternEntryCount = NewBranchData->ExternEntryCount;
|
||||
BF.ProfileMatchRatio = 1.0f;
|
||||
break;
|
||||
}
|
||||
@@ -1190,6 +1194,8 @@ std::error_code DataReader::parse() {
|
||||
if (BI.To.IsSymbol && BI.To.Offset == 0) {
|
||||
I = GetOrCreateFuncEntry(BI.To.Name);
|
||||
I->second.ExecutionCount += BI.Branches;
|
||||
if (!BI.From.IsSymbol)
|
||||
I->second.ExternEntryCount += BI.Branches;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -176,6 +176,7 @@ bool YAMLProfileReader::parseFunctionProfile(
|
||||
uint64_t FunctionExecutionCount = 0;
|
||||
|
||||
BF.setExecutionCount(YamlBF.ExecCount);
|
||||
BF.setExternEntryCount(YamlBF.ExternEntryCount);
|
||||
|
||||
uint64_t FuncRawBranchCount = 0;
|
||||
for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
|
||||
|
||||
@@ -226,6 +226,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
|
||||
YamlBF.Hash = BF.getHash();
|
||||
YamlBF.NumBasicBlocks = BF.size();
|
||||
YamlBF.ExecCount = BF.getKnownExecutionCount();
|
||||
YamlBF.ExternEntryCount = BF.getExternEntryCount();
|
||||
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
|
||||
if (PseudoProbeDecoder && BF.getGUID()) {
|
||||
std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
|
||||
|
||||
@@ -8,6 +8,7 @@ REQUIRES: shell
|
||||
|
||||
RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q
|
||||
RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \
|
||||
RUN: --print-only=main --print-cfg \
|
||||
RUN: --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \
|
||||
RUN: FileCheck %s --check-prefix=CHECK-BOLT
|
||||
|
||||
@@ -19,6 +20,7 @@ RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \
|
||||
RUN: `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \
|
||||
RUN: tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT
|
||||
|
||||
CHECK-BOLT: Extern Entry Count: 100
|
||||
CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops
|
||||
|
||||
CHECK-INPUT: DW_CFA_advance_loc: 2
|
||||
|
||||
Reference in New Issue
Block a user