[BOLT] Expose external entry count for functions (#141674)

Record the number of function invocations from external code - code
outside the binary, which may include JIT code and DSOs. Accounting
external entry counts improves the fidelity of call graph flow 
conservation analysis.

Test Plan: updated shrinkwrapping.test
This commit is contained in:
Amir Ayupov
2025-06-10 14:31:22 -07:00
committed by GitHub
parent 13ccce2877
commit 0c77468288
10 changed files with 33 additions and 0 deletions

View File

@@ -388,6 +388,10 @@ private:
/// The profile data for the number of times the function was executed.
uint64_t ExecutionCount{COUNT_NO_PROFILE};
/// Profile data for the number of times this function was entered from
/// external code (DSO, JIT, etc).
uint64_t ExternEntryCount{0};
/// Profile match ratio.
float ProfileMatchRatio{0.0f};
@@ -1877,6 +1881,10 @@ public:
return *this;
}
/// Set the profile data for the number of times the function was entered from
/// external code (DSO/JIT).
void setExternEntryCount(uint64_t Count) { ExternEntryCount = Count; }
/// Adjust execution count for the function by a given \p Count. The value
/// \p Count will be subtracted from the current function count.
///
@@ -1904,6 +1912,10 @@ public:
/// Return COUNT_NO_PROFILE if there's no profile info.
uint64_t getExecutionCount() const { return ExecutionCount; }
/// Return the profile information about the number of times the function was
/// entered from external code (DSO/JIT).
uint64_t getExternEntryCount() const { return ExternEntryCount; }
/// Return the raw profile information about the number of branch
/// executions corresponding to this function.
uint64_t getRawSampleCount() const { return RawSampleCount; }

View File

@@ -97,6 +97,9 @@ struct FuncBranchData {
/// Total execution count for the function.
int64_t ExecutionCount{0};
/// Total entry count from external code for the function.
uint64_t ExternEntryCount{0};
/// Indicate if the data was used.
bool Used{false};

View File

@@ -206,6 +206,7 @@ struct BinaryFunctionProfile {
uint32_t Id{0};
llvm::yaml::Hex64 Hash{0};
uint64_t ExecCount{0};
uint64_t ExternEntryCount{0};
std::vector<BinaryBasicBlockProfile> Blocks;
std::vector<InlineTreeNode> InlineTree;
bool Used{false};
@@ -218,6 +219,7 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
YamlIO.mapRequired("fid", BFP.Id);
YamlIO.mapRequired("hash", BFP.Hash);
YamlIO.mapRequired("exec", BFP.ExecCount);
YamlIO.mapOptional("extern", BFP.ExternEntryCount, 0);
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
YamlIO.mapOptional("blocks", BFP.Blocks,
std::vector<bolt::BinaryBasicBlockProfile>());

View File

@@ -471,6 +471,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
OS << "\n Sample Count: " << RawSampleCount;
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
}
if (ExternEntryCount)
OS << "\n Extern Entry Count: " << ExternEntryCount;
if (opts::PrintDynoStats && !getLayout().block_empty()) {
OS << '\n';

View File

@@ -532,6 +532,9 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
std::vector<uint64_t> &MaxCountMap = TotalMaxCountMaps[FunctionNum];
std::vector<uint64_t> &MinCountMap = TotalMinCountMaps[FunctionNum];
// Record external entry count into CallGraphIncomingFlows
CallGraphIncomingFlows[FunctionNum] += Function->getExternEntryCount();
// Update MaxCountMap, MinCountMap, and CallGraphIncomingFlows
auto recordCall = [&](const BinaryBasicBlock *SourceBB,
const MCSymbol *DestSymbol, uint64_t Count,

View File

@@ -2255,6 +2255,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
YamlBF.Id = BF->getFunctionNumber();
YamlBF.Hash = BAT->getBFHash(FuncAddress);
YamlBF.ExecCount = BF->getKnownExecutionCount();
YamlBF.ExternEntryCount = BF->getExternEntryCount();
YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
const BoltAddressTranslation::BBHashMapTy &BlockMap =
BAT->getBBHashMap(FuncAddress);

View File

@@ -85,6 +85,7 @@ void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
}
llvm::stable_sort(Data);
ExecutionCount += FBD.ExecutionCount;
ExternEntryCount += FBD.ExternEntryCount;
for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
assert(I->To.Name == FBD.Name);
auto NewElmt = EntryData.insert(EntryData.end(), *I);
@@ -269,6 +270,7 @@ Error DataReader::preprocessProfile(BinaryContext &BC) {
if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
setBranchData(Function, FuncData);
Function.ExecutionCount = FuncData->ExecutionCount;
Function.ExternEntryCount = FuncData->ExternEntryCount;
FuncData->Used = true;
}
}
@@ -419,6 +421,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
if (fetchProfileForOtherEntryPoints(BF)) {
BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
BF.ExecutionCount = FBD->ExecutionCount;
BF.ExternEntryCount = FBD->ExternEntryCount;
BF.RawSampleCount = FBD->getNumExecutedBranches();
}
return;
@@ -449,6 +452,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
setBranchData(BF, NewBranchData);
NewBranchData->Used = true;
BF.ExecutionCount = NewBranchData->ExecutionCount;
BF.ExternEntryCount = NewBranchData->ExternEntryCount;
BF.ProfileMatchRatio = 1.0f;
break;
}
@@ -1190,6 +1194,8 @@ std::error_code DataReader::parse() {
if (BI.To.IsSymbol && BI.To.Offset == 0) {
I = GetOrCreateFuncEntry(BI.To.Name);
I->second.ExecutionCount += BI.Branches;
if (!BI.From.IsSymbol)
I->second.ExternEntryCount += BI.Branches;
}
}

View File

@@ -176,6 +176,7 @@ bool YAMLProfileReader::parseFunctionProfile(
uint64_t FunctionExecutionCount = 0;
BF.setExecutionCount(YamlBF.ExecCount);
BF.setExternEntryCount(YamlBF.ExternEntryCount);
uint64_t FuncRawBranchCount = 0;
for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)

View File

@@ -226,6 +226,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
YamlBF.ExecCount = BF.getKnownExecutionCount();
YamlBF.ExternEntryCount = BF.getExternEntryCount();
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
if (PseudoProbeDecoder && BF.getGUID()) {
std::tie(YamlBF.InlineTree, InlineTreeNodeId) =

View File

@@ -8,6 +8,7 @@ REQUIRES: shell
RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q
RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \
RUN: --print-only=main --print-cfg \
RUN: --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \
RUN: FileCheck %s --check-prefix=CHECK-BOLT
@@ -19,6 +20,7 @@ RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \
RUN: `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \
RUN: tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT
CHECK-BOLT: Extern Entry Count: 100
CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops
CHECK-INPUT: DW_CFA_advance_loc: 2