[BOLT][heatmap] Produce zoomed-out heatmaps (#140153)
Add a capability to produce multiple heatmaps with given bucket sizes. The default heatmap block size (64B) could be too fine-grained for large binaries. Extend the option `block-size` to accept a list of bucket sizes for additional heatmaps with coarser granularity. Each coarser heatmap is produced by rescaling the previous one, so every size must be a larger multiple of the size before it. Human-readable suffixes can be used, e.g. 4K, 16kb, 1MiB. New defaults: 64B (base bucket size), 4KB (default page size), 256KB (for large binaries). Test Plan: updated heatmap-preagg.test
This commit is contained in:
@@ -89,7 +89,13 @@ For the generation, the default bucket size was used with a line size of 128.
|
||||
Some useful options are:
|
||||
|
||||
```
|
||||
-line-size=<uint> - number of entries per line (default 256)
|
||||
-line-size=<uint> - number of entries per line (default 256).
|
||||
Use a smaller value (e.g. 128) if the heatmap doesn't fit
|
||||
the screen horizontally.
|
||||
-block-size=<initial size>[,<zoom-out size>,...] - heatmap bucket size,
|
||||
optionally followed by zoom-out sizes to produce coarse-
|
||||
grained heatmaps. Size can be specified in human-readable
|
||||
format with [kKmMgG][i][B] suffix. Default 64B, 4K, 256K.
|
||||
-max-address=<uint> - maximum address considered valid for heatmap (default 4GB)
|
||||
-print-mappings - print mappings in the legend, between characters/blocks and text sections (default false)
|
||||
```
|
||||
|
||||
@@ -85,6 +85,9 @@ public:
|
||||
void printSectionHotness(raw_ostream &OS) const;
|
||||
|
||||
size_t size() const { return Map.size(); }
|
||||
|
||||
/// Increase bucket size to \p NewSize, recomputing the heatmap.
|
||||
void resizeBucket(uint64_t NewSize);
|
||||
};
|
||||
|
||||
} // namespace bolt
|
||||
|
||||
@@ -23,6 +23,15 @@ enum HeatmapModeKind {
|
||||
HM_Optional // perf2bolt --heatmap
|
||||
};
|
||||
|
||||
using HeatmapBlockSizes = std::vector<unsigned>;
|
||||
/// Command-line parser that turns the argument of the heatmap `block-size`
/// option into a HeatmapBlockSizes list (one entry per comma-separated size).
struct HeatmapBlockSpecParser : public llvm::cl::parser<HeatmapBlockSizes> {
|
||||
explicit HeatmapBlockSpecParser(llvm::cl::Option &O)
|
||||
: llvm::cl::parser<HeatmapBlockSizes>(O) {}
|
||||
/// Parse \p Arg as a comma-separated list of sizes and append them to
/// \p Val; problems are reported through \p O.
// Return true on error.
|
||||
bool parse(llvm::cl::Option &O, llvm::StringRef ArgName, llvm::StringRef Arg,
|
||||
HeatmapBlockSizes &Val);
|
||||
};
|
||||
|
||||
extern HeatmapModeKind HeatmapMode;
|
||||
extern bool BinaryAnalysisMode;
|
||||
|
||||
@@ -47,7 +56,8 @@ extern llvm::cl::opt<bool> EqualizeBBCounts;
|
||||
extern llvm::cl::opt<bool> ForcePatch;
|
||||
extern llvm::cl::opt<bool> RemoveSymtab;
|
||||
extern llvm::cl::opt<unsigned> ExecutionCountThreshold;
|
||||
extern llvm::cl::opt<unsigned> HeatmapBlock;
|
||||
extern llvm::cl::opt<HeatmapBlockSizes, false, HeatmapBlockSpecParser>
|
||||
HeatmapBlock;
|
||||
extern llvm::cl::opt<unsigned long long> HeatmapMaxAddress;
|
||||
extern llvm::cl::opt<unsigned long long> HeatmapMinAddress;
|
||||
extern llvm::cl::opt<bool> HeatmapPrintMappings;
|
||||
|
||||
@@ -1314,8 +1314,9 @@ std::error_code DataAggregator::printLBRHeatMap() {
|
||||
opts::HeatmapMaxAddress = 0xffffffffffffffff;
|
||||
opts::HeatmapMinAddress = KernelBaseAddr;
|
||||
}
|
||||
Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
|
||||
opts::HeatmapMaxAddress, getTextSections(BC));
|
||||
opts::HeatmapBlockSizes &HMBS = opts::HeatmapBlock;
|
||||
Heatmap HM(HMBS[0], opts::HeatmapMinAddress, opts::HeatmapMaxAddress,
|
||||
getTextSections(BC));
|
||||
auto getSymbolValue = [&](const MCSymbol *Symbol) -> uint64_t {
|
||||
if (Symbol)
|
||||
if (ErrorOr<uint64_t> SymValue = BC->getSymbolValue(*Symbol))
|
||||
@@ -1365,6 +1366,14 @@ std::error_code DataAggregator::printLBRHeatMap() {
|
||||
HM.printCDF(opts::HeatmapOutput + ".csv");
|
||||
HM.printSectionHotness(opts::HeatmapOutput + "-section-hotness.csv");
|
||||
}
|
||||
// Provide coarse-grained heatmaps if requested via zoom-out scales
|
||||
for (const uint64_t NewBucketSize : ArrayRef(HMBS).drop_front()) {
|
||||
HM.resizeBucket(NewBucketSize);
|
||||
if (opts::HeatmapOutput == "-")
|
||||
HM.print(opts::HeatmapOutput);
|
||||
else
|
||||
HM.print(formatv("{0}-{1}", opts::HeatmapOutput, NewBucketSize).str());
|
||||
}
|
||||
|
||||
return std::error_code();
|
||||
}
|
||||
|
||||
@@ -55,6 +55,8 @@ void Heatmap::print(StringRef FileName) const {
|
||||
errs() << "error opening output file: " << EC.message() << '\n';
|
||||
exit(1);
|
||||
}
|
||||
outs() << "HEATMAP: dumping heatmap with bucket size " << BucketSize << " to "
|
||||
<< FileName << '\n';
|
||||
print(OS);
|
||||
}
|
||||
|
||||
@@ -364,5 +366,13 @@ void Heatmap::printSectionHotness(raw_ostream &OS) const {
|
||||
OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0, 0\n",
|
||||
100.0 * UnmappedHotness / NumTotalCounts);
|
||||
}
|
||||
|
||||
/// Re-aggregate the heatmap into buckets of \p NewSize bytes.
///
/// Every existing entry is folded into the new bucket that contains the start
/// address of the old one, and the counts of entries landing in the same new
/// bucket are summed.
///
/// \param NewSize new bucket size in bytes. Must be non-zero (it is used as a
/// divisor); the result is only a faithful rescaling when it is a multiple of
/// the current bucket size, which the `block-size` option parser enforces.
void Heatmap::resizeBucket(uint64_t NewSize) {
  std::map<uint64_t, uint64_t> NewMap;
  // Bucket * BucketSize is the start address covered by the old bucket;
  // dividing it by NewSize gives the index of the coarser bucket.
  for (const auto &[Bucket, Count] : Map)
    NewMap[Bucket * BucketSize / NewSize] += Count;
  // Swap the rebuilt map in instead of copy-assigning it, so the nodes that
  // were just allocated are reused rather than duplicated.
  Map.swap(NewMap);
  BucketSize = NewSize;
}
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
|
||||
#include "bolt/Utils/CommandLineOpts.h"
|
||||
#include "VCSVersion.inc"
|
||||
#include "llvm/Support/Regex.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@@ -103,10 +104,56 @@ ExecutionCountThreshold("execution-count-threshold",
|
||||
cl::Hidden,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
cl::opt<unsigned>
|
||||
HeatmapBlock("block-size",
|
||||
cl::desc("size of a heat map block in bytes (default 64)"),
|
||||
cl::init(64), cl::cat(HeatmapCategory));
|
||||
/// Parse the `block-size` argument: a comma-separated list of sizes, each a
/// decimal integer optionally followed by a human-readable suffix
/// (k/m/g in either case, optional `i`, optional `b`/`B`; a bare `b`/`B`
/// means plain bytes).
///
/// Every parsed size is appended to \p Val. Sizes must be non-zero, strictly
/// increasing, and each one a multiple of the one before it.
///
/// \param O option used to report errors.
/// \param ArgName unused.
/// \param Arg the raw option value.
/// \param Val output list of sizes in bytes.
/// \returns true on error (after reporting it through \p O), false otherwise.
bool HeatmapBlockSpecParser::parse(cl::Option &O, StringRef ArgName,
                                   StringRef Arg, HeatmapBlockSizes &Val) {
  // Parses a human-readable suffix into a shift amount or nullopt on error.
  auto parseSuffix = [](StringRef Suffix) -> std::optional<unsigned> {
    if (Suffix.empty())
      return 0;
    // The magnitude letter is optional so that a byte-only suffix such as
    // "64B" (the spelling used in the option description) is accepted.
    if (!Regex{"^([kKmMgG]i?)?[bB]?$"}.match(Suffix))
      return std::nullopt;
    switch (Suffix.front()) {
    case 'b':
    case 'B':
      return 0;
    case 'k':
    case 'K':
      return 10;
    case 'm':
    case 'M':
      return 20;
    case 'g':
    case 'G':
      return 30;
    }
    llvm_unreachable("Unexpected suffix");
  };

  SmallVector<StringRef> Sizes;
  Arg.split(Sizes, ',');
  unsigned PreviousSize = 0;
  for (StringRef Size : Sizes) {
    // consumeInteger strips the digits from Size; keep the full text for
    // diagnostics.
    StringRef OrigSize = Size;
    unsigned &SizeVal = Val.emplace_back(0);
    if (Size.consumeInteger(10, SizeVal)) {
      O.error("'" + OrigSize + "' value can't be parsed as an integer");
      return true;
    }
    const std::optional<unsigned> ShiftAmt = parseSuffix(Size);
    if (!ShiftAmt) {
      O.error("'" + Size + "' value can't be parsed as a suffix");
      return true;
    }
    // Reject values whose scaled size does not fit in `unsigned` instead of
    // letting the shift silently wrap around (e.g. "5G").
    if (SizeVal > (~0U >> *ShiftAmt)) {
      O.error("'" + OrigSize + "' value is too large");
      return true;
    }
    SizeVal <<= *ShiftAmt;
    // With PreviousSize == 0 (first entry) this also rejects a zero size.
    if (SizeVal <= PreviousSize || (PreviousSize && SizeVal % PreviousSize)) {
      O.error("'" + OrigSize + "' must be a multiple of previous value");
      return true;
    }
    PreviousSize = SizeVal;
  }
  return false;
}
|
||||
|
||||
cl::opt<opts::HeatmapBlockSizes, false, opts::HeatmapBlockSpecParser>
|
||||
HeatmapBlock(
|
||||
"block-size", cl::value_desc("initial_size{,zoom-out_size,...}"),
|
||||
cl::desc("heatmap bucket size, optionally followed by zoom-out sizes "
|
||||
"for coarse-grained heatmaps (default 64B, 4K, 256K)."),
|
||||
cl::init(HeatmapBlockSizes{/*Initial*/ 64, /*Zoom-out*/ 4096, 262144}),
|
||||
cl::cat(HeatmapCategory));
|
||||
|
||||
cl::opt<unsigned long long> HeatmapMaxAddress(
|
||||
"max-address", cl::init(0xffffffff),
|
||||
|
||||
@@ -3,20 +3,41 @@
|
||||
RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
|
||||
## Non-BOLTed input binary
|
||||
RUN: llvm-bolt-heatmap %t.exe -o %t --pa -p %p/Inputs/blarge_new.preagg.txt \
|
||||
# Heatmaps for 64B, 128B, 1K buckets
|
||||
RUN: --block-size=64,128,1K --line-size 64 \
|
||||
RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP %s
|
||||
RUN: FileCheck %s --check-prefix CHECK-SEC-HOT --input-file %t-section-hotness.csv
|
||||
RUN: FileCheck %s --check-prefix CHECK-HM-64 --input-file %t
|
||||
RUN: FileCheck %s --check-prefix CHECK-HM-128 --input-file %t-128
|
||||
RUN: FileCheck %s --check-prefix CHECK-HM-1024 --input-file %t-1024
|
||||
|
||||
## BOLTed input binary
|
||||
RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt \
|
||||
RUN: --reorder-blocks=ext-tsp --split-functions --split-strategy=cdsplit \
|
||||
RUN: --reorder-functions=cdsort --enable-bat --dyno-stats --skip-funcs=main
|
||||
# Heatmaps for 64B, 4K, 16K, 1M buckets
|
||||
RUN: llvm-bolt-heatmap %t.out -o %t2 --pa -p %p/Inputs/blarge_new_bat.preagg.txt \
|
||||
RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP-BAT %s
|
||||
RUN: --block-size=64,4KB,16kb,1MiB 2>&1 | FileCheck --check-prefix CHECK-HEATMAP-BAT %s
|
||||
RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT --input-file %t2-section-hotness.csv
|
||||
RUN: llvm-nm -n %t.out | FileCheck %s --check-prefix=CHECK-HOT-SYMS
|
||||
RUN: FileCheck %s --check-prefix CHECK-BAT-HM-64 --input-file %t2
|
||||
# Identical hottest range for 4K, 16K, 1M heatmaps
|
||||
RUN: FileCheck %s --check-prefix CHECK-BAT-HM-4K --input-file %t2-4096
|
||||
RUN: FileCheck %s --check-prefix CHECK-BAT-HM-4K --input-file %t2-16384
|
||||
RUN: FileCheck %s --check-prefix CHECK-BAT-HM-4K --input-file %t2-1048576
|
||||
|
||||
# No zoomed-out heatmaps
|
||||
RUN: llvm-bolt-heatmap %t.out -o %t3 --pa -p %p/Inputs/blarge_new_bat.preagg.txt \
|
||||
RUN: --block-size=1024 | FileCheck --check-prefix CHECK-HEATMAP-BAT-1K %s
|
||||
CHECK-HEATMAP-BAT-1K: HEATMAP: dumping heatmap with bucket size 1024
|
||||
CHECK-HEATMAP-BAT-1K-NOT: HEATMAP: dumping heatmap with bucket size
|
||||
|
||||
CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries
|
||||
CHECK-HEATMAP: HEATMAP: invalid traces: 1
|
||||
CHECK-HEATMAP: HEATMAP: dumping heatmap with bucket size 64
|
||||
CHECK-HEATMAP: HEATMAP: dumping heatmap with bucket size 128
|
||||
CHECK-HEATMAP: HEATMAP: dumping heatmap with bucket size 1024
|
||||
CHECK-HEATMAP-NOT: HEATMAP: dumping heatmap with bucket size
|
||||
|
||||
CHECK-SEC-HOT: Section Name, Begin Address, End Address, Percentage Hotness, Utilization Pct, Partition Score
|
||||
CHECK-SEC-HOT-NEXT: .init, 0x401000, 0x40101b, 16.8545, 100.0000, 0.1685
|
||||
@@ -24,8 +45,39 @@ CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583, 66.6667, 0.0317
|
||||
CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872, 85.1064, 0.6671
|
||||
CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000, 0.0000, 0.0000
|
||||
|
||||
# Only check x scales – can't check colors, and FileCheck doesn't strip color
|
||||
# codes by default.
|
||||
CHECK-HM-64: (299, 937]
|
||||
CHECK-HM-64-NEXT: 0
|
||||
CHECK-HM-64-NEXT: 0
|
||||
CHECK-HM-64-NEXT: 0 1 2 3 4 5 6 7 8 9 a b c d e f
|
||||
CHECK-HM-64-NEXT: 048c048c048c048c048c048c048c048c048c048c048c048c048c048c048c048c
|
||||
CHECK-HM-64-NEXT: 0
|
||||
|
||||
CHECK-HM-128: (299, 937]
|
||||
CHECK-HM-128-NEXT: 0
|
||||
CHECK-HM-128-NEXT: 0 1
|
||||
CHECK-HM-128-NEXT: 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f
|
||||
CHECK-HM-128-NEXT: 0808080808080808080808080808080808080808080808080808080808080808
|
||||
CHECK-HM-128-NEXT: 0
|
||||
|
||||
CHECK-HM-1024: (483, 1663]
|
||||
CHECK-HM-1024-NEXT: 0
|
||||
CHECK-HM-1024-NEXT: 0 1 2 3 4 5 6 7 8 9 a b c d e f
|
||||
CHECK-HM-1024-NEXT: 048c048c048c048c048c048c048c048c048c048c048c048c048c048c048c048c
|
||||
CHECK-HM-1024-NEXT: 0
|
||||
CHECK-HM-1024-NEXT: 0
|
||||
|
||||
CHECK-BAT-HM-64: (349, 1126]
|
||||
CHECK-BAT-HM-4K: (605, 2182]
|
||||
|
||||
CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries
|
||||
CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2
|
||||
CHECK-HEATMAP-BAT: HEATMAP: dumping heatmap with bucket size 64
|
||||
CHECK-HEATMAP-BAT: HEATMAP: dumping heatmap with bucket size 4096
|
||||
CHECK-HEATMAP-BAT: HEATMAP: dumping heatmap with bucket size 16384
|
||||
CHECK-HEATMAP-BAT: HEATMAP: dumping heatmap with bucket size 1048576
|
||||
CHECK-HEATMAP-BAT-NOT: HEATMAP: dumping heatmap with bucket size
|
||||
|
||||
CHECK-SEC-HOT-BAT: Section Name, Begin Address, End Address, Percentage Hotness, Utilization Pct, Partition Score
|
||||
CHECK-SEC-HOT-BAT-NEXT: .init, 0x401000, 0x40101b, 17.2888, 100.0000, 0.1729
|
||||
|
||||
@@ -59,7 +59,26 @@ static std::string GetExecutablePath(const char *Argv0) {
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
cl::HideUnrelatedOptions(ArrayRef(opts::HeatmapCategories));
|
||||
cl::ParseCommandLineOptions(argc, argv, "");
|
||||
cl::ParseCommandLineOptions(
|
||||
argc, argv,
|
||||
" BOLT Code Heatmap tool\n\n"
|
||||
" Produces code heatmaps using sampled profile\n\n"
|
||||
|
||||
" Inputs:\n"
|
||||
" - Binary (supports BOLT-optimized binaries),\n"
|
||||
" - Sampled profile collected from the binary:\n"
|
||||
" - perf data or pre-aggregated profile data (instrumentation profile "
|
||||
"not supported)\n"
|
||||
" - perf data can have basic (IP) or branch-stack (LBR) samples\n\n"
|
||||
|
||||
" Outputs:\n"
|
||||
" - Heatmaps: colored ASCII (requires a color-capable terminal or a"
|
||||
" conversion tool like `aha`)\n"
|
||||
" Multiple heatmaps are produced by default with different "
|
||||
"granularities (set by `block-size` option)\n"
|
||||
" - Section hotness: per-section samples% and utilization%\n"
|
||||
" - Cumulative distribution: working set size corresponding to a "
|
||||
"given percentile of samples\n");
|
||||
|
||||
if (opts::PerfData.empty()) {
|
||||
errs() << ToolName << ": expected -perfdata=<filename> option.\n";
|
||||
|
||||
Reference in New Issue
Block a user