[MemProf] Add v4 which contains CalleeGuids to CallSiteInfo. (#137394)

This patch adds CalleeGuids to the serialized format and increments the version number to 4. The unit tests are updated to include a new test for v4 and the YAML format is also updated to be able to roundtrip the v4 format.
This commit is contained in:
Snehasish Kumar
2025-05-01 20:17:21 -07:00
committed by GitHub
parent 36541ec3ca
commit 099a0fa3f2
9 changed files with 253 additions and 58 deletions

View File

@@ -705,7 +705,8 @@ private:
unsigned RadixTreeSize = 0;
Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
Error deserializeV3(const unsigned char *Start, const unsigned char *Ptr);
Error deserializeRadixTreeBased(const unsigned char *Start,
const unsigned char *Ptr);
public:
IndexedMemProfReader() = default;

View File

@@ -35,10 +35,12 @@ enum IndexedVersion : uint64_t {
// Version 3: Added a radix tree for call stacks. Switched to linear IDs for
// frames and call stacks.
Version3 = 3,
// Version 4: Added CalleeGuids to call site info.
Version4 = 4,
};
constexpr uint64_t MinimumSupportedVersion = Version2;
constexpr uint64_t MaximumSupportedVersion = Version3;
constexpr uint64_t MaximumSupportedVersion = Version4;
// Verify that the minimum and maximum satisfy the obvious constraint.
static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);

View File

@@ -1,6 +1,7 @@
#ifndef LLVM_PROFILEDATA_MEMPROFYAML_H_
#define LLVM_PROFILEDATA_MEMPROFYAML_H_
#include "llvm/ADT/SmallVector.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/YAMLTraits.h"
@@ -28,8 +29,9 @@ struct AllMemProfData {
namespace yaml {
template <> struct ScalarTraits<memprof::GUIDHex64> {
static void output(const memprof::GUIDHex64 &Val, void *, raw_ostream &Out) {
// Print GUID as a 16-digit hexadecimal number.
Out << format("0x%016" PRIx64, (uint64_t)Val);
// Print GUID as a hexadecimal number with 0x prefix, no padding to keep
// test strings compact.
Out << format("0x%" PRIx64, (uint64_t)Val);
}
static StringRef input(StringRef Scalar, void *, memprof::GUIDHex64 &Val) {
// Reject decimal GUIDs.
@@ -156,10 +158,43 @@ template <> struct MappingTraits<memprof::AllocationInfo> {
// treat the GUID and the fields within MemProfRecord at the same level as if
// the GUID were part of MemProfRecord.
template <> struct MappingTraits<memprof::CallSiteInfo> {
// Helper class to normalize CalleeGuids to use GUIDHex64 for YAML I/O.
class CallSiteInfoWithHex64Guids {
public:
CallSiteInfoWithHex64Guids(IO &) {}
CallSiteInfoWithHex64Guids(IO &, const memprof::CallSiteInfo &CS)
: Frames(CS.Frames) {
// Convert uint64_t GUIDs to GUIDHex64 for serialization.
CalleeGuids.reserve(CS.CalleeGuids.size());
for (uint64_t Guid : CS.CalleeGuids)
CalleeGuids.push_back(memprof::GUIDHex64(Guid));
}
memprof::CallSiteInfo denormalize(IO &) {
memprof::CallSiteInfo CS;
CS.Frames = Frames;
// Convert GUIDHex64 back to uint64_t GUIDs after deserialization.
CS.CalleeGuids.reserve(CalleeGuids.size());
for (memprof::GUIDHex64 HexGuid : CalleeGuids)
CS.CalleeGuids.push_back(HexGuid.value);
return CS;
}
// Keep Frames as is, since MappingTraits<memprof::Frame> handles its
// Function GUID.
decltype(memprof::CallSiteInfo::Frames) Frames;
// Use a vector of GUIDHex64 for CalleeGuids to leverage its ScalarTraits.
SmallVector<memprof::GUIDHex64> CalleeGuids;
};
static void mapping(IO &Io, memprof::CallSiteInfo &CS) {
Io.mapRequired("Frames", CS.Frames);
// Keep this optional to make it easier to write tests.
Io.mapOptional("CalleeGuids", CS.CalleeGuids);
// Use MappingNormalization to handle the conversion between
// memprof::CallSiteInfo and CallSiteInfoWithHex64Guids.
MappingNormalization<CallSiteInfoWithHex64Guids, memprof::CallSiteInfo>
Keys(Io, CS);
Io.mapRequired("Frames", Keys->Frames);
// Map the normalized CalleeGuids (which are now GUIDHex64).
Io.mapOptional("CalleeGuids", Keys->CalleeGuids);
}
};
@@ -176,6 +211,20 @@ template <> struct MappingTraits<memprof::AllMemProfData> {
Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
}
};
template <> struct SequenceTraits<SmallVector<memprof::GUIDHex64>> {
static size_t size(IO &io, SmallVector<memprof::GUIDHex64> &Seq) {
return Seq.size();
}
static memprof::GUIDHex64 &
element(IO &io, SmallVector<memprof::GUIDHex64> &Seq, size_t Index) {
if (Index >= Seq.size())
Seq.resize(Index + 1);
return Seq[Index];
}
static const bool flow = true;
};
} // namespace yaml
} // namespace llvm
@@ -184,5 +233,6 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<memprof::Frame>)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids
#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_

View File

@@ -214,23 +214,13 @@ static Error writeMemProfV2(ProfOStream &OS,
return Error::success();
}
// Write out MemProf Version3 as follows:
// uint64_t Version
// uint64_t CallStackPayloadOffset = Offset for the call stack payload
// uint64_t RecordPayloadOffset = Offset for the record payload
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// Frames serialized one after another
// Call stacks encoded as a radix tree
// OnDiskChainedHashTable MemProfRecordData
static Error writeMemProfV3(ProfOStream &OS,
memprof::IndexedMemProfData &MemProfData,
bool MemProfFullSchema) {
OS.write(memprof::Version3);
static Error writeMemProfRadixTreeBased(
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
memprof::IndexedVersion Version, bool MemProfFullSchema) {
assert((Version == memprof::Version3 || Version == memprof::Version4) &&
"Unsupported version for radix tree format");
OS.write(Version); // Write the specific version (V3 or V4)
uint64_t HeaderUpdatePos = OS.tell();
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
OS.write(0ULL); // Reserve space for the memprof record payload offset.
@@ -258,13 +248,11 @@ static Error writeMemProfV3(ProfOStream &OS,
NumElements);
uint64_t RecordPayloadOffset = OS.tell();
uint64_t RecordTableOffset =
writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
&MemProfCallStackIndexes);
uint64_t RecordTableOffset = writeMemProfRecords(
OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);
// IndexedMemProfReader::deserializeV3 computes the number of elements in the
// call stack array from the difference between CallStackPayloadOffset and
// RecordPayloadOffset. Verify that the computation works.
// Verify that the computation for the number of elements in the call stack
// array works.
assert(CallStackPayloadOffset +
NumElements * sizeof(memprof::LinearFrameId) ==
RecordPayloadOffset);
@@ -279,6 +267,22 @@ static Error writeMemProfV3(ProfOStream &OS,
return Error::success();
}
// Write out MemProf Version3
static Error writeMemProfV3(ProfOStream &OS,
memprof::IndexedMemProfData &MemProfData,
bool MemProfFullSchema) {
return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3,
MemProfFullSchema);
}
// Write out MemProf Version4
static Error writeMemProfV4(ProfOStream &OS,
memprof::IndexedMemProfData &MemProfData,
bool MemProfFullSchema) {
return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4,
MemProfFullSchema);
}
// Write out the MemProf data in a requested version.
Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
memprof::IndexedVersion MemProfVersionRequested,
@@ -288,6 +292,8 @@ Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
case memprof::Version3:
return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
case memprof::Version4:
return writeMemProfV4(OS, MemProfData, MemProfFullSchema);
}
return make_error<InstrProfError>(
@@ -350,8 +356,8 @@ Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
return Error::success();
}
Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
const unsigned char *Ptr) {
Error IndexedMemProfReader::deserializeRadixTreeBased(
const unsigned char *Start, const unsigned char *Ptr) {
// The offset in the stream right before invoking
// CallStackTableGenerator.Emit.
const uint64_t CallStackPayloadOffset =
@@ -382,7 +388,7 @@ Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
/*Buckets=*/Start + RecordTableOffset,
/*Payload=*/Start + RecordPayloadOffset,
/*Base=*/Start, memprof::RecordLookupTrait(memprof::Version3, Schema)));
/*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
return Error::success();
}
@@ -395,8 +401,10 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
const uint64_t FirstWord =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
if (FirstWord == memprof::Version2 || FirstWord == memprof::Version3) {
// Everything is good. We can proceed to deserialize the rest.
// Check if the version is supported
if (FirstWord >= memprof::MinimumSupportedVersion &&
FirstWord <= memprof::MaximumSupportedVersion) {
// Everything is good. We can proceed to deserialize the rest.
Version = static_cast<memprof::IndexedVersion>(FirstWord);
} else {
return make_error<InstrProfError>(
@@ -413,12 +421,13 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
return E;
break;
case memprof::Version3:
if (Error E = deserializeV3(Start, Ptr))
case memprof::Version4:
// V3 and V4 share the same high-level structure (radix tree, linear IDs).
if (Error E = deserializeRadixTreeBased(Start, Ptr))
return E;
break;
}
return Error::success();
}
} // namespace llvm

View File

@@ -1456,16 +1456,6 @@ getMemProfRecordV2(const memprof::IndexedMemProfRecord &IndexedRecord,
return Record;
}
static Expected<memprof::MemProfRecord>
getMemProfRecordV3(const memprof::IndexedMemProfRecord &IndexedRecord,
const unsigned char *FrameBase,
const unsigned char *CallStackBase) {
memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
memprof::LinearCallStackIdConverter CSIdConv(CallStackBase, FrameIdConv);
memprof::MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);
return Record;
}
Expected<memprof::MemProfRecord>
IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
// TODO: Add memprof specific errors.
@@ -1485,13 +1475,20 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
assert(MemProfCallStackTable && "MemProfCallStackTable must be available");
return getMemProfRecordV2(IndexedRecord, *MemProfFrameTable,
*MemProfCallStackTable);
// Combine V3 and V4 cases as the record conversion logic is the same.
case memprof::Version3:
case memprof::Version4:
assert(!MemProfFrameTable && "MemProfFrameTable must not be available");
assert(!MemProfCallStackTable &&
"MemProfCallStackTable must not be available");
assert(FrameBase && "FrameBase must be available");
assert(CallStackBase && "CallStackBase must be available");
return getMemProfRecordV3(IndexedRecord, FrameBase, CallStackBase);
{
memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
memprof::LinearCallStackIdConverter CSIdConv(CallStackBase, FrameIdConv);
memprof::MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);
return Record;
}
}
return make_error<InstrProfError>(
@@ -1505,7 +1502,7 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
IndexedMemProfReader::getMemProfCallerCalleePairs() const {
assert(MemProfRecordTable);
assert(Version == memprof::Version3);
assert(Version == memprof::Version3 || Version == memprof::Version4);
memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
memprof::CallerCalleePairExtractor Extractor(CallStackBase, FrameIdConv,

View File

@@ -48,7 +48,9 @@ size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
switch (Version) {
case Version2:
return serializedSizeV2(*this, Schema);
// Combine V3 and V4 as the size calculation is the same
case Version3:
case Version4:
return serializedSizeV3(*this, Schema);
}
llvm_unreachable("unsupported MemProf version");
@@ -78,10 +80,26 @@ static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
// The number of callsites we have information for.
Result += sizeof(uint64_t);
// The linear call stack ID.
// Note: V3 only stored the LinearCallStackId per call site.
Result += Record.CallSites.size() * sizeof(LinearCallStackId);
return Result;
}
static size_t serializedSizeV4(const IndexedMemProfRecord &Record,
const MemProfSchema &Schema) {
// The number of alloc sites to serialize.
size_t Result = sizeof(uint64_t);
for (const IndexedAllocationInfo &N : Record.AllocSites)
Result += N.serializedSize(Schema, Version4);
// The number of callsites we have information for.
Result += sizeof(uint64_t);
for (const auto &CS : Record.CallSites)
Result += sizeof(LinearCallStackId) + sizeof(uint64_t) +
CS.CalleeGuids.size() * sizeof(GlobalValue::GUID);
return Result;
}
size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
IndexedVersion Version) const {
switch (Version) {
@@ -89,6 +107,8 @@ size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
return serializedSizeV2(*this, Schema);
case Version3:
return serializedSizeV3(*this, Schema);
case Version4:
return serializedSizeV4(*this, Schema);
}
llvm_unreachable("unsupported MemProf version");
}
@@ -134,6 +154,32 @@ static void serializeV3(
}
}
static void serializeV4(
const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
raw_ostream &OS,
llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) {
using namespace support;
endian::Writer LE(OS, llvm::endianness::little);
LE.write<uint64_t>(Record.AllocSites.size());
for (const IndexedAllocationInfo &N : Record.AllocSites) {
assert(MemProfCallStackIndexes.contains(N.CSId));
LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]);
N.Info.serialize(Schema, OS);
}
// Related contexts.
LE.write<uint64_t>(Record.CallSites.size());
for (const auto &CS : Record.CallSites) {
assert(MemProfCallStackIndexes.contains(CS.CSId));
LE.write<LinearCallStackId>(MemProfCallStackIndexes[CS.CSId]);
LE.write<uint64_t>(CS.CalleeGuids.size());
for (const auto &Guid : CS.CalleeGuids)
LE.write<GlobalValue::GUID>(Guid);
}
}
void IndexedMemProfRecord::serialize(
const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version,
llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
@@ -145,6 +191,9 @@ void IndexedMemProfRecord::serialize(
case Version3:
serializeV3(*this, Schema, OS, *MemProfCallStackIndexes);
return;
case Version4:
serializeV4(*this, Schema, OS, *MemProfCallStackIndexes);
return;
}
llvm_unreachable("unsupported MemProf version");
}
@@ -217,6 +266,47 @@ static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
return Record;
}
static IndexedMemProfRecord deserializeV4(const MemProfSchema &Schema,
const unsigned char *Ptr) {
using namespace support;
IndexedMemProfRecord Record;
// Read the meminfo nodes.
const uint64_t NumNodes =
endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
Record.AllocSites.reserve(NumNodes);
const size_t SerializedSize = PortableMemInfoBlock::serializedSize(Schema);
for (uint64_t I = 0; I < NumNodes; I++) {
IndexedAllocationInfo Node;
Node.CSId =
endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
Node.Info.deserialize(Schema, Ptr);
Ptr += SerializedSize;
Record.AllocSites.push_back(Node);
}
// Read the callsite information.
const uint64_t NumCtxs =
endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
Record.CallSites.reserve(NumCtxs);
for (uint64_t J = 0; J < NumCtxs; J++) {
static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
LinearCallStackId CSId =
endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
const uint64_t NumGuids =
endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
SmallVector<GlobalValue::GUID, 1> Guids;
Guids.reserve(NumGuids);
for (uint64_t K = 0; K < NumGuids; ++K)
Guids.push_back(
endian::readNext<GlobalValue::GUID, llvm::endianness::little>(Ptr));
Record.CallSites.emplace_back(CSId, std::move(Guids));
}
return Record;
}
IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
const unsigned char *Ptr,
@@ -226,6 +316,8 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
return deserializeV2(Schema, Ptr);
case Version3:
return deserializeV3(Schema, Ptr);
case Version4:
return deserializeV4(Schema, Ptr);
}
llvm_unreachable("unsupported MemProf version");
}

View File

@@ -1,10 +1,8 @@
; RUN: split-file %s %t
; RUN: llvm-profdata merge %t/memprof-in.yaml -o %t/memprof-out.indexed
; COM: The text format only supports the latest version.
; RUN: llvm-profdata merge --memprof-version=4 %t/memprof-in.yaml -o %t/memprof-out.indexed
; RUN: llvm-profdata show --memory %t/memprof-out.indexed > %t/memprof-out.yaml
; RUN: cmp %t/memprof-in.yaml %t/memprof-out.yaml
; This test is expected to fail until the profile format is updated to handle CalleeGuids.
; XFAIL: *
; RUN: diff -b %t/memprof-in.yaml %t/memprof-out.yaml
; Verify that the YAML output is identical to the YAML input.
;--- memprof-in.yaml
@@ -32,9 +30,9 @@ HeapProfileRecords:
- Frames:
- { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
- { Function: 0x6666666666666666, LineOffset: 66, Column: 60, IsInlineFrame: false }
CalleeGuids: [0x100, 0x200]
CalleeGuids: [ 0x100, 0x200 ]
- Frames:
- { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true }
- { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false }
CalleeGuids: [0x300]
CalleeGuids: [ 0x300 ]
...

View File

@@ -336,7 +336,8 @@ static cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
cl::desc("Specify the version of the memprof format to use"),
cl::init(memprof::Version3),
cl::values(clEnumValN(memprof::Version2, "2", "version 2"),
clEnumValN(memprof::Version3, "3", "version 3")));
clEnumValN(memprof::Version3, "3", "version 3"),
clEnumValN(memprof::Version4, "4", "version 4")));
static cl::opt<bool> MemProfFullSchema(
"memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),

View File

@@ -292,6 +292,51 @@ TEST(MemProf, RecordSerializationRoundTripVerion2) {
EXPECT_EQ(Record, GotRecord);
}
TEST(MemProf, RecordSerializationRoundTripVersion4) {
const auto Schema = getFullSchema();
MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
/*dealloc_timestamp=*/2000, /*alloc_cpu=*/3,
/*dealloc_cpu=*/4, /*Histogram=*/0, /*HistogramSize=*/0);
llvm::SmallVector<CallStackId> CallStackIds = {0x123, 0x456};
llvm::SmallVector<IndexedCallSiteInfo> CallSites;
CallSites.push_back(
IndexedCallSiteInfo(0x333, {0xaaa, 0xbbb})); // CSId with GUIDs
CallSites.push_back(IndexedCallSiteInfo(0x444)); // CSId without GUIDs
IndexedMemProfRecord Record;
for (const auto &CSId : CallStackIds) {
// Use the same info block for both allocation sites.
Record.AllocSites.emplace_back(CSId, Info);
}
Record.CallSites = std::move(CallSites);
std::string Buffer;
llvm::raw_string_ostream OS(Buffer);
// Need a dummy map for V4 serialization
llvm::DenseMap<CallStackId, LinearCallStackId> DummyMap = {
{0x123, 1}, {0x456, 2}, {0x333, 3}, {0x444, 4}};
Record.serialize(Schema, OS, Version4, &DummyMap);
const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize(
Schema, reinterpret_cast<const unsigned char *>(Buffer.data()), Version4);
// Create the expected record using the linear IDs from the dummy map.
IndexedMemProfRecord ExpectedRecord;
for (const auto &CSId : CallStackIds) {
ExpectedRecord.AllocSites.emplace_back(DummyMap[CSId], Info);
}
for (const auto &CSInfo :
Record.CallSites) { // Use original Record's CallSites to get GUIDs
ExpectedRecord.CallSites.emplace_back(DummyMap[CSInfo.CSId],
CSInfo.CalleeGuids);
}
EXPECT_EQ(ExpectedRecord, GotRecord);
}
TEST(MemProf, RecordSerializationRoundTripVersion2HotColdSchema) {
const auto Schema = getHotColdSchema();
@@ -791,7 +836,7 @@ TEST(MemProf, YAMLWriterFrame) {
std::string Out = serializeInYAML(F);
EXPECT_EQ(Out, R"YAML(---
{ Function: 0x0123456789abcdef, LineOffset: 22, Column: 33, IsInlineFrame: true }
{ Function: 0x123456789abcdef, LineOffset: 22, Column: 33, IsInlineFrame: true }
...
)YAML");
}