[ctxprof] Track unhandled call targets (#131417)
Collect profiles for functions we encounter when collecting a contextual profile, that are not associated with a call site. This is expected to happen for signal handlers, but it also - problematically - currently happens for mem{cpy|move|set}, which are currently inserted after profile instrumentation.
Collecting a "regular" flat profile in these cases would hide the problem - that we lose better profile opportunities.
This commit is contained in:
@@ -121,6 +121,7 @@ class ProfileWriter {
|
||||
public:
|
||||
virtual void startContextSection() = 0;
|
||||
virtual void writeContextual(const ctx_profile::ContextNode &RootNode,
|
||||
const ctx_profile::ContextNode *Unhandled,
|
||||
uint64_t TotalRootEntryCount) = 0;
|
||||
virtual void endContextSection() = 0;
|
||||
|
||||
|
||||
@@ -246,22 +246,37 @@ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
|
||||
|
||||
ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
|
||||
uint32_t NumCounters) {
|
||||
// 1) if we are under a root (regardless if this thread is collecting or not a
|
||||
|
||||
// 1) if we are currently collecting a contextual profile, fetch a ContextNode
|
||||
// in the `Unhandled` set. We want to do this regardless of `ProfilingStarted`
|
||||
// to (hopefully) offset the penalty of creating these contexts to before
|
||||
// profiling.
|
||||
//
|
||||
// 2) if we are under a root (regardless if this thread is collecting or not a
|
||||
// contextual profile for that root), do not collect a flat profile. We want
|
||||
// to keep flat profiles only for activations that can't happen under a root,
|
||||
// to avoid confusing profiles. We can, for example, combine flattened and
|
||||
// flat profiles meaningfully, as we wouldn't double-count anything.
|
||||
//
|
||||
// 2) to avoid lengthy startup, don't bother with flat profiles until the
|
||||
// profiling started. We would reset them anyway when profiling starts.
|
||||
// 3) to avoid lengthy startup, don't bother with flat profiles until the
|
||||
// profiling has started. We would reset them anyway when profiling starts.
|
||||
// HOWEVER. This does lose profiling for message pumps: those functions are
|
||||
// entered once and never exit. They should be assumed to be entered before
|
||||
// profiling starts - because profiling should start after the server is up
|
||||
// and running (which is equivalent to "message pumps are set up").
|
||||
if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
|
||||
return TheScratchContext;
|
||||
return markAsScratch(
|
||||
onContextEnter(*getFlatProfile(Data, Guid, NumCounters)));
|
||||
ContextRoot *R = __llvm_ctx_profile_current_context_root;
|
||||
if (!R) {
|
||||
if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
|
||||
return TheScratchContext;
|
||||
else
|
||||
return markAsScratch(
|
||||
onContextEnter(*getFlatProfile(Data, Guid, NumCounters)));
|
||||
}
|
||||
auto [Iter, Ins] = R->Unhandled.insert({Guid, nullptr});
|
||||
if (Ins)
|
||||
Iter->second =
|
||||
getCallsiteSlow(Guid, &R->FirstUnhandledCalleeNode, NumCounters, 0);
|
||||
return markAsScratch(onContextEnter(*Iter->second));
|
||||
}
|
||||
|
||||
ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
|
||||
@@ -396,6 +411,8 @@ void __llvm_ctx_profile_start_collection() {
|
||||
++NumMemUnits;
|
||||
|
||||
resetContextNode(*Root->FirstNode);
|
||||
if (Root->FirstUnhandledCalleeNode)
|
||||
resetContextNode(*Root->FirstUnhandledCalleeNode);
|
||||
__sanitizer::atomic_store_relaxed(&Root->TotalEntries, 0);
|
||||
}
|
||||
__sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
|
||||
@@ -416,8 +433,9 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
|
||||
__sanitizer::Printf("[ctxprof] Contextual Profile is %s\n", "invalid");
|
||||
return false;
|
||||
}
|
||||
Writer.writeContextual(*Root->FirstNode, __sanitizer::atomic_load_relaxed(
|
||||
&Root->TotalEntries));
|
||||
Writer.writeContextual(
|
||||
*Root->FirstNode, Root->FirstUnhandledCalleeNode,
|
||||
__sanitizer::atomic_load_relaxed(&Root->TotalEntries));
|
||||
}
|
||||
Writer.endContextSection();
|
||||
Writer.startFlatSection();
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#define CTX_PROFILE_CTXINSTRPROFILING_H_
|
||||
|
||||
#include "CtxInstrContextNode.h"
|
||||
#include "sanitizer_common/sanitizer_dense_map.h"
|
||||
#include "sanitizer_common/sanitizer_mutex.h"
|
||||
#include <sanitizer/common_interface_defs.h>
|
||||
|
||||
@@ -83,6 +84,20 @@ struct ContextRoot {
|
||||
// Count the number of entries - regardless if we could take the `Taken` mutex
|
||||
::__sanitizer::atomic_uint64_t TotalEntries = {};
|
||||
|
||||
// Profiles for functions we encounter when collecting a contextual profile,
|
||||
// that are not associated with a callsite. This is expected to happen for
|
||||
// signal handlers, but it also - problematically - currently happens for
|
||||
// call sites generated after profile instrumentation, primarily
|
||||
// mem{cpy|move|set}.
|
||||
// `Unhandled` serves 2 purposes:
|
||||
// 1. identifying such cases (like the memops)
|
||||
// 2. collecting a profile for them, which can be at least used as a flat
|
||||
// profile
|
||||
::__sanitizer::DenseMap<GUID, ContextNode *> Unhandled;
|
||||
// Keep the unhandled contexts in a list, as we allocate them, as it makes it
|
||||
// simpler to send to the writer when the profile is fetched.
|
||||
ContextNode *FirstUnhandledCalleeNode = nullptr;
|
||||
|
||||
// Taken is used to ensure only one thread traverses the contextual graph -
|
||||
// either to read it or to write it. On server side, the same entrypoint will
|
||||
// be entered by numerous threads, but over time, the profile aggregated by
|
||||
|
||||
@@ -240,7 +240,7 @@ TEST_F(ContextTest, Dump) {
|
||||
TestProfileWriter(ContextRoot *Root, size_t Entries)
|
||||
: Root(Root), Entries(Entries) {}
|
||||
|
||||
void writeContextual(const ContextNode &Node,
|
||||
void writeContextual(const ContextNode &Node, const ContextNode *Unhandled,
|
||||
uint64_t TotalRootEntryCount) override {
|
||||
EXPECT_EQ(TotalRootEntryCount, Entries);
|
||||
EXPECT_EQ(EnteredSectionCount, 1);
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
// RUN: cp %llvm_src/include/llvm/ProfileData/CtxInstrContextNode.h %t_include/
|
||||
//
|
||||
// Compile with ctx instrumentation "on". We treat "theRoot" as callgraph root.
|
||||
// RUN: %clangxx %s %ctxprofilelib -I%t_include -O2 -o %t.bin -mllvm -profile-context-root=theRoot
|
||||
// RUN: %clangxx %s %ctxprofilelib -I%t_include -O2 -o %t.bin -mllvm -profile-context-root=theRoot \
|
||||
// RUN: -mllvm -ctx-prof-skip-callsite-instr=skip_me
|
||||
//
|
||||
// Run the binary, and observe the profile fetch handler's output.
|
||||
// RUN: %t.bin | FileCheck %s
|
||||
@@ -20,11 +21,14 @@ extern "C" bool __llvm_ctx_profile_fetch(ProfileWriter &);
|
||||
|
||||
// avoid name mangling
|
||||
extern "C" {
|
||||
__attribute__((noinline)) void skip_me() {}
|
||||
|
||||
__attribute__((noinline)) void someFunction(int I) {
|
||||
if (I % 2)
|
||||
printf("check odd\n");
|
||||
else
|
||||
printf("check even\n");
|
||||
skip_me();
|
||||
}
|
||||
|
||||
// block inlining because the pre-inliner otherwise will inline this - it's
|
||||
@@ -36,6 +40,7 @@ __attribute__((noinline)) void theRoot() {
|
||||
for (auto I = 0; I < 2; ++I) {
|
||||
someFunction(I);
|
||||
}
|
||||
skip_me();
|
||||
}
|
||||
|
||||
__attribute__((noinline)) void flatFct() {
|
||||
@@ -85,9 +90,13 @@ class TestProfileWriter : public ProfileWriter {
|
||||
}
|
||||
|
||||
void writeContextual(const ContextNode &RootNode,
|
||||
const ContextNode *Unhandled,
|
||||
uint64_t EntryCount) override {
|
||||
std::cout << "Entering Root " << RootNode.guid()
|
||||
<< " with total entry count " << EntryCount << std::endl;
|
||||
for (const auto *P = Unhandled; P; P = P->next())
|
||||
std::cout << "Unhandled GUID: " << P->guid() << " entered "
|
||||
<< P->entrycount() << " times" << std::endl;
|
||||
printProfile(RootNode, "", "");
|
||||
}
|
||||
|
||||
@@ -119,6 +128,8 @@ class TestProfileWriter : public ProfileWriter {
|
||||
// branches would be taken once, so the second counter is 1.
|
||||
// CHECK-NEXT: Entered Context Section
|
||||
// CHECK-NEXT: Entering Root 8657661246551306189 with total entry count 1
|
||||
// skip_me is entered 4 times: 3 via `someFunction`, and once from `theRoot`
|
||||
// CHECK-NEXT: Unhandled GUID: 17928815489886282963 entered 4 times
|
||||
// CHECK-NEXT: Guid: 8657661246551306189
|
||||
// CHECK-NEXT: Entries: 1
|
||||
// CHECK-NEXT: 2 counters and 3 callsites
|
||||
@@ -135,6 +146,8 @@ class TestProfileWriter : public ProfileWriter {
|
||||
// CHECK-NEXT: Counter values: 2 1
|
||||
// CHECK-NEXT: Exited Context Section
|
||||
// CHECK-NEXT: Entered Flat Section
|
||||
// This is `skip_me`. Entered 3 times via `someFunction`
|
||||
// CHECK-NEXT: Flat: 17928815489886282963 3
|
||||
// CHECK-NEXT: Flat: 6759619411192316602 3,1
|
||||
// This is flatFct (guid: 14569438697463215220)
|
||||
// CHECK-NEXT: Flat: 14569438697463215220 1,2
|
||||
|
||||
@@ -121,6 +121,7 @@ class ProfileWriter {
|
||||
public:
|
||||
virtual void startContextSection() = 0;
|
||||
virtual void writeContextual(const ctx_profile::ContextNode &RootNode,
|
||||
const ctx_profile::ContextNode *Unhandled,
|
||||
uint64_t TotalRootEntryCount) = 0;
|
||||
virtual void endContextSection() = 0;
|
||||
|
||||
|
||||
@@ -74,6 +74,12 @@ class IndexNode {
|
||||
};
|
||||
} // namespace internal
|
||||
|
||||
// Setting initial capacity to 1 because all contexts must have at least 1
|
||||
// counter, and then, because all contexts belonging to a function have the same
|
||||
// size, there'll be at most one other heap allocation.
|
||||
using CtxProfFlatProfile =
|
||||
std::map<GlobalValue::GUID, SmallVector<uint64_t, 1>>;
|
||||
|
||||
/// A node (context) in the loaded contextual profile, suitable for mutation
|
||||
/// during IPO passes. We generally expect a fraction of counters and
|
||||
/// callsites to be populated. We continue to model counters as vectors, but
|
||||
@@ -93,11 +99,16 @@ private:
|
||||
GlobalValue::GUID GUID = 0;
|
||||
SmallVector<uint64_t, 16> Counters;
|
||||
const std::optional<uint64_t> RootEntryCount{};
|
||||
std::optional<CtxProfFlatProfile> Unhandled{};
|
||||
CallsiteMapTy Callsites;
|
||||
|
||||
PGOCtxProfContext(GlobalValue::GUID G, SmallVectorImpl<uint64_t> &&Counters,
|
||||
std::optional<uint64_t> RootEntryCount = std::nullopt)
|
||||
: GUID(G), Counters(std::move(Counters)), RootEntryCount(RootEntryCount) {
|
||||
PGOCtxProfContext(
|
||||
GlobalValue::GUID G, SmallVectorImpl<uint64_t> &&Counters,
|
||||
std::optional<uint64_t> RootEntryCount = std::nullopt,
|
||||
std::optional<CtxProfFlatProfile> &&Unhandled = std::nullopt)
|
||||
: GUID(G), Counters(std::move(Counters)), RootEntryCount(RootEntryCount),
|
||||
Unhandled(std::move(Unhandled)) {
|
||||
assert(RootEntryCount.has_value() == Unhandled.has_value());
|
||||
}
|
||||
|
||||
Expected<PGOCtxProfContext &>
|
||||
@@ -121,6 +132,8 @@ public:
|
||||
bool isRoot() const { return RootEntryCount.has_value(); }
|
||||
uint64_t getTotalRootEntryCount() const { return RootEntryCount.value(); }
|
||||
|
||||
const CtxProfFlatProfile &getUnhandled() const { return Unhandled.value(); }
|
||||
|
||||
uint64_t getEntrycount() const {
|
||||
assert(!Counters.empty() &&
|
||||
"Functions are expected to have at their entry BB instrumented, so "
|
||||
@@ -170,12 +183,6 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// Setting initial capacity to 1 because all contexts must have at least 1
|
||||
// counter, and then, because all contexts belonging to a function have the same
|
||||
// size, there'll be at most one other heap allocation.
|
||||
using CtxProfFlatProfile =
|
||||
std::map<GlobalValue::GUID, SmallVector<uint64_t, 1>>;
|
||||
|
||||
using CtxProfContextualProfiles =
|
||||
std::map<GlobalValue::GUID, PGOCtxProfContext>;
|
||||
struct PGOCtxProfile {
|
||||
@@ -205,6 +212,7 @@ class PGOCtxProfileReader final {
|
||||
|
||||
Error loadContexts(CtxProfContextualProfiles &P);
|
||||
Error loadFlatProfiles(CtxProfFlatProfile &P);
|
||||
Error loadFlatProfileList(CtxProfFlatProfile &P);
|
||||
|
||||
public:
|
||||
PGOCtxProfileReader(StringRef Buffer)
|
||||
|
||||
@@ -36,7 +36,8 @@ enum PGOCtxProfileBlockIDs {
|
||||
ContextNodeBlockID = ContextRootBlockID + 1,
|
||||
FlatProfilesSectionBlockID = ContextNodeBlockID + 1,
|
||||
FlatProfileBlockID = FlatProfilesSectionBlockID + 1,
|
||||
LAST_VALID = FlatProfileBlockID
|
||||
UnhandledBlockID = FlatProfileBlockID + 1,
|
||||
LAST_VALID = UnhandledBlockID
|
||||
};
|
||||
|
||||
/// Write one or more ContextNodes to the provided raw_fd_stream.
|
||||
@@ -94,6 +95,7 @@ public:
|
||||
|
||||
void startContextSection() override;
|
||||
void writeContextual(const ctx_profile::ContextNode &RootNode,
|
||||
const ctx_profile::ContextNode *Unhandled,
|
||||
uint64_t TotalRootEntryCount) override;
|
||||
void endContextSection() override;
|
||||
|
||||
@@ -104,7 +106,7 @@ public:
|
||||
|
||||
// constants used in writing which a reader may find useful.
|
||||
static constexpr unsigned CodeLen = 2;
|
||||
static constexpr uint32_t CurrentVersion = 3;
|
||||
static constexpr uint32_t CurrentVersion = 4;
|
||||
static constexpr unsigned VBREncodingBits = 6;
|
||||
static constexpr StringRef ContainerMagic = "CTXP";
|
||||
};
|
||||
|
||||
@@ -97,7 +97,7 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) {
|
||||
std::optional<SmallVector<uint64_t, 16>> Counters;
|
||||
std::optional<uint32_t> CallsiteIndex;
|
||||
std::optional<uint64_t> TotalEntryCount;
|
||||
|
||||
std::optional<CtxProfFlatProfile> Unhandled;
|
||||
SmallVector<uint64_t, 1> RecordValues;
|
||||
|
||||
const bool ExpectIndex = Kind == PGOCtxProfileBlockIDs::ContextNodeBlockID;
|
||||
@@ -108,14 +108,24 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) {
|
||||
auto GotAllWeNeed = [&]() {
|
||||
return Guid.has_value() && Counters.has_value() &&
|
||||
(!ExpectIndex || CallsiteIndex.has_value()) &&
|
||||
(!IsRoot || TotalEntryCount.has_value());
|
||||
(!IsRoot || TotalEntryCount.has_value()) &&
|
||||
(!IsRoot || Unhandled.has_value());
|
||||
};
|
||||
|
||||
while (!GotAllWeNeed()) {
|
||||
RecordValues.clear();
|
||||
EXPECT_OR_RET(Entry, advance());
|
||||
if (Entry->Kind != BitstreamEntry::Record)
|
||||
if (Entry->Kind != BitstreamEntry::Record) {
|
||||
if (IsRoot && Entry->Kind == BitstreamEntry::SubBlock &&
|
||||
Entry->ID == PGOCtxProfileBlockIDs::UnhandledBlockID) {
|
||||
RET_ON_ERR(enterBlockWithID(PGOCtxProfileBlockIDs::UnhandledBlockID));
|
||||
Unhandled = CtxProfFlatProfile();
|
||||
RET_ON_ERR(loadFlatProfileList(*Unhandled));
|
||||
continue;
|
||||
}
|
||||
return wrongValue(
|
||||
"Expected records before encountering more subcontexts");
|
||||
}
|
||||
EXPECT_OR_RET(ReadRecord,
|
||||
Cursor.readRecord(bitc::UNABBREV_RECORD, RecordValues));
|
||||
switch (*ReadRecord) {
|
||||
@@ -152,7 +162,8 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) {
|
||||
}
|
||||
}
|
||||
|
||||
PGOCtxProfContext Ret(*Guid, std::move(*Counters), TotalEntryCount);
|
||||
PGOCtxProfContext Ret(*Guid, std::move(*Counters), TotalEntryCount,
|
||||
std::move(Unhandled));
|
||||
|
||||
while (canEnterBlockWithID(PGOCtxProfileBlockIDs::ContextNodeBlockID)) {
|
||||
EXPECT_OR_RET(SC, readProfile(PGOCtxProfileBlockIDs::ContextNodeBlockID));
|
||||
@@ -212,9 +223,7 @@ Error PGOCtxProfileReader::loadContexts(CtxProfContextualProfiles &P) {
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
Error PGOCtxProfileReader::loadFlatProfiles(CtxProfFlatProfile &P) {
|
||||
RET_ON_ERR(
|
||||
enterBlockWithID(PGOCtxProfileBlockIDs::FlatProfilesSectionBlockID));
|
||||
Error PGOCtxProfileReader::loadFlatProfileList(CtxProfFlatProfile &P) {
|
||||
while (canEnterBlockWithID(PGOCtxProfileBlockIDs::FlatProfileBlockID)) {
|
||||
EXPECT_OR_RET(E, readProfile(PGOCtxProfileBlockIDs::FlatProfileBlockID));
|
||||
auto Guid = E->second.guid();
|
||||
@@ -224,6 +233,12 @@ Error PGOCtxProfileReader::loadFlatProfiles(CtxProfFlatProfile &P) {
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
Error PGOCtxProfileReader::loadFlatProfiles(CtxProfFlatProfile &P) {
|
||||
RET_ON_ERR(
|
||||
enterBlockWithID(PGOCtxProfileBlockIDs::FlatProfilesSectionBlockID));
|
||||
return loadFlatProfileList(P);
|
||||
}
|
||||
|
||||
Expected<PGOCtxProfile> PGOCtxProfileReader::loadProfiles() {
|
||||
RET_ON_ERR(readMetadata());
|
||||
PGOCtxProfile Ret;
|
||||
@@ -287,10 +302,13 @@ void toYaml(yaml::Output &Out,
|
||||
Out.endSequence();
|
||||
}
|
||||
|
||||
void toYaml(yaml::Output &Out, const CtxProfFlatProfile &Flat);
|
||||
|
||||
void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
|
||||
const SmallVectorImpl<uint64_t> &Counters,
|
||||
const PGOCtxProfContext::CallsiteMapTy &Callsites,
|
||||
std::optional<uint64_t> TotalRootEntryCount = std::nullopt) {
|
||||
std::optional<uint64_t> TotalRootEntryCount = std::nullopt,
|
||||
CtxProfFlatProfile Unhandled = {}) {
|
||||
yaml::EmptyContext Empty;
|
||||
Out.beginMapping();
|
||||
void *SaveInfo = nullptr;
|
||||
@@ -318,6 +336,14 @@ void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
|
||||
Out.endFlowSequence();
|
||||
Out.postflightKey(nullptr);
|
||||
}
|
||||
|
||||
if (!Unhandled.empty()) {
|
||||
assert(TotalRootEntryCount.has_value());
|
||||
Out.preflightKey("Unhandled", false, false, UseDefault, SaveInfo);
|
||||
toYaml(Out, Unhandled);
|
||||
Out.postflightKey(nullptr);
|
||||
}
|
||||
|
||||
if (!Callsites.empty()) {
|
||||
Out.preflightKey("Callsites", true, false, UseDefault, SaveInfo);
|
||||
toYaml(Out, Callsites);
|
||||
@@ -326,10 +352,22 @@ void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
|
||||
Out.endMapping();
|
||||
}
|
||||
|
||||
void toYaml(yaml::Output &Out, const CtxProfFlatProfile &Flat) {
|
||||
void *SaveInfo = nullptr;
|
||||
Out.beginSequence();
|
||||
size_t ElemID = 0;
|
||||
for (const auto &[Guid, Counters] : Flat) {
|
||||
Out.preflightElement(ElemID++, SaveInfo);
|
||||
toYaml(Out, Guid, Counters, {});
|
||||
Out.postflightElement(nullptr);
|
||||
}
|
||||
Out.endSequence();
|
||||
}
|
||||
|
||||
void toYaml(yaml::Output &Out, const PGOCtxProfContext &Ctx) {
|
||||
if (Ctx.isRoot())
|
||||
toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites(),
|
||||
Ctx.getTotalRootEntryCount());
|
||||
Ctx.getTotalRootEntryCount(), Ctx.getUnhandled());
|
||||
else
|
||||
toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites());
|
||||
}
|
||||
@@ -348,14 +386,7 @@ void llvm::convertCtxProfToYaml(raw_ostream &OS, const PGOCtxProfile &Profile) {
|
||||
}
|
||||
if (!Profile.FlatProfiles.empty()) {
|
||||
Out.preflightKey("FlatProfiles", false, false, UseDefault, SaveInfo);
|
||||
Out.beginSequence();
|
||||
size_t ElemID = 0;
|
||||
for (const auto &[Guid, Counters] : Profile.FlatProfiles) {
|
||||
Out.preflightElement(ElemID++, SaveInfo);
|
||||
toYaml(Out, Guid, Counters, {});
|
||||
Out.postflightElement(nullptr);
|
||||
}
|
||||
Out.endSequence();
|
||||
toYaml(Out, Profile.FlatProfiles);
|
||||
Out.postflightKey(nullptr);
|
||||
}
|
||||
Out.endMapping();
|
||||
|
||||
@@ -58,6 +58,7 @@ PGOCtxProfileWriter::PGOCtxProfileWriter(
|
||||
DescribeRecord(PGOCtxProfileRecords::TotalRootEntryCount,
|
||||
"TotalRootEntryCount");
|
||||
DescribeRecord(PGOCtxProfileRecords::Counters, "Counters");
|
||||
DescribeBlock(PGOCtxProfileBlockIDs::UnhandledBlockID, "Unhandled");
|
||||
DescribeBlock(PGOCtxProfileBlockIDs::ContextNodeBlockID, "Context");
|
||||
DescribeRecord(PGOCtxProfileRecords::Guid, "GUID");
|
||||
DescribeRecord(PGOCtxProfileRecords::CallsiteIndex, "CalleeIndex");
|
||||
@@ -135,6 +136,7 @@ void PGOCtxProfileWriter::endContextSection() { Writer.ExitBlock(); }
|
||||
void PGOCtxProfileWriter::endFlatSection() { Writer.ExitBlock(); }
|
||||
|
||||
void PGOCtxProfileWriter::writeContextual(const ContextNode &RootNode,
|
||||
const ContextNode *Unhandled,
|
||||
uint64_t TotalRootEntryCount) {
|
||||
if (!IncludeEmpty && (!TotalRootEntryCount || (RootNode.counters_size() > 0 &&
|
||||
RootNode.entrycount() == 0)))
|
||||
@@ -143,6 +145,12 @@ void PGOCtxProfileWriter::writeContextual(const ContextNode &RootNode,
|
||||
writeGuid(RootNode.guid());
|
||||
writeRootEntryCount(TotalRootEntryCount);
|
||||
writeCounters({RootNode.counters(), RootNode.counters_size()});
|
||||
|
||||
Writer.EnterSubblock(PGOCtxProfileBlockIDs::UnhandledBlockID, CodeLen);
|
||||
for (const auto *P = Unhandled; P; P = P->next())
|
||||
writeFlat(P->guid(), P->counters(), P->counters_size());
|
||||
Writer.ExitBlock();
|
||||
|
||||
writeSubcontexts(RootNode);
|
||||
Writer.ExitBlock();
|
||||
}
|
||||
@@ -159,6 +167,9 @@ namespace {
|
||||
|
||||
/// Representation of the context node suitable for yaml serialization /
|
||||
/// deserialization.
|
||||
using SerializableFlatProfileRepresentation =
|
||||
std::pair<ctx_profile::GUID, std::vector<uint64_t>>;
|
||||
|
||||
struct SerializableCtxRepresentation {
|
||||
ctx_profile::GUID Guid = 0;
|
||||
std::vector<uint64_t> Counters;
|
||||
@@ -167,11 +178,9 @@ struct SerializableCtxRepresentation {
|
||||
|
||||
struct SerializableRootRepresentation : public SerializableCtxRepresentation {
|
||||
uint64_t TotalRootEntryCount = 0;
|
||||
std::vector<SerializableFlatProfileRepresentation> Unhandled;
|
||||
};
|
||||
|
||||
using SerializableFlatProfileRepresentation =
|
||||
std::pair<ctx_profile::GUID, std::vector<uint64_t>>;
|
||||
|
||||
struct SerializableProfileRepresentation {
|
||||
std::vector<SerializableRootRepresentation> Contexts;
|
||||
std::vector<SerializableFlatProfileRepresentation> FlatProfiles;
|
||||
@@ -228,6 +237,7 @@ template <> struct yaml::MappingTraits<SerializableRootRepresentation> {
|
||||
static void mapping(yaml::IO &IO, SerializableRootRepresentation &R) {
|
||||
yaml::MappingTraits<SerializableCtxRepresentation>::mapping(IO, R);
|
||||
IO.mapRequired("TotalRootEntryCount", R.TotalRootEntryCount);
|
||||
IO.mapOptional("Unhandled", R.Unhandled);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -265,7 +275,15 @@ Error llvm::createCtxProfFromYAML(StringRef Profile, raw_ostream &Out) {
|
||||
if (!TopList)
|
||||
return createStringError(
|
||||
"Unexpected error converting internal structure to ctx profile");
|
||||
Writer.writeContextual(*TopList, DC.TotalRootEntryCount);
|
||||
|
||||
ctx_profile::ContextNode *FirstUnhandled = nullptr;
|
||||
for (const auto &U : DC.Unhandled) {
|
||||
SerializableCtxRepresentation Unhandled;
|
||||
Unhandled.Guid = U.first;
|
||||
Unhandled.Counters = U.second;
|
||||
FirstUnhandled = createNode(Nodes, Unhandled, FirstUnhandled);
|
||||
}
|
||||
Writer.writeContextual(*TopList, FirstUnhandled, DC.TotalRootEntryCount);
|
||||
}
|
||||
Writer.endContextSection();
|
||||
}
|
||||
|
||||
@@ -55,6 +55,7 @@
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/ADT/iterator.h"
|
||||
#include "llvm/ADT/iterator_range.h"
|
||||
@@ -338,6 +339,11 @@ cl::opt<bool> PGOInstrumentColdFunctionOnly(
|
||||
"pgo-instrument-cold-function-only", cl::init(false), cl::Hidden,
|
||||
cl::desc("Enable cold function only instrumentation."));
|
||||
|
||||
cl::list<std::string> CtxPGOSkipCallsiteInstrument(
|
||||
"ctx-prof-skip-callsite-instr", cl::Hidden,
|
||||
cl::desc("Do not instrument callsites to functions in this list. Intended "
|
||||
"for testing."));
|
||||
|
||||
extern cl::opt<unsigned> MaxNumVTableAnnotations;
|
||||
|
||||
namespace llvm {
|
||||
@@ -957,6 +963,10 @@ void FunctionInstrumenter::instrument() {
|
||||
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
|
||||
|
||||
if (IsCtxProf) {
|
||||
StringSet<> SkipCSInstr;
|
||||
SkipCSInstr.insert(CtxPGOSkipCallsiteInstrument.begin(),
|
||||
CtxPGOSkipCallsiteInstrument.end());
|
||||
|
||||
auto *CSIntrinsic =
|
||||
Intrinsic::getOrInsertDeclaration(&M, Intrinsic::instrprof_callsite);
|
||||
// We want to count the instrumentable callsites, then instrument them. This
|
||||
@@ -973,6 +983,9 @@ void FunctionInstrumenter::instrument() {
|
||||
if (auto *CS = dyn_cast<CallBase>(&Instr)) {
|
||||
if (!InstrProfCallsite::canInstrumentCallsite(*CS))
|
||||
continue;
|
||||
if (CS->getCalledFunction() &&
|
||||
SkipCSInstr.contains(CS->getCalledFunction()->getName()))
|
||||
continue;
|
||||
Visitor(CS);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
|
||||
Contexts:
|
||||
- Guid: 1000
|
||||
TotalRootEntryCount: 5
|
||||
Counters: [ 1, 2, 3 ]
|
||||
Callsites:
|
||||
- [ ]
|
||||
- - Guid: 2000
|
||||
Counters: [ 4, 5 ]
|
||||
- Guid: 18446744073709551613
|
||||
Counters: [ 6, 7, 8 ]
|
||||
- - Guid: 3000
|
||||
Counters: [ 40, 50 ]
|
||||
- Guid: 18446744073709551612
|
||||
TotalRootEntryCount: 45
|
||||
Counters: [ 5, 9, 10 ]
|
||||
Unhandled:
|
||||
- Guid: 555
|
||||
Counters: [ 1, 2, 3 ]
|
||||
- Guid: 2000
|
||||
Counters: [ 1, 2 ]
|
||||
FlatProfiles:
|
||||
- Guid: 1234
|
||||
Counters: [ 5, 6, 7 ]
|
||||
- Guid: 5555
|
||||
Counters: [ 1 ]
|
||||
@@ -23,6 +23,11 @@
|
||||
; RUN: llvm-ctxprof-util toYAML -input %t/valid-flat-first.bitstream -output %t/valid-flat-first.yaml
|
||||
; RUN: diff %t/valid-flat-first.yaml %S/Inputs/valid.yaml
|
||||
|
||||
; A variant with unhandled contexts:
|
||||
; RUN: llvm-ctxprof-util fromYAML -input %S/Inputs/valid-with-unhandled.yaml -output %t/valid-with-unhandled.bitstream
|
||||
; RUN: llvm-ctxprof-util toYAML -input %t/valid-with-unhandled.bitstream -output %t/valid-with-unhandled.yaml
|
||||
; RUN: diff %t/valid-with-unhandled.yaml %S/Inputs/valid-with-unhandled.yaml
|
||||
|
||||
; For the valid case, check against a reference output.
|
||||
; Note that uint64_t are printed as signed values by llvm-bcanalyzer:
|
||||
; * 18446744073709551613 in yaml is -3 in the output
|
||||
@@ -33,17 +38,19 @@
|
||||
|
||||
; EMPTY: <BLOCKINFO_BLOCK/>
|
||||
; EMPTY-NEXT: <Metadata NumWords=1 BlockCodeSize=2>
|
||||
; EMPTY-NEXT: <Version op0=3/>
|
||||
; EMPTY-NEXT: <Version op0=4/>
|
||||
; EMPTY-NEXT: </Metadata>
|
||||
|
||||
; VALID: <BLOCKINFO_BLOCK/>
|
||||
; VALID-NEXT: <Metadata NumWords=46 BlockCodeSize=2>
|
||||
; VALID-NEXT: <Version op0=3/>
|
||||
; VALID-NEXT: <Contexts NumWords=30 BlockCodeSize=2>
|
||||
; VALID-NEXT: <Root NumWords=20 BlockCodeSize=2>
|
||||
; VALID-NEXT: <Metadata NumWords=53 BlockCodeSize=2>
|
||||
; VALID-NEXT: <Version op0=4/>
|
||||
; VALID-NEXT: <Contexts NumWords=37 BlockCodeSize=2>
|
||||
; VALID-NEXT: <Root NumWords=23 BlockCodeSize=2>
|
||||
; VALID-NEXT: <GUID op0=1000/>
|
||||
; VALID-NEXT: <TotalRootEntryCount op0=5/>
|
||||
; VALID-NEXT: <Counters op0=1 op1=2 op2=3/>
|
||||
; VALID-NEXT: <Unhandled NumWords=1 BlockCodeSize=2>
|
||||
; VALID-NEXT: </Unhandled>
|
||||
; VALID-NEXT: <Context NumWords=5 BlockCodeSize=2>
|
||||
; VALID-NEXT: <GUID op0=-3/>
|
||||
; VALID-NEXT: <CalleeIndex op0=1/>
|
||||
@@ -60,10 +67,12 @@
|
||||
; VALID-NEXT: <Counters op0=40 op1=50/>
|
||||
; VALID-NEXT: </Context>
|
||||
; VALID-NEXT: </Root>
|
||||
; VALID-NEXT: <Root NumWords=5 BlockCodeSize=2>
|
||||
; VALID-NEXT: <Root NumWords=9 BlockCodeSize=2>
|
||||
; VALID-NEXT: <GUID op0=-4/>
|
||||
; VALID-NEXT: <TotalRootEntryCount op0=45/>
|
||||
; VALID-NEXT: <Counters op0=5 op1=9 op2=10/>
|
||||
; VALID-NEXT: <Unhandled NumWords=1 BlockCodeSize=2>
|
||||
; VALID-NEXT: </Unhandled>
|
||||
; VALID-NEXT: </Root>
|
||||
; VALID-NEXT: </Contexts>
|
||||
; VALID-NEXT: <FlatProfiles NumWords=10 BlockCodeSize=2>
|
||||
|
||||
@@ -103,7 +103,7 @@ TEST_F(PGOCtxProfRWTest, RoundTrip) {
|
||||
PGOCtxProfileWriter Writer(Out);
|
||||
Writer.startContextSection();
|
||||
for (auto &[_, R] : roots())
|
||||
Writer.writeContextual(*R, 1);
|
||||
Writer.writeContextual(*R, nullptr, 1);
|
||||
Writer.endContextSection();
|
||||
}
|
||||
}
|
||||
@@ -155,7 +155,7 @@ TEST_F(PGOCtxProfRWTest, InvalidCounters) {
|
||||
{
|
||||
PGOCtxProfileWriter Writer(Out);
|
||||
Writer.startContextSection();
|
||||
Writer.writeContextual(*R, 2);
|
||||
Writer.writeContextual(*R, nullptr, 2);
|
||||
Writer.endContextSection();
|
||||
}
|
||||
}
|
||||
@@ -181,7 +181,7 @@ TEST_F(PGOCtxProfRWTest, CountersAllZero) {
|
||||
{
|
||||
PGOCtxProfileWriter Writer(Out);
|
||||
Writer.startContextSection();
|
||||
Writer.writeContextual(*R, 42);
|
||||
Writer.writeContextual(*R, nullptr, 42);
|
||||
Writer.endContextSection();
|
||||
}
|
||||
}
|
||||
@@ -208,7 +208,7 @@ TEST_F(PGOCtxProfRWTest, CountersAllZeroWithOverride) {
|
||||
PGOCtxProfileWriter Writer(Out, /*VersionOverride=*/std::nullopt,
|
||||
/*IncludeEmpty=*/true);
|
||||
Writer.startContextSection();
|
||||
Writer.writeContextual(*R, 8);
|
||||
Writer.writeContextual(*R, nullptr, 8);
|
||||
Writer.endContextSection();
|
||||
}
|
||||
}
|
||||
@@ -293,8 +293,8 @@ TEST_F(PGOCtxProfRWTest, DuplicateRoots) {
|
||||
PGOCtxProfileWriter Writer(Out, /*VersionOverride=*/std::nullopt,
|
||||
/*IncludeEmpty=*/true);
|
||||
Writer.startContextSection();
|
||||
Writer.writeContextual(*createNode(1, 1, 1), 1);
|
||||
Writer.writeContextual(*createNode(1, 1, 1), 1);
|
||||
Writer.writeContextual(*createNode(1, 1, 1), nullptr, 1);
|
||||
Writer.writeContextual(*createNode(1, 1, 1), nullptr, 1);
|
||||
Writer.endContextSection();
|
||||
}
|
||||
}
|
||||
@@ -322,7 +322,7 @@ TEST_F(PGOCtxProfRWTest, DuplicateTargets) {
|
||||
R->subContexts()[0] = L2;
|
||||
PGOCtxProfileWriter Writer(Out);
|
||||
Writer.startContextSection();
|
||||
Writer.writeContextual(*R, 1);
|
||||
Writer.writeContextual(*R, nullptr, 1);
|
||||
Writer.endContextSection();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user