Restore "[MemProf] ThinLTO summary support" with fixes

This restores 4745945500, which was
reverted in commit 452a14efc8, along with
fixes for a couple of bot failures.
This commit is contained in:
Teresa Johnson
2022-11-15 07:59:37 -08:00
parent 674729813e
commit 98ed423361
21 changed files with 1304 additions and 48 deletions

View File

@@ -211,12 +211,10 @@ protected:
void writePerModuleGlobalValueSummary();
private:
void writePerModuleFunctionSummaryRecord(SmallVector<uint64_t, 64> &NameVals,
GlobalValueSummary *Summary,
unsigned ValueID,
unsigned FSCallsAbbrev,
unsigned FSCallsProfileAbbrev,
const Function &F);
void writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F);
void writeModuleLevelReferences(const GlobalVariable &V,
SmallVector<uint64_t, 64> &NameVals,
unsigned FSModRefsAbbrev,
@@ -424,6 +422,11 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
/// index and a value id generated by this class to use in references.
std::map<GlobalValue::GUID, unsigned> GUIDToValueIdMap;
// The sorted stack id indices actually used in the summary entries being
// written, which will be a subset of those in the full index in the case of
// distributed indexes.
std::vector<unsigned> StackIdIndices;
/// Tracks the last value id recorded in the GUIDToValueMap.
unsigned GlobalValueId = 0;
@@ -441,9 +444,28 @@ public:
// in writing out the call graph edges. Save the mapping from GUID
// to the new global value id to use when writing those edges, which
// are currently saved in the index in terms of GUID.
forEachSummary([&](GVInfo I, bool) {
forEachSummary([&](GVInfo I, bool IsAliasee) {
GUIDToValueIdMap[I.first] = ++GlobalValueId;
if (IsAliasee)
return;
auto *FS = dyn_cast<FunctionSummary>(I.second);
if (!FS)
return;
// Record all stack id indices actually used in the summary entries being
// written, so that we can compact them in the case of distributed ThinLTO
// indexes.
for (auto &CI : FS->callsites())
for (auto Idx : CI.StackIdIndices)
StackIdIndices.push_back(Idx);
for (auto &AI : FS->allocs())
for (auto &MIB : AI.MIBs)
for (auto Idx : MIB.StackIdIndices)
StackIdIndices.push_back(Idx);
});
llvm::sort(StackIdIndices);
StackIdIndices.erase(
std::unique(StackIdIndices.begin(), StackIdIndices.end()),
StackIdIndices.end());
}
/// The below iterator returns the GUID and associated summary.
@@ -3888,11 +3910,64 @@ static void writeTypeIdCompatibleVtableSummaryRecord(
}
}
static void writeFunctionHeapProfileRecords(
BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev,
unsigned AllocAbbrev, bool PerModule,
std::function<unsigned(const ValueInfo &VI)> GetValueID,
std::function<unsigned(unsigned)> GetStackIndex) {
SmallVector<uint64_t> Record;
for (auto &CI : FS->callsites()) {
Record.clear();
// Per module callsite clones should always have a single entry of
// value 0.
assert(!PerModule || (CI.Clones.size() == 1 && CI.Clones[0] == 0));
Record.push_back(GetValueID(CI.Callee));
if (!PerModule) {
Record.push_back(CI.StackIdIndices.size());
Record.push_back(CI.Clones.size());
}
for (auto Id : CI.StackIdIndices)
Record.push_back(GetStackIndex(Id));
if (!PerModule) {
for (auto V : CI.Clones)
Record.push_back(V);
}
Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_CALLSITE_INFO
: bitc::FS_COMBINED_CALLSITE_INFO,
Record, CallsiteAbbrev);
}
for (auto &AI : FS->allocs()) {
Record.clear();
// Per module alloc versions should always have a single entry of
// value 0.
assert(!PerModule || (AI.Versions.size() == 1 && AI.Versions[0] == 0));
if (!PerModule) {
Record.push_back(AI.MIBs.size());
Record.push_back(AI.Versions.size());
}
for (auto &MIB : AI.MIBs) {
Record.push_back((uint8_t)MIB.AllocType);
Record.push_back(MIB.StackIdIndices.size());
for (auto Id : MIB.StackIdIndices)
Record.push_back(GetStackIndex(Id));
}
if (!PerModule) {
for (auto V : AI.Versions)
Record.push_back(V);
}
Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
: bitc::FS_COMBINED_ALLOC_INFO,
Record, AllocAbbrev);
}
}
// Helper to emit a single function summary record.
void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
const Function &F) {
unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F) {
NameVals.push_back(ValueID);
FunctionSummary *FS = cast<FunctionSummary>(Summary);
@@ -3902,6 +3977,12 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
return {VE.getValueID(VI.getValue())};
});
writeFunctionHeapProfileRecords(
Stream, FS, CallsiteAbbrev, AllocAbbrev,
/*PerModule*/ true,
/*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); },
/*GetStackIndex*/ [&](unsigned I) { return I; });
auto SpecialRefCnts = FS->specialRefCounts();
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
@@ -4013,6 +4094,16 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
ArrayRef<uint64_t>{GVI.second, GVI.first});
}
if (!Index->stackIds().empty()) {
auto StackIdAbbv = std::make_shared<BitCodeAbbrev>();
StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS));
// numids x stackid
StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv));
Stream.EmitRecord(bitc::FS_STACK_IDS, Index->stackIds(), StackIdAbbvId);
}
// Abbrev for FS_PERMODULE_PROFILE.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE));
@@ -4084,6 +4175,21 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned TypeIdCompatibleVtableAbbrev = Stream.EmitAbbrev(std::move(Abbv));
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_CALLSITE_INFO));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
// n x stackidindex
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO));
// n x (alloc type, numstackids, numstackids x stackidindex)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<uint64_t, 64> NameVals;
// Iterate over the list of functions instead of the Index to
// ensure the ordering is stable.
@@ -4102,7 +4208,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
}
auto *Summary = VI.getSummaryList()[0].get();
writePerModuleFunctionSummaryRecord(NameVals, Summary, VE.getValueID(&F),
FSCallsAbbrev, FSCallsProfileAbbrev, F);
FSCallsAbbrev, FSCallsProfileAbbrev,
CallsiteAbbrev, AllocAbbrev, F);
}
// Capture references from GlobalVariable initializers, which are outside
@@ -4144,7 +4251,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
/// Emit the combined summary section into the combined index file.
void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3);
Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 4);
Stream.EmitRecord(
bitc::FS_VERSION,
ArrayRef<uint64_t>{ModuleSummaryIndex::BitcodeSummaryVersion});
@@ -4157,6 +4264,21 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
ArrayRef<uint64_t>{GVI.second, GVI.first});
}
if (!StackIdIndices.empty()) {
auto StackIdAbbv = std::make_shared<BitCodeAbbrev>();
StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS));
// numids x stackid
StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv));
// Write the stack ids used by this index, which will be a subset of those in
// the full index in the case of distributed indexes.
std::vector<uint64_t> StackIds;
for (auto &I : StackIdIndices)
StackIds.push_back(Index.getStackIdAtIndex(I));
Stream.EmitRecord(bitc::FS_STACK_IDS, StackIds, StackIdAbbvId);
}
// Abbrev for FS_COMBINED.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED));
@@ -4210,6 +4332,26 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv));
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_CALLSITE_INFO));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numstackindices
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
// numstackindices x stackidindex, numver x version
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
// nummib x (alloc type, numstackids, numstackids x stackidindex),
// numver x version
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// The aliases are emitted as a post-pass, and will point to the value
// id of the aliasee. Save them in a vector for post-processing.
SmallVector<AliasSummary *, 64> Aliases;
@@ -4286,6 +4428,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
}
auto GetValueId = [&](const ValueInfo &VI) -> Optional<unsigned> {
if (!VI)
return None;
return getValueId(VI.getGUID());
};
@@ -4293,6 +4437,27 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
writeFunctionTypeMetadataRecords(Stream, FS, GetValueId);
getReferencedTypeIds(FS, ReferencedTypeIds);
writeFunctionHeapProfileRecords(
Stream, FS, CallsiteAbbrev, AllocAbbrev,
/*PerModule*/ false,
/*GetValueId*/ [&](const ValueInfo &VI) -> unsigned {
Optional<unsigned> ValueID = GetValueId(VI);
// This can happen in shared index files for distributed ThinLTO if
// the callee function summary is not included. Record 0 which we
// will have to deal with conservatively when doing any kind of
// validation in the ThinLTO backends.
if (!ValueID)
return 0;
return *ValueID;
},
/*GetStackIndex*/ [&](unsigned I) {
// Get the corresponding index into the list of StackIdIndices
// actually being written for this combined index (which may be a
// subset in the case of distributed indexes).
auto Lower = llvm::lower_bound(StackIdIndices, I);
return std::distance(StackIdIndices.begin(), Lower);
});
NameVals.push_back(*ValueId);
NameVals.push_back(Index.getModuleId(FS->modulePath()));
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));