Restore "[MemProf] ThinLTO summary support" with fixes
This restores 4745945500, which was reverted in commit 452a14efc8, along with fixes for a couple of bot failures.
This commit is contained in:
@@ -211,12 +211,10 @@ protected:
|
||||
void writePerModuleGlobalValueSummary();
|
||||
|
||||
private:
|
||||
void writePerModuleFunctionSummaryRecord(SmallVector<uint64_t, 64> &NameVals,
|
||||
GlobalValueSummary *Summary,
|
||||
unsigned ValueID,
|
||||
unsigned FSCallsAbbrev,
|
||||
unsigned FSCallsProfileAbbrev,
|
||||
const Function &F);
|
||||
void writePerModuleFunctionSummaryRecord(
|
||||
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
|
||||
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
|
||||
unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F);
|
||||
void writeModuleLevelReferences(const GlobalVariable &V,
|
||||
SmallVector<uint64_t, 64> &NameVals,
|
||||
unsigned FSModRefsAbbrev,
|
||||
@@ -424,6 +422,11 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
|
||||
/// index and a value id generated by this class to use in references.
|
||||
std::map<GlobalValue::GUID, unsigned> GUIDToValueIdMap;
|
||||
|
||||
// The sorted stack id indices actually used in the summary entries being
|
||||
// written, which will be a subset of those in the full index in the case of
|
||||
// distributed indexes.
|
||||
std::vector<unsigned> StackIdIndices;
|
||||
|
||||
/// Tracks the last value id recorded in the GUIDToValueMap.
|
||||
unsigned GlobalValueId = 0;
|
||||
|
||||
@@ -441,9 +444,28 @@ public:
|
||||
// in writing out the call graph edges. Save the mapping from GUID
|
||||
// to the new global value id to use when writing those edges, which
|
||||
// are currently saved in the index in terms of GUID.
|
||||
forEachSummary([&](GVInfo I, bool) {
|
||||
forEachSummary([&](GVInfo I, bool IsAliasee) {
|
||||
GUIDToValueIdMap[I.first] = ++GlobalValueId;
|
||||
if (IsAliasee)
|
||||
return;
|
||||
auto *FS = dyn_cast<FunctionSummary>(I.second);
|
||||
if (!FS)
|
||||
return;
|
||||
// Record all stack id indices actually used in the summary entries being
|
||||
// written, so that we can compact them in the case of distributed ThinLTO
|
||||
// indexes.
|
||||
for (auto &CI : FS->callsites())
|
||||
for (auto Idx : CI.StackIdIndices)
|
||||
StackIdIndices.push_back(Idx);
|
||||
for (auto &AI : FS->allocs())
|
||||
for (auto &MIB : AI.MIBs)
|
||||
for (auto Idx : MIB.StackIdIndices)
|
||||
StackIdIndices.push_back(Idx);
|
||||
});
|
||||
llvm::sort(StackIdIndices);
|
||||
StackIdIndices.erase(
|
||||
std::unique(StackIdIndices.begin(), StackIdIndices.end()),
|
||||
StackIdIndices.end());
|
||||
}
|
||||
|
||||
/// The below iterator returns the GUID and associated summary.
|
||||
@@ -3888,11 +3910,64 @@ static void writeTypeIdCompatibleVtableSummaryRecord(
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit the heap profile records attached to the function summary \p FS:
/// one callsite-info record for every entry of FS->callsites(), followed by
/// one alloc-info record for every entry of FS->allocs().
///
/// Record layouts, in emission order:
///   per-module callsite: [valueid, stackidindex...]
///   combined callsite:   [valueid, numstackindices, numclones,
///                         stackidindex..., clone...]
///   per-module alloc:    [(alloctype, numstackids, stackidindex...)...]
///   combined alloc:      [nummib, numversions,
///                         (alloctype, numstackids, stackidindex...)...,
///                         version...]
///
/// \param Stream          bitstream the records are emitted to.
/// \param FS              function summary supplying callsites and allocs.
/// \param CallsiteAbbrev  abbreviation id passed along with callsite records.
/// \param AllocAbbrev     abbreviation id passed along with alloc records.
/// \param PerModule       true selects the FS_PERMODULE_* record codes and the
///                        compact layout; false selects FS_COMBINED_*.
/// \param GetValueID      maps a callsite's callee to the value id written.
/// \param GetStackIndex   maps a summary stack id index to the index written.
static void writeFunctionHeapProfileRecords(
    BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev,
    unsigned AllocAbbrev, bool PerModule,
    std::function<unsigned(const ValueInfo &VI)> GetValueID,
    std::function<unsigned(unsigned)> GetStackIndex) {
  // Explicit element counts and the trailing clone/version lists are only
  // written in the combined (non per-module) form.
  const bool IsCombined = !PerModule;
  const unsigned CallsiteCode = PerModule ? bitc::FS_PERMODULE_CALLSITE_INFO
                                          : bitc::FS_COMBINED_CALLSITE_INFO;
  const unsigned AllocCode = PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
                                       : bitc::FS_COMBINED_ALLOC_INFO;

  // Single scratch buffer, reset before each record is assembled.
  SmallVector<uint64_t> Vals;

  for (auto &Callsite : FS->callsites()) {
    Vals.clear();
    // In the per-module form the clone list must be exactly {0}.
    assert(IsCombined ||
           (Callsite.Clones.size() == 1 && Callsite.Clones[0] == 0));

    Vals.push_back(GetValueID(Callsite.Callee));
    if (IsCombined) {
      Vals.push_back(Callsite.StackIdIndices.size());
      Vals.push_back(Callsite.Clones.size());
    }
    for (auto StackIdx : Callsite.StackIdIndices)
      Vals.push_back(GetStackIndex(StackIdx));
    if (IsCombined)
      for (auto CloneNo : Callsite.Clones)
        Vals.push_back(CloneNo);

    Stream.EmitRecord(CallsiteCode, Vals, CallsiteAbbrev);
  }

  for (auto &Alloc : FS->allocs()) {
    Vals.clear();
    // In the per-module form the version list must be exactly {0}.
    assert(IsCombined ||
           (Alloc.Versions.size() == 1 && Alloc.Versions[0] == 0));

    if (IsCombined) {
      Vals.push_back(Alloc.MIBs.size());
      Vals.push_back(Alloc.Versions.size());
    }
    // Each MIB contributes its allocation type, its stack id count, and then
    // the (remapped) stack id indices themselves.
    for (auto &Mib : Alloc.MIBs) {
      Vals.push_back(static_cast<uint8_t>(Mib.AllocType));
      Vals.push_back(Mib.StackIdIndices.size());
      for (auto StackIdx : Mib.StackIdIndices)
        Vals.push_back(GetStackIndex(StackIdx));
    }
    if (IsCombined)
      for (auto VersionNo : Alloc.Versions)
        Vals.push_back(VersionNo);

    Stream.EmitRecord(AllocCode, Vals, AllocAbbrev);
  }
}
|
||||
|
||||
// Helper to emit a single function summary record.
|
||||
void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
|
||||
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
|
||||
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
|
||||
const Function &F) {
|
||||
unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F) {
|
||||
NameVals.push_back(ValueID);
|
||||
|
||||
FunctionSummary *FS = cast<FunctionSummary>(Summary);
|
||||
@@ -3902,6 +3977,12 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
|
||||
return {VE.getValueID(VI.getValue())};
|
||||
});
|
||||
|
||||
writeFunctionHeapProfileRecords(
|
||||
Stream, FS, CallsiteAbbrev, AllocAbbrev,
|
||||
/*PerModule*/ true,
|
||||
/*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); },
|
||||
/*GetStackIndex*/ [&](unsigned I) { return I; });
|
||||
|
||||
auto SpecialRefCnts = FS->specialRefCounts();
|
||||
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
|
||||
NameVals.push_back(FS->instCount());
|
||||
@@ -4013,6 +4094,16 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
|
||||
ArrayRef<uint64_t>{GVI.second, GVI.first});
|
||||
}
|
||||
|
||||
if (!Index->stackIds().empty()) {
|
||||
auto StackIdAbbv = std::make_shared<BitCodeAbbrev>();
|
||||
StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS));
|
||||
// numids x stackid
|
||||
StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
|
||||
StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
|
||||
unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv));
|
||||
Stream.EmitRecord(bitc::FS_STACK_IDS, Index->stackIds(), StackIdAbbvId);
|
||||
}
|
||||
|
||||
// Abbrev for FS_PERMODULE_PROFILE.
|
||||
auto Abbv = std::make_shared<BitCodeAbbrev>();
|
||||
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE));
|
||||
@@ -4084,6 +4175,21 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
|
||||
unsigned TypeIdCompatibleVtableAbbrev = Stream.EmitAbbrev(std::move(Abbv));
|
||||
|
||||
Abbv = std::make_shared<BitCodeAbbrev>();
|
||||
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_CALLSITE_INFO));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
|
||||
// n x stackidindex
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
|
||||
unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));
|
||||
|
||||
Abbv = std::make_shared<BitCodeAbbrev>();
|
||||
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO));
|
||||
// n x (alloc type, numstackids, numstackids x stackidindex)
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
|
||||
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
|
||||
|
||||
SmallVector<uint64_t, 64> NameVals;
|
||||
// Iterate over the list of functions instead of the Index to
|
||||
// ensure the ordering is stable.
|
||||
@@ -4102,7 +4208,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
|
||||
}
|
||||
auto *Summary = VI.getSummaryList()[0].get();
|
||||
writePerModuleFunctionSummaryRecord(NameVals, Summary, VE.getValueID(&F),
|
||||
FSCallsAbbrev, FSCallsProfileAbbrev, F);
|
||||
FSCallsAbbrev, FSCallsProfileAbbrev,
|
||||
CallsiteAbbrev, AllocAbbrev, F);
|
||||
}
|
||||
|
||||
// Capture references from GlobalVariable initializers, which are outside
|
||||
@@ -4144,7 +4251,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
|
||||
|
||||
/// Emit the combined summary section into the combined index file.
|
||||
void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
|
||||
Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3);
|
||||
Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 4);
|
||||
Stream.EmitRecord(
|
||||
bitc::FS_VERSION,
|
||||
ArrayRef<uint64_t>{ModuleSummaryIndex::BitcodeSummaryVersion});
|
||||
@@ -4157,6 +4264,21 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
|
||||
ArrayRef<uint64_t>{GVI.second, GVI.first});
|
||||
}
|
||||
|
||||
if (!StackIdIndices.empty()) {
|
||||
auto StackIdAbbv = std::make_shared<BitCodeAbbrev>();
|
||||
StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS));
|
||||
// numids x stackid
|
||||
StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
|
||||
StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
|
||||
unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv));
|
||||
// Write the stack ids used by this index, which will be a subset of those in
|
||||
// the full index in the case of distributed indexes.
|
||||
std::vector<uint64_t> StackIds;
|
||||
for (auto &I : StackIdIndices)
|
||||
StackIds.push_back(Index.getStackIdAtIndex(I));
|
||||
Stream.EmitRecord(bitc::FS_STACK_IDS, StackIds, StackIdAbbvId);
|
||||
}
|
||||
|
||||
// Abbrev for FS_COMBINED.
|
||||
auto Abbv = std::make_shared<BitCodeAbbrev>();
|
||||
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED));
|
||||
@@ -4210,6 +4332,26 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
|
||||
unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv));
|
||||
|
||||
Abbv = std::make_shared<BitCodeAbbrev>();
|
||||
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_CALLSITE_INFO));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numstackindices
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
|
||||
// numstackindices x stackidindex, numver x version
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
|
||||
unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));
|
||||
|
||||
Abbv = std::make_shared<BitCodeAbbrev>();
|
||||
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
|
||||
// nummib x (alloc type, numstackids, numstackids x stackidindex),
|
||||
// numver x version
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
|
||||
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
|
||||
|
||||
// The aliases are emitted as a post-pass, and will point to the value
|
||||
// id of the aliasee. Save them in a vector for post-processing.
|
||||
SmallVector<AliasSummary *, 64> Aliases;
|
||||
@@ -4286,6 +4428,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
|
||||
}
|
||||
|
||||
auto GetValueId = [&](const ValueInfo &VI) -> Optional<unsigned> {
|
||||
if (!VI)
|
||||
return None;
|
||||
return getValueId(VI.getGUID());
|
||||
};
|
||||
|
||||
@@ -4293,6 +4437,27 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
|
||||
writeFunctionTypeMetadataRecords(Stream, FS, GetValueId);
|
||||
getReferencedTypeIds(FS, ReferencedTypeIds);
|
||||
|
||||
writeFunctionHeapProfileRecords(
|
||||
Stream, FS, CallsiteAbbrev, AllocAbbrev,
|
||||
/*PerModule*/ false,
|
||||
/*GetValueId*/ [&](const ValueInfo &VI) -> unsigned {
|
||||
Optional<unsigned> ValueID = GetValueId(VI);
|
||||
// This can happen in shared index files for distributed ThinLTO if
|
||||
// the callee function summary is not included. Record 0 which we
|
||||
// will have to deal with conservatively when doing any kind of
|
||||
// validation in the ThinLTO backends.
|
||||
if (!ValueID)
|
||||
return 0;
|
||||
return *ValueID;
|
||||
},
|
||||
/*GetStackIndex*/ [&](unsigned I) {
|
||||
// Get the corresponding index into the list of StackIdIndices
|
||||
// actually being written for this combined index (which may be a
|
||||
// subset in the case of distributed indexes).
|
||||
auto Lower = llvm::lower_bound(StackIdIndices, I);
|
||||
return std::distance(StackIdIndices.begin(), Lower);
|
||||
});
|
||||
|
||||
NameVals.push_back(*ValueId);
|
||||
NameVals.push_back(Index.getModuleId(FS->modulePath()));
|
||||
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
|
||||
|
||||
Reference in New Issue
Block a user