Reduce llvm-gsymutil memory usage (#140740)

Same as https://github.com/llvm/llvm-project/pull/139907/, except that
there is now a special dovoidwork helper function.
The previous approach, which used assert(f();return success;), failed tests
in release builds (where assert() is compiled out), so I created a separate
helper. I am open to suggestions on how to solve this more elegantly.

Co-authored-by: Arslan Khabutdinov <akhabutdinov@fb.com>
This commit is contained in:
peremyach
2025-05-21 17:49:12 +01:00
committed by GitHub
parent 584616c878
commit d997b4f531
5 changed files with 139 additions and 107 deletions

View File

@@ -102,6 +102,8 @@ public:
/// Parse a macro[.dwo] or macinfo[.dwo] section.
std::unique_ptr<DWARFDebugMacro>
parseMacroOrMacinfo(MacroSecType SectionType);
/// Run \p Work and return the Error it produces. Each concrete state
/// implementation decides whether the call is performed under a lock.
virtual Error doWorkThreadSafely(function_ref<Error()> Work) = 0;
};
friend class DWARFContextState;
@@ -490,6 +492,10 @@ public:
/// manually only for DWARF5.
void setParseCUTUIndexManually(bool PCUTU) { ParseCUTUIndexManually = PCUTU; }
/// Forward \p Work to the context state's doWorkThreadSafely() and return
/// the Error that call yields.
Error doWorkThreadSafely(function_ref<Error()> Work) {
return State->doWorkThreadSafely(Work);
}
private:
void addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, DWARFDie Die,
std::vector<DILocal> &Result);

View File

@@ -566,6 +566,9 @@ public:
Error tryExtractDIEsIfNeeded(bool CUDieOnly);
/// clearDIEs - Clear parsed DIEs to keep memory usage low.
void clearDIEs(bool KeepCUDie, bool KeepDWODies = false);
private:
/// Size in bytes of the .debug_info data associated with this compile unit.
size_t getDebugInfoSize() const {
@@ -581,9 +584,6 @@ private:
void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs,
std::vector<DWARFDebugInfoEntry> &DIEs) const;
/// clearDIEs - Clear parsed DIEs to keep memory usage low.
void clearDIEs(bool KeepCUDie);
/// parseDWO - Parses .dwo file for current compile unit. Returns true if
/// it was actually constructed.
/// The \p AlternativeLocation specifies an alternative location to get

View File

@@ -621,6 +621,10 @@ public:
else
return getNormalTypeUnitMap();
}
/// Invoke \p Work directly, without taking any lock, and return its Error.
Error doWorkThreadSafely(function_ref<Error()> Work) override {
return Work();
}
};
class ThreadSafeState : public ThreadUnsafeDWARFContextState {
@@ -736,6 +740,11 @@ public:
std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
return ThreadUnsafeDWARFContextState::getTypeUnitMap(IsDWO);
}
/// Acquire the recursive mutex, then delegate to the thread-unsafe base
/// implementation. The lock is held until this function returns.
Error doWorkThreadSafely(function_ref<Error()> Work) override {
std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
return ThreadUnsafeDWARFContextState::doWorkThreadSafely(Work);
}
};
} // namespace

View File

@@ -496,107 +496,111 @@ void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
}
Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
if ((CUDieOnly && !DieArray.empty()) || DieArray.size() > 1)
return Error::success(); // Already parsed.
return Context.doWorkThreadSafely([&]() -> Error {
if ((CUDieOnly && !DieArray.empty()) || DieArray.size() > 1)
return Error::success(); // Already parsed.
bool HasCUDie = !DieArray.empty();
extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
bool HasCUDie = !DieArray.empty();
extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
if (DieArray.empty())
return Error::success();
if (DieArray.empty())
return Error::success();
// If CU DIE was just parsed, copy several attribute values from it.
if (HasCUDie)
return Error::success();
// If CU DIE was just parsed, copy several attribute values from it.
if (HasCUDie)
return Error::success();
DWARFDie UnitDie(this, &DieArray[0]);
if (std::optional<uint64_t> DWOId =
toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id)))
Header.setDWOId(*DWOId);
if (!IsDWO) {
assert(AddrOffsetSectionBase == std::nullopt);
assert(RangeSectionBase == 0);
assert(LocSectionBase == 0);
AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base));
if (!AddrOffsetSectionBase)
AddrOffsetSectionBase =
toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base));
RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
LocSectionBase = toSectionOffset(UnitDie.find(DW_AT_loclists_base), 0);
}
DWARFDie UnitDie(this, &DieArray[0]);
if (std::optional<uint64_t> DWOId =
toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id)))
Header.setDWOId(*DWOId);
if (!IsDWO) {
assert(AddrOffsetSectionBase == std::nullopt);
assert(RangeSectionBase == 0);
assert(LocSectionBase == 0);
AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base));
if (!AddrOffsetSectionBase)
AddrOffsetSectionBase =
toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base));
RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
LocSectionBase = toSectionOffset(UnitDie.find(DW_AT_loclists_base), 0);
}
// In general, in DWARF v5 and beyond we derive the start of the unit's
// contribution to the string offsets table from the unit DIE's
// DW_AT_str_offsets_base attribute. Split DWARF units do not use this
// attribute, so we assume that there is a contribution to the string
// offsets table starting at offset 0 of the debug_str_offsets.dwo section.
// In both cases we need to determine the format of the contribution,
// which may differ from the unit's format.
DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
IsLittleEndian, 0);
if (IsDWO || getVersion() >= 5) {
auto StringOffsetOrError =
IsDWO ? determineStringOffsetsTableContributionDWO(DA)
: determineStringOffsetsTableContribution(DA);
if (!StringOffsetOrError)
return createStringError(errc::invalid_argument,
"invalid reference to or invalid content in "
".debug_str_offsets[.dwo]: " +
toString(StringOffsetOrError.takeError()));
// In general, in DWARF v5 and beyond we derive the start of the unit's
// contribution to the string offsets table from the unit DIE's
// DW_AT_str_offsets_base attribute. Split DWARF units do not use this
// attribute, so we assume that there is a contribution to the string
// offsets table starting at offset 0 of the debug_str_offsets.dwo section.
// In both cases we need to determine the format of the contribution,
// which may differ from the unit's format.
DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
IsLittleEndian, 0);
if (IsDWO || getVersion() >= 5) {
auto StringOffsetOrError =
IsDWO ? determineStringOffsetsTableContributionDWO(DA)
: determineStringOffsetsTableContribution(DA);
if (!StringOffsetOrError) {
return createStringError(errc::invalid_argument,
"invalid reference to or invalid content in "
".debug_str_offsets[.dwo]: " +
toString(StringOffsetOrError.takeError()));
}
StringOffsetsTableContribution = *StringOffsetOrError;
}
StringOffsetsTableContribution = *StringOffsetOrError;
}
// DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to
// describe address ranges.
if (getVersion() >= 5) {
// In case of DWP, the base offset from the index has to be added.
if (IsDWO) {
uint64_t ContributionBaseOffset = 0;
if (auto *IndexEntry = Header.getIndexEntry())
if (auto *Contrib = IndexEntry->getContribution(DW_SECT_RNGLISTS))
ContributionBaseOffset = Contrib->getOffset();
setRangesSection(
&Context.getDWARFObj().getRnglistsDWOSection(),
ContributionBaseOffset +
DWARFListTableHeader::getHeaderSize(Header.getFormat()));
} else
setRangesSection(&Context.getDWARFObj().getRnglistsSection(),
toSectionOffset(UnitDie.find(DW_AT_rnglists_base),
DWARFListTableHeader::getHeaderSize(
Header.getFormat())));
}
// DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to
// describe address ranges.
if (getVersion() >= 5) {
// In case of DWP, the base offset from the index has to be added.
if (IsDWO) {
uint64_t ContributionBaseOffset = 0;
// If we are reading a package file, we need to adjust the location list
// data based on the index entries.
StringRef Data = Header.getVersion() >= 5
? Context.getDWARFObj().getLoclistsDWOSection().Data
: Context.getDWARFObj().getLocDWOSection().Data;
if (auto *IndexEntry = Header.getIndexEntry())
if (auto *Contrib = IndexEntry->getContribution(DW_SECT_RNGLISTS))
ContributionBaseOffset = Contrib->getOffset();
setRangesSection(
&Context.getDWARFObj().getRnglistsDWOSection(),
ContributionBaseOffset +
DWARFListTableHeader::getHeaderSize(Header.getFormat()));
} else
setRangesSection(&Context.getDWARFObj().getRnglistsSection(),
toSectionOffset(UnitDie.find(DW_AT_rnglists_base),
DWARFListTableHeader::getHeaderSize(
Header.getFormat())));
}
if (const auto *C = IndexEntry->getContribution(
Header.getVersion() >= 5 ? DW_SECT_LOCLISTS : DW_SECT_EXT_LOC))
Data = Data.substr(C->getOffset(), C->getLength());
if (IsDWO) {
// If we are reading a package file, we need to adjust the location list
// data based on the index entries.
StringRef Data = Header.getVersion() >= 5
? Context.getDWARFObj().getLoclistsDWOSection().Data
: Context.getDWARFObj().getLocDWOSection().Data;
if (auto *IndexEntry = Header.getIndexEntry())
if (const auto *C = IndexEntry->getContribution(
Header.getVersion() >= 5 ? DW_SECT_LOCLISTS : DW_SECT_EXT_LOC))
Data = Data.substr(C->getOffset(), C->getLength());
DWARFDataExtractor DWARFData(Data, IsLittleEndian, getAddressByteSize());
LocTable =
std::make_unique<DWARFDebugLoclists>(DWARFData, Header.getVersion());
LocSectionBase = DWARFListTableHeader::getHeaderSize(Header.getFormat());
} else if (getVersion() >= 5) {
LocTable = std::make_unique<DWARFDebugLoclists>(
DWARFDataExtractor(Context.getDWARFObj(),
Context.getDWARFObj().getLoclistsSection(),
IsLittleEndian, getAddressByteSize()),
getVersion());
} else {
LocTable = std::make_unique<DWARFDebugLoc>(DWARFDataExtractor(
Context.getDWARFObj(), Context.getDWARFObj().getLocSection(),
IsLittleEndian, getAddressByteSize()));
}
DWARFDataExtractor DWARFData(Data, IsLittleEndian, getAddressByteSize());
LocTable =
std::make_unique<DWARFDebugLoclists>(DWARFData, Header.getVersion());
LocSectionBase = DWARFListTableHeader::getHeaderSize(Header.getFormat());
} else if (getVersion() >= 5) {
LocTable = std::make_unique<DWARFDebugLoclists>(
DWARFDataExtractor(Context.getDWARFObj(),
Context.getDWARFObj().getLoclistsSection(),
IsLittleEndian, getAddressByteSize()),
getVersion());
} else {
LocTable = std::make_unique<DWARFDebugLoc>(DWARFDataExtractor(
Context.getDWARFObj(), Context.getDWARFObj().getLocSection(),
IsLittleEndian, getAddressByteSize()));
}
// Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
// skeleton CU DIE, so that DWARF users not aware of it are not broken.
// Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
// skeleton CU DIE, so that DWARF users not aware of it are not broken.
return Error::success();
return Error::success();
});
}
bool DWARFUnit::parseDWO(StringRef DWOAlternativeLocation) {
@@ -651,15 +655,21 @@ bool DWARFUnit::parseDWO(StringRef DWOAlternativeLocation) {
return true;
}
void DWARFUnit::clearDIEs(bool KeepCUDie) {
// Do not use resize() + shrink_to_fit() to free memory occupied by dies.
// shrink_to_fit() is a *non-binding* request to reduce capacity() to size().
// It depends on the implementation whether the request is fulfilled.
// Create a new vector with a small capacity and assign it to the DieArray to
// have previous contents freed.
DieArray = (KeepCUDie && !DieArray.empty())
? std::vector<DWARFDebugInfoEntry>({DieArray[0]})
: std::vector<DWARFDebugInfoEntry>();
/// Release the parsed DIEs held in DieArray. The work is submitted through
/// Context.doWorkThreadSafely().
///
/// \param KeepCUDie if true and DieArray is non-empty, only its first entry
/// is retained; otherwise DieArray is replaced by an empty vector.
/// \param KeepDWODies if false and a DWO unit is attached, that unit's
/// clearDIEs() is called first with the same arguments.
void DWARFUnit::clearDIEs(bool KeepCUDie, bool KeepDWODies) {
// The lambda below only ever returns Error::success().
cantFail(Context.doWorkThreadSafely([&] {
if (!KeepDWODies && DWO) {
DWO->clearDIEs(KeepCUDie, KeepDWODies);
}
// Do not use resize() + shrink_to_fit() to free memory occupied by dies.
// shrink_to_fit() is a *non-binding* request to reduce capacity() to
// size(). It depends on the implementation whether the request is
// fulfilled. Create a new vector with a small capacity and assign it to the
// DieArray to have previous contents freed.
DieArray = (KeepCUDie && !DieArray.empty())
? std::vector<DWARFDebugInfoEntry>({DieArray[0]})
: std::vector<DWARFDebugInfoEntry>();
return Error::success();
}));
}
Expected<DWARFAddressRangesVector>

View File

@@ -656,6 +656,11 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
DWARFDie Die = getDie(*CU);
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
handleDie(Out, CUI, Die);
// Release the line table, once we're done.
DICtx.clearLineTableForUnit(CU.get());
// Free any DIEs that were allocated by the DWARF parser.
// If/when they're needed by other CU's, they'll be recreated.
CU->clearDIEs(/*KeepCUDie=*/false, /*KeepDWODIEs=*/false);
}
} else {
// LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
@@ -668,12 +673,7 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
for (const auto &CU : DICtx.compile_units())
CU->getAbbreviations();
// Now parse all DIEs in case we have cross compile unit references in a
// thread pool.
DefaultThreadPool pool(hardware_concurrency(NumThreads));
for (const auto &CU : DICtx.compile_units())
pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
pool.wait();
// Now convert all DWARF to GSYM in a thread pool.
std::mutex LogMutex;
@@ -681,11 +681,15 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
DWARFDie Die = getDie(*CU);
if (Die) {
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
pool.async([this, CUI, &LogMutex, &Out, Die]() mutable {
pool.async([this, CUI, &CU, &LogMutex, &Out, Die]() mutable {
std::string storage;
raw_string_ostream StrStream(storage);
OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr);
handleDie(ThreadOut, CUI, Die);
DICtx.clearLineTableForUnit(CU.get());
// Free any DIEs that were allocated by the DWARF parser.
// If/when they're needed by other CU's, they'll be recreated.
CU->clearDIEs(/*KeepCUDie=*/false, /*KeepDWODIEs=*/false);
// Print ThreadLogStorage lines into an actual stream under a lock
std::lock_guard<std::mutex> guard(LogMutex);
if (Out.GetOS()) {
@@ -697,6 +701,9 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
}
pool.wait();
}
// Now get rid of all the DIEs that may have been recreated
for (const auto &CU : DICtx.compile_units())
CU->clearDIEs(/*KeepCUDie=*/false, /*KeepDWODIEs=*/false);
size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
return Error::success();