[lld][COFF] Remove duplicate strtab entries (#141197)
String table size is too big for large binary when symbol table is enabled. Some strings in strtab is same so it can be reused. This patch revives9ffeaaaauthored by mstorsjo with the prioritized string table builder to fix debug section name issue (see4d2eda2for more details). --------- Co-authored-by: Wen Haohai <whh108@live.com> Co-authored-by: James Henderson <James.Henderson@sony.com>
This commit is contained in:
@@ -24,6 +24,7 @@
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
#include "llvm/BinaryFormat/COFF.h"
|
||||
#include "llvm/MC/StringTableBuilder.h"
|
||||
#include "llvm/Support/Endian.h"
|
||||
#include "llvm/Support/FileOutputBuffer.h"
|
||||
#include "llvm/Support/Parallel.h"
|
||||
@@ -201,7 +202,8 @@ struct ChunkRange {
|
||||
class Writer {
|
||||
public:
|
||||
Writer(COFFLinkerContext &c)
|
||||
: buffer(c.e.outputBuffer), delayIdata(c), ctx(c) {}
|
||||
: buffer(c.e.outputBuffer), strtab(StringTableBuilder::WinCOFF),
|
||||
delayIdata(c), ctx(c) {}
|
||||
void run();
|
||||
|
||||
private:
|
||||
@@ -281,7 +283,7 @@ private:
|
||||
|
||||
std::unique_ptr<FileOutputBuffer> &buffer;
|
||||
std::map<PartialSectionKey, PartialSection *> partialSections;
|
||||
std::vector<char> strtab;
|
||||
StringTableBuilder strtab;
|
||||
std::vector<llvm::object::coff_symbol16> outputSymtab;
|
||||
std::vector<ECCodeMapEntry> codeMap;
|
||||
IdataContents idata;
|
||||
@@ -1434,14 +1436,6 @@ void Writer::assignOutputSectionIndices() {
|
||||
sc->setOutputSectionIdx(mc->getOutputSectionIdx());
|
||||
}
|
||||
|
||||
size_t Writer::addEntryToStringTable(StringRef str) {
|
||||
assert(str.size() > COFF::NameSize);
|
||||
size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field
|
||||
strtab.insert(strtab.end(), str.begin(), str.end());
|
||||
strtab.push_back('\0');
|
||||
return offsetOfEntry;
|
||||
}
|
||||
|
||||
std::optional<coff_symbol16> Writer::createSymbol(Defined *def) {
|
||||
coff_symbol16 sym;
|
||||
switch (def->kind()) {
|
||||
@@ -1482,7 +1476,8 @@ std::optional<coff_symbol16> Writer::createSymbol(Defined *def) {
|
||||
StringRef name = def->getName();
|
||||
if (name.size() > COFF::NameSize) {
|
||||
sym.Name.Offset.Zeroes = 0;
|
||||
sym.Name.Offset.Offset = addEntryToStringTable(name);
|
||||
sym.Name.Offset.Offset = 0; // Filled in later.
|
||||
strtab.add(name);
|
||||
} else {
|
||||
memset(sym.Name.ShortName, 0, COFF::NameSize);
|
||||
memcpy(sym.Name.ShortName, name.data(), name.size());
|
||||
@@ -1514,6 +1509,7 @@ void Writer::createSymbolAndStringTable() {
|
||||
// solution where discardable sections have long names preserved and
|
||||
// non-discardable sections have their names truncated, to ensure that any
|
||||
// section which is mapped at runtime also has its name mapped at runtime.
|
||||
SmallVector<OutputSection *> longNameSections;
|
||||
for (OutputSection *sec : ctx.outputSections) {
|
||||
if (sec->name.size() <= COFF::NameSize)
|
||||
continue;
|
||||
@@ -1525,9 +1521,13 @@ void Writer::createSymbolAndStringTable() {
|
||||
<< " is longer than 8 characters and will use a non-standard string "
|
||||
"table";
|
||||
}
|
||||
sec->setStringTableOff(addEntryToStringTable(sec->name));
|
||||
// Put the section name in the begin of strtab so that its offset is less
|
||||
// than Max7DecimalOffset otherwise lldb/gdb will not read it.
|
||||
strtab.add(sec->name, /*Priority=*/UINT8_MAX);
|
||||
longNameSections.push_back(sec);
|
||||
}
|
||||
|
||||
std::vector<std::pair<size_t, StringRef>> longNameSymbols;
|
||||
if (ctx.config.writeSymtab) {
|
||||
for (ObjFile *file : ctx.objFileInstances) {
|
||||
for (Symbol *b : file->getSymbols()) {
|
||||
@@ -1542,15 +1542,22 @@ void Writer::createSymbolAndStringTable() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (std::optional<coff_symbol16> sym = createSymbol(d))
|
||||
if (std::optional<coff_symbol16> sym = createSymbol(d)) {
|
||||
if (d->getName().size() > COFF::NameSize)
|
||||
longNameSymbols.emplace_back(outputSymtab.size(), d->getName());
|
||||
outputSymtab.push_back(*sym);
|
||||
}
|
||||
|
||||
if (auto *dthunk = dyn_cast<DefinedImportThunk>(d)) {
|
||||
if (!dthunk->wrappedSym->writtenToSymtab) {
|
||||
dthunk->wrappedSym->writtenToSymtab = true;
|
||||
if (std::optional<coff_symbol16> sym =
|
||||
createSymbol(dthunk->wrappedSym))
|
||||
createSymbol(dthunk->wrappedSym)) {
|
||||
if (d->getName().size() > COFF::NameSize)
|
||||
longNameSymbols.emplace_back(outputSymtab.size(),
|
||||
dthunk->wrappedSym->getName());
|
||||
outputSymtab.push_back(*sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1560,11 +1567,19 @@ void Writer::createSymbolAndStringTable() {
|
||||
if (outputSymtab.empty() && strtab.empty())
|
||||
return;
|
||||
|
||||
strtab.finalize();
|
||||
for (OutputSection *sec : longNameSections)
|
||||
sec->setStringTableOff(strtab.getOffset(sec->name));
|
||||
for (auto P : longNameSymbols) {
|
||||
coff_symbol16 &sym = outputSymtab[P.first];
|
||||
sym.Name.Offset.Offset = strtab.getOffset(P.second);
|
||||
}
|
||||
|
||||
// We position the symbol table to be adjacent to the end of the last section.
|
||||
uint64_t fileOff = fileSize;
|
||||
pointerToSymbolTable = fileOff;
|
||||
fileOff += outputSymtab.size() * sizeof(coff_symbol16);
|
||||
fileOff += 4 + strtab.size();
|
||||
fileOff += strtab.getSize();
|
||||
fileSize = alignTo(fileOff, ctx.config.fileAlign);
|
||||
}
|
||||
|
||||
@@ -1945,9 +1960,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
|
||||
// Create the string table, it follows immediately after the symbol table.
|
||||
// The first 4 bytes is length including itself.
|
||||
buf = reinterpret_cast<uint8_t *>(&symbolTable[numberOfSymbols]);
|
||||
write32le(buf, strtab.size() + 4);
|
||||
if (!strtab.empty())
|
||||
memcpy(buf + 4, strtab.data(), strtab.size());
|
||||
strtab.write(buf);
|
||||
}
|
||||
|
||||
void Writer::openFile(StringRef path) {
|
||||
|
||||
29
lld/test/COFF/strtab.s
Normal file
29
lld/test/COFF/strtab.s
Normal file
@@ -0,0 +1,29 @@
|
||||
# RUN: llvm-mc -triple=x86_64-windows-msvc %s -filetype=obj -o %t.obj
|
||||
# RUN: lld-link -out:%t.exe -entry:main %t.obj -debug:dwarf
|
||||
# RUN: llvm-readobj --string-table %t.exe | FileCheck %s
|
||||
|
||||
# CHECK: StringTable {
|
||||
# CHECK-NEXT: Length: 87
|
||||
# CHECK-NEXT: [ 4] .debug_abbrev
|
||||
# CHECK-NEXT: [ 12] .debug_line
|
||||
# CHECK-NEXT: [ 1e] long_name_symbolz
|
||||
# CHECK-NEXT: [ 30] .debug_abbrez
|
||||
# CHECK-NEXT: [ 3e] __impl_long_name_symbolA
|
||||
# CHECK-NEXT: }
|
||||
|
||||
|
||||
.global main
|
||||
.text
|
||||
main:
|
||||
long_name_symbolz:
|
||||
long_name_symbolA:
|
||||
__impl_long_name_symbolA:
|
||||
name_symbolA:
|
||||
.debug_abbrez:
|
||||
ret
|
||||
|
||||
.section .debug_abbrev,"dr"
|
||||
.byte 0
|
||||
|
||||
.section .debug_line,"dr"
|
||||
.byte 0
|
||||
@@ -38,6 +38,8 @@ public:
|
||||
};
|
||||
|
||||
private:
|
||||
// Only non-zero priority will be recorded.
|
||||
DenseMap<CachedHashStringRef, uint8_t> StringPriorityMap;
|
||||
DenseMap<CachedHashStringRef, size_t> StringIndexMap;
|
||||
size_t Size = 0;
|
||||
Kind K;
|
||||
@@ -51,11 +53,16 @@ public:
|
||||
LLVM_ABI StringTableBuilder(Kind K, Align Alignment = Align(1));
|
||||
LLVM_ABI ~StringTableBuilder();
|
||||
|
||||
/// Add a string to the builder. Returns the position of S in the
|
||||
/// table. The position will be changed if finalize is used.
|
||||
/// Can only be used before the table is finalized.
|
||||
LLVM_ABI size_t add(CachedHashStringRef S);
|
||||
size_t add(StringRef S) { return add(CachedHashStringRef(S)); }
|
||||
/// Add a string to the builder. Returns the position of S in the table. The
|
||||
/// position will be changed if finalize is used. Can only be used before the
|
||||
/// table is finalized. Priority is only useful with reordering. Strings with
|
||||
/// the same priority will be put together. Strings with higher priority are
|
||||
/// placed closer to the begin of string table. When adding same string with
|
||||
/// different priority, the maximum priority win.
|
||||
LLVM_ABI size_t add(CachedHashStringRef S, uint8_t Priority = 0);
|
||||
size_t add(StringRef S, uint8_t Priority = 0) {
|
||||
return add(CachedHashStringRef(S), Priority);
|
||||
}
|
||||
|
||||
/// Analyze the strings and build the final table. No more strings can
|
||||
/// be added after this point.
|
||||
@@ -78,6 +85,7 @@ public:
|
||||
bool contains(StringRef S) const { return contains(CachedHashStringRef(S)); }
|
||||
bool contains(CachedHashStringRef S) const { return StringIndexMap.count(S); }
|
||||
|
||||
bool empty() const { return StringIndexMap.empty(); }
|
||||
size_t getSize() const { return Size; }
|
||||
LLVM_ABI void clear();
|
||||
|
||||
|
||||
@@ -138,13 +138,31 @@ void StringTableBuilder::finalizeInOrder() {
|
||||
void StringTableBuilder::finalizeStringTable(bool Optimize) {
|
||||
Finalized = true;
|
||||
|
||||
if (Optimize) {
|
||||
if (Optimize && StringIndexMap.size()) {
|
||||
std::vector<StringPair *> Strings;
|
||||
Strings.reserve(StringIndexMap.size());
|
||||
for (StringPair &P : StringIndexMap)
|
||||
Strings.push_back(&P);
|
||||
|
||||
multikeySort(Strings, 0);
|
||||
size_t RangeBegin = 0;
|
||||
MutableArrayRef<StringPair *> StringsRef(Strings);
|
||||
if (StringPriorityMap.size()) {
|
||||
llvm::sort(Strings,
|
||||
[&](const StringPair *LHS, const StringPair *RHS) -> bool {
|
||||
return StringPriorityMap.lookup(LHS->first) >
|
||||
StringPriorityMap.lookup(RHS->first);
|
||||
});
|
||||
uint8_t RangePriority = StringPriorityMap.lookup(Strings[0]->first);
|
||||
for (size_t I = 1, E = Strings.size(); I != E && RangePriority; ++I) {
|
||||
uint8_t Priority = StringPriorityMap.lookup(Strings[I]->first);
|
||||
if (Priority != RangePriority) {
|
||||
multikeySort(StringsRef.slice(RangeBegin, I - RangeBegin), 0);
|
||||
RangePriority = Priority;
|
||||
RangeBegin = I;
|
||||
}
|
||||
}
|
||||
}
|
||||
multikeySort(StringsRef.slice(RangeBegin), 0);
|
||||
initSize();
|
||||
|
||||
StringRef Previous;
|
||||
@@ -199,11 +217,14 @@ size_t StringTableBuilder::getOffset(CachedHashStringRef S) const {
|
||||
return I->second;
|
||||
}
|
||||
|
||||
size_t StringTableBuilder::add(CachedHashStringRef S) {
|
||||
size_t StringTableBuilder::add(CachedHashStringRef S, uint8_t Priority) {
|
||||
if (K == WinCOFF)
|
||||
assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
|
||||
|
||||
assert(!isFinalized());
|
||||
if (Priority)
|
||||
StringPriorityMap[S] = std::max(Priority, StringPriorityMap[S]);
|
||||
|
||||
auto P = StringIndexMap.try_emplace(S);
|
||||
if (P.second) {
|
||||
size_t Start = alignTo(Size, Alignment);
|
||||
|
||||
Reference in New Issue
Block a user