[lld][COFF] Remove duplicate strtab entries (#141197)

String table size is too big for large binary when symbol table is
enabled. Some strings in strtab is same so it can be reused.

This patch revives 9ffeaaa authored by mstorsjo with the prioritized
string table builder to fix debug section name issue (see 4d2eda2
for more details).

---------

Co-authored-by: Wen Haohai <whh108@live.com>
Co-authored-by: James Henderson <James.Henderson@sony.com>
This commit is contained in:
Haohai Wen
2025-06-21 13:44:10 +08:00
committed by GitHub
parent 0fa0c3c233
commit 9cc9efc483
4 changed files with 97 additions and 26 deletions

View File

@@ -24,6 +24,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/Parallel.h"
@@ -201,7 +202,8 @@ struct ChunkRange {
class Writer {
public:
Writer(COFFLinkerContext &c)
: buffer(c.e.outputBuffer), delayIdata(c), ctx(c) {}
: buffer(c.e.outputBuffer), strtab(StringTableBuilder::WinCOFF),
delayIdata(c), ctx(c) {}
void run();
private:
@@ -281,7 +283,7 @@ private:
std::unique_ptr<FileOutputBuffer> &buffer;
std::map<PartialSectionKey, PartialSection *> partialSections;
std::vector<char> strtab;
StringTableBuilder strtab;
std::vector<llvm::object::coff_symbol16> outputSymtab;
std::vector<ECCodeMapEntry> codeMap;
IdataContents idata;
@@ -1434,14 +1436,6 @@ void Writer::assignOutputSectionIndices() {
sc->setOutputSectionIdx(mc->getOutputSectionIdx());
}
size_t Writer::addEntryToStringTable(StringRef str) {
assert(str.size() > COFF::NameSize);
size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field
strtab.insert(strtab.end(), str.begin(), str.end());
strtab.push_back('\0');
return offsetOfEntry;
}
std::optional<coff_symbol16> Writer::createSymbol(Defined *def) {
coff_symbol16 sym;
switch (def->kind()) {
@@ -1482,7 +1476,8 @@ std::optional<coff_symbol16> Writer::createSymbol(Defined *def) {
StringRef name = def->getName();
if (name.size() > COFF::NameSize) {
sym.Name.Offset.Zeroes = 0;
sym.Name.Offset.Offset = addEntryToStringTable(name);
sym.Name.Offset.Offset = 0; // Filled in later.
strtab.add(name);
} else {
memset(sym.Name.ShortName, 0, COFF::NameSize);
memcpy(sym.Name.ShortName, name.data(), name.size());
@@ -1514,6 +1509,7 @@ void Writer::createSymbolAndStringTable() {
// solution where discardable sections have long names preserved and
// non-discardable sections have their names truncated, to ensure that any
// section which is mapped at runtime also has its name mapped at runtime.
SmallVector<OutputSection *> longNameSections;
for (OutputSection *sec : ctx.outputSections) {
if (sec->name.size() <= COFF::NameSize)
continue;
@@ -1525,9 +1521,13 @@ void Writer::createSymbolAndStringTable() {
<< " is longer than 8 characters and will use a non-standard string "
"table";
}
sec->setStringTableOff(addEntryToStringTable(sec->name));
// Put the section name in the begin of strtab so that its offset is less
// than Max7DecimalOffset otherwise lldb/gdb will not read it.
strtab.add(sec->name, /*Priority=*/UINT8_MAX);
longNameSections.push_back(sec);
}
std::vector<std::pair<size_t, StringRef>> longNameSymbols;
if (ctx.config.writeSymtab) {
for (ObjFile *file : ctx.objFileInstances) {
for (Symbol *b : file->getSymbols()) {
@@ -1542,29 +1542,44 @@ void Writer::createSymbolAndStringTable() {
continue;
}
if (std::optional<coff_symbol16> sym = createSymbol(d))
if (std::optional<coff_symbol16> sym = createSymbol(d)) {
if (d->getName().size() > COFF::NameSize)
longNameSymbols.emplace_back(outputSymtab.size(), d->getName());
outputSymtab.push_back(*sym);
}
if (auto *dthunk = dyn_cast<DefinedImportThunk>(d)) {
if (!dthunk->wrappedSym->writtenToSymtab) {
dthunk->wrappedSym->writtenToSymtab = true;
if (std::optional<coff_symbol16> sym =
createSymbol(dthunk->wrappedSym))
createSymbol(dthunk->wrappedSym)) {
if (d->getName().size() > COFF::NameSize)
longNameSymbols.emplace_back(outputSymtab.size(),
dthunk->wrappedSym->getName());
outputSymtab.push_back(*sym);
}
}
}
}
}
}
if (outputSymtab.empty() && strtab.empty())
return;
strtab.finalize();
for (OutputSection *sec : longNameSections)
sec->setStringTableOff(strtab.getOffset(sec->name));
for (auto P : longNameSymbols) {
coff_symbol16 &sym = outputSymtab[P.first];
sym.Name.Offset.Offset = strtab.getOffset(P.second);
}
// We position the symbol table to be adjacent to the end of the last section.
uint64_t fileOff = fileSize;
pointerToSymbolTable = fileOff;
fileOff += outputSymtab.size() * sizeof(coff_symbol16);
fileOff += 4 + strtab.size();
fileOff += strtab.getSize();
fileSize = alignTo(fileOff, ctx.config.fileAlign);
}
@@ -1945,9 +1960,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
// Create the string table, it follows immediately after the symbol table.
// The first 4 bytes is length including itself.
buf = reinterpret_cast<uint8_t *>(&symbolTable[numberOfSymbols]);
write32le(buf, strtab.size() + 4);
if (!strtab.empty())
memcpy(buf + 4, strtab.data(), strtab.size());
strtab.write(buf);
}
void Writer::openFile(StringRef path) {

29
lld/test/COFF/strtab.s Normal file
View File

@@ -0,0 +1,29 @@
# RUN: llvm-mc -triple=x86_64-windows-msvc %s -filetype=obj -o %t.obj
# RUN: lld-link -out:%t.exe -entry:main %t.obj -debug:dwarf
# RUN: llvm-readobj --string-table %t.exe | FileCheck %s
# CHECK: StringTable {
# CHECK-NEXT: Length: 87
# CHECK-NEXT: [ 4] .debug_abbrev
# CHECK-NEXT: [ 12] .debug_line
# CHECK-NEXT: [ 1e] long_name_symbolz
# CHECK-NEXT: [ 30] .debug_abbrez
# CHECK-NEXT: [ 3e] __impl_long_name_symbolA
# CHECK-NEXT: }
.global main
.text
main:
long_name_symbolz:
long_name_symbolA:
__impl_long_name_symbolA:
name_symbolA:
.debug_abbrez:
ret
.section .debug_abbrev,"dr"
.byte 0
.section .debug_line,"dr"
.byte 0

View File

@@ -38,6 +38,8 @@ public:
};
private:
// Only non-zero priority will be recorded.
DenseMap<CachedHashStringRef, uint8_t> StringPriorityMap;
DenseMap<CachedHashStringRef, size_t> StringIndexMap;
size_t Size = 0;
Kind K;
@@ -51,11 +53,16 @@ public:
LLVM_ABI StringTableBuilder(Kind K, Align Alignment = Align(1));
LLVM_ABI ~StringTableBuilder();
/// Add a string to the builder. Returns the position of S in the
/// table. The position will be changed if finalize is used.
/// Can only be used before the table is finalized.
LLVM_ABI size_t add(CachedHashStringRef S);
size_t add(StringRef S) { return add(CachedHashStringRef(S)); }
/// Add a string to the builder. Returns the position of S in the table. The
/// position will be changed if finalize is used. Can only be used before the
/// table is finalized. Priority is only useful with reordering. Strings with
/// the same priority will be put together. Strings with higher priority are
/// placed closer to the begin of string table. When adding same string with
/// different priority, the maximum priority win.
LLVM_ABI size_t add(CachedHashStringRef S, uint8_t Priority = 0);
size_t add(StringRef S, uint8_t Priority = 0) {
return add(CachedHashStringRef(S), Priority);
}
/// Analyze the strings and build the final table. No more strings can
/// be added after this point.
@@ -78,6 +85,7 @@ public:
bool contains(StringRef S) const { return contains(CachedHashStringRef(S)); }
bool contains(CachedHashStringRef S) const { return StringIndexMap.count(S); }
bool empty() const { return StringIndexMap.empty(); }
size_t getSize() const { return Size; }
LLVM_ABI void clear();

View File

@@ -138,13 +138,31 @@ void StringTableBuilder::finalizeInOrder() {
void StringTableBuilder::finalizeStringTable(bool Optimize) {
Finalized = true;
if (Optimize) {
if (Optimize && StringIndexMap.size()) {
std::vector<StringPair *> Strings;
Strings.reserve(StringIndexMap.size());
for (StringPair &P : StringIndexMap)
Strings.push_back(&P);
multikeySort(Strings, 0);
size_t RangeBegin = 0;
MutableArrayRef<StringPair *> StringsRef(Strings);
if (StringPriorityMap.size()) {
llvm::sort(Strings,
[&](const StringPair *LHS, const StringPair *RHS) -> bool {
return StringPriorityMap.lookup(LHS->first) >
StringPriorityMap.lookup(RHS->first);
});
uint8_t RangePriority = StringPriorityMap.lookup(Strings[0]->first);
for (size_t I = 1, E = Strings.size(); I != E && RangePriority; ++I) {
uint8_t Priority = StringPriorityMap.lookup(Strings[I]->first);
if (Priority != RangePriority) {
multikeySort(StringsRef.slice(RangeBegin, I - RangeBegin), 0);
RangePriority = Priority;
RangeBegin = I;
}
}
}
multikeySort(StringsRef.slice(RangeBegin), 0);
initSize();
StringRef Previous;
@@ -199,11 +217,14 @@ size_t StringTableBuilder::getOffset(CachedHashStringRef S) const {
return I->second;
}
size_t StringTableBuilder::add(CachedHashStringRef S) {
size_t StringTableBuilder::add(CachedHashStringRef S, uint8_t Priority) {
if (K == WinCOFF)
assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
assert(!isFinalized());
if (Priority)
StringPriorityMap[S] = std::max(Priority, StringPriorityMap[S]);
auto P = StringIndexMap.try_emplace(S);
if (P.second) {
size_t Start = alignTo(Size, Alignment);