From 4d2eda2bb3156cee63ea486be34b01164b178e10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 21 Jun 2022 13:16:00 +0300 Subject: [PATCH] Revert "[LLD] [COFF] Use StringTableBuilder to optimize the string table" This reverts commit 9ffeaaa0ea54307db309104696a0b6cce6ddda38. This fixes debugging large executables with lldb and gdb. When StringTableBuilder is used, the string offsets for any string can point anywhere in the string table - while previously, all strings were inserted in order (without deduplication and tail merging). For symbols, there are no complications in encoding the string offset; the offset is encoded as a raw 32 bit binary number in half of the symbol name field. For sections, the string table offset is written as "/<decimal offset>", but if the decimal offset would be larger than 7 digits, it's instead written as "//<base64 offset>". Tools that operate on object files can handle the base64 offset format, but apparently neither lldb nor gdb expect that syntax when locating the debug information section. Prior to the reverted commit, all long section names were located at the start of the string table, so their offset never exceeded the range for the decimal syntax. Just reverting this change for now, as the actual benefit from it was fairly modest. Longer term, lld could write all long section names unoptimized at the start of the string table, followed by all the strings for symbol names, with deduplication and tail merging. And lldb and gdb could be fixed to handle sections with the base64 offset syntax. This fixes https://github.com/mstorsjo/llvm-mingw/issues/289. 
--- lld/COFF/Writer.cpp | 50 ++++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 1b9a870d4630..df60c9032b2d 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -24,7 +24,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" @@ -196,9 +195,7 @@ public: // The writer writes a SymbolTable result to a file. class Writer { public: - Writer(COFFLinkerContext &c) - : buffer(errorHandler().outputBuffer), - strtab(StringTableBuilder::WinCOFF), ctx(c) {} + Writer(COFFLinkerContext &c) : buffer(errorHandler().outputBuffer), ctx(c) {} void run(); private: @@ -243,6 +240,7 @@ private: PartialSection *findPartialSection(StringRef name, uint32_t outChars); llvm::Optional createSymbol(Defined *d); + size_t addEntryToStringTable(StringRef str); OutputSection *findSection(StringRef name); void addBaserels(); @@ -252,7 +250,7 @@ private: std::unique_ptr &buffer; std::map partialSections; - StringTableBuilder strtab; + std::vector strtab; std::vector outputSymtab; IdataContents idata; Chunk *importTableStart = nullptr; @@ -1128,6 +1126,14 @@ void Writer::assignOutputSectionIndices() { sc->setOutputSectionIdx(mc->getOutputSectionIdx()); } +size_t Writer::addEntryToStringTable(StringRef str) { + assert(str.size() > COFF::NameSize); + size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field + strtab.insert(strtab.end(), str.begin(), str.end()); + strtab.push_back('\0'); + return offsetOfEntry; +} + Optional Writer::createSymbol(Defined *def) { coff_symbol16 sym; switch (def->kind()) { @@ -1164,8 +1170,7 @@ Optional Writer::createSymbol(Defined *def) { StringRef name = def->getName(); if (name.size() > COFF::NameSize) { sym.Name.Offset.Zeroes = 0; - 
sym.Name.Offset.Offset = 0; // Filled in later - strtab.add(name); + sym.Name.Offset.Offset = addEntryToStringTable(name); } else { memset(sym.Name.ShortName, 0, COFF::NameSize); memcpy(sym.Name.ShortName, name.data(), name.size()); @@ -1192,7 +1197,6 @@ void Writer::createSymbolAndStringTable() { // solution where discardable sections have long names preserved and // non-discardable sections have their names truncated, to ensure that any // section which is mapped at runtime also has its name mapped at runtime. - std::vector longNameSections; for (OutputSection *sec : ctx.outputSections) { if (sec->name.size() <= COFF::NameSize) continue; @@ -1203,12 +1207,9 @@ void Writer::createSymbolAndStringTable() { " is longer than 8 characters and will use a non-standard string " "table"); } - - strtab.add(sec->name); - longNameSections.push_back(sec); + sec->setStringTableOff(addEntryToStringTable(sec->name)); } - std::vector> longNameSymbols; if (config->debugDwarf || config->debugSymtab) { for (ObjFile *file : ctx.objFileInstances) { for (Symbol *b : file->getSymbols()) { @@ -1223,33 +1224,20 @@ void Writer::createSymbolAndStringTable() { continue; } - if (Optional sym = createSymbol(d)) { + if (Optional sym = createSymbol(d)) outputSymtab.push_back(*sym); - if (d->getName().size() > COFF::NameSize) - longNameSymbols.push_back({outputSymtab.size() - 1, d->getName()}); - } } } } - strtab.finalize(); - - for (OutputSection *sec : longNameSections) - sec->setStringTableOff(strtab.getOffset(sec->name)); - - for (auto P : longNameSymbols) { - coff_symbol16 &sym = outputSymtab[P.first]; - sym.Name.Offset.Offset = strtab.getOffset(P.second); - } - - if (outputSymtab.empty() && strtab.getSize() <= 4) + if (outputSymtab.empty() && strtab.empty()) return; // We position the symbol table to be adjacent to the end of the last section. 
uint64_t fileOff = fileSize; pointerToSymbolTable = fileOff; fileOff += outputSymtab.size() * sizeof(coff_symbol16); - fileOff += strtab.getSize(); + fileOff += 4 + strtab.size(); fileSize = alignTo(fileOff, config->fileAlign); } @@ -1524,7 +1512,7 @@ template void Writer::writeHeader() { sectionTable = ArrayRef( buf - ctx.outputSections.size() * sizeof(coff_section), buf); - if (outputSymtab.empty() && strtab.getSize() <= 4) + if (outputSymtab.empty() && strtab.empty()) return; coff->PointerToSymbolTable = pointerToSymbolTable; @@ -1537,7 +1525,9 @@ template void Writer::writeHeader() { // Create the string table, it follows immediately after the symbol table. // The first 4 bytes is length including itself. buf = reinterpret_cast(&symbolTable[numberOfSymbols]); - strtab.write(buf); + write32le(buf, strtab.size() + 4); + if (!strtab.empty()) + memcpy(buf + 4, strtab.data(), strtab.size()); } void Writer::openFile(StringRef path) {