[lld-macho] Implement symbol string deduplication (#123874)
The symbol string table currently does not deduplicate its entries. Here we add code to deduplicate the symbol string table. This yields a rather large size reduction (20-30%) on unstripped binaries (typically debug binaries) but has no size impact on stripped binaries (typically release binaries). We enable deduplication by default and add a flag to disable it (`-no-deduplicate-symbol-strings`).
This commit is contained in:
@@ -143,6 +143,7 @@ struct Configuration {
|
|||||||
bool timeTraceEnabled = false;
|
bool timeTraceEnabled = false;
|
||||||
bool dataConst = false;
|
bool dataConst = false;
|
||||||
bool dedupStrings = true;
|
bool dedupStrings = true;
|
||||||
|
bool dedupSymbolStrings = true;
|
||||||
bool deadStripDuplicates = false;
|
bool deadStripDuplicates = false;
|
||||||
bool omitDebugInfo = false;
|
bool omitDebugInfo = false;
|
||||||
bool warnDylibInstallName = false;
|
bool warnDylibInstallName = false;
|
||||||
|
|||||||
@@ -1806,6 +1806,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
|
|||||||
config->keepICFStabs = args.hasArg(OPT_keep_icf_stabs);
|
config->keepICFStabs = args.hasArg(OPT_keep_icf_stabs);
|
||||||
config->dedupStrings =
|
config->dedupStrings =
|
||||||
args.hasFlag(OPT_deduplicate_strings, OPT_no_deduplicate_strings, true);
|
args.hasFlag(OPT_deduplicate_strings, OPT_no_deduplicate_strings, true);
|
||||||
|
config->dedupSymbolStrings = !args.hasArg(OPT_no_deduplicate_symbol_strings);
|
||||||
config->deadStripDuplicates = args.hasArg(OPT_dead_strip_duplicates);
|
config->deadStripDuplicates = args.hasArg(OPT_dead_strip_duplicates);
|
||||||
config->warnDylibInstallName = args.hasFlag(
|
config->warnDylibInstallName = args.hasFlag(
|
||||||
OPT_warn_dylib_install_name, OPT_no_warn_dylib_install_name, false);
|
OPT_warn_dylib_install_name, OPT_no_warn_dylib_install_name, false);
|
||||||
|
|||||||
@@ -1476,3 +1476,8 @@ def no_warn_duplicate_libraries : Flag<["-"], "no_warn_duplicate_libraries">,
|
|||||||
HelpText<"Do not warn if the input contains duplicate library options.">,
|
HelpText<"Do not warn if the input contains duplicate library options.">,
|
||||||
Flags<[HelpHidden]>,
|
Flags<[HelpHidden]>,
|
||||||
Group<grp_ignored_silently>;
|
Group<grp_ignored_silently>;
|
||||||
|
|
||||||
|
// Add this with the other flags in the rare options group
|
||||||
|
def no_deduplicate_symbol_strings : Flag<["-"], "no-deduplicate-symbol-strings">,
|
||||||
|
HelpText<"Do not deduplicate strings in the symbol string table. Might result in larger binaries but slightly faster link times.">,
|
||||||
|
Group<grp_rare>;
|
||||||
|
|||||||
@@ -1541,7 +1541,14 @@ StringTableSection::StringTableSection()
|
|||||||
|
|
||||||
uint32_t StringTableSection::addString(StringRef str) {
|
uint32_t StringTableSection::addString(StringRef str) {
|
||||||
uint32_t strx = size;
|
uint32_t strx = size;
|
||||||
strings.push_back(str); // TODO: consider deduplicating strings
|
if (config->dedupSymbolStrings) {
|
||||||
|
llvm::CachedHashStringRef hashedStr(str);
|
||||||
|
auto [it, inserted] = stringMap.try_emplace(hashedStr, strx);
|
||||||
|
if (!inserted)
|
||||||
|
return it->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
strings.push_back(str);
|
||||||
size += str.size() + 1; // account for null terminator
|
size += str.size() + 1; // account for null terminator
|
||||||
return strx;
|
return strx;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -447,6 +447,7 @@ private:
|
|||||||
// match its behavior here since some tools depend on it.
|
// match its behavior here since some tools depend on it.
|
||||||
// Consequently, the empty string will be at index 1, not zero.
|
// Consequently, the empty string will be at index 1, not zero.
|
||||||
std::vector<StringRef> strings{" "};
|
std::vector<StringRef> strings{" "};
|
||||||
|
llvm::DenseMap<llvm::CachedHashStringRef, uint32_t> stringMap;
|
||||||
size_t size = 2;
|
size_t size = 2;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,17 @@
|
|||||||
# RUN: %lld -dylib -framework CoreFoundation %t/foo1.o %t/foo2.o -o %t/foo
|
# RUN: %lld -dylib -framework CoreFoundation %t/foo1.o %t/foo2.o -o %t/foo
|
||||||
# RUN: llvm-objdump --no-print-imm-hex --macho --rebase --bind --syms -d %t/foo | FileCheck %s --check-prefix=LITERALS
|
# RUN: llvm-objdump --no-print-imm-hex --macho --rebase --bind --syms -d %t/foo | FileCheck %s --check-prefix=LITERALS
|
||||||
|
|
||||||
|
# Check that string deduplication for symbol names is working
|
||||||
|
# RUN: %lld -dylib -framework CoreFoundation %t/foo1.o %t/foo2.o -o %t/foo_no_dedup -no-deduplicate-symbol-strings
|
||||||
|
# RUN: llvm-strings %t/foo | FileCheck %s --check-prefix=CHECK-DEDUP
|
||||||
|
# RUN: llvm-strings %t/foo_no_dedup | FileCheck %s --check-prefix=CHECK-NO-DEDUP
|
||||||
|
# CHECK-DEDUP: _named_cfstring
|
||||||
|
# CHECK-DEDUP-NOT: _named_cfstring
|
||||||
|
# CHECK-NO-DEDUP: _named_cfstring
|
||||||
|
# CHECK-NO-DEDUP: _named_cfstring
|
||||||
|
# CHECK-NO-DEDUP-NOT: _named_cfstring
|
||||||
|
|
||||||
|
|
||||||
# CHECK: (__TEXT,__text) section
|
# CHECK: (__TEXT,__text) section
|
||||||
# CHECK-NEXT: _foo1:
|
# CHECK-NEXT: _foo1:
|
||||||
# CHECK-NEXT: _foo2:
|
# CHECK-NEXT: _foo2:
|
||||||
|
|||||||
Reference in New Issue
Block a user