[lld-macho] Use Symbols as branch target for safe_thunks ICF (#126835)

## Problem

The `safe_thunks` ICF optimization in `lld-macho` was creating thunks
that pointed to `InputSection`s instead of `Symbol`s. While branch
relocations can, in general, point to either symbols or input sections,
in this case they must point to symbols, because the branch extension
algorithm that runs afterwards expects branches to always point to
`Symbol`s.

## Solution
This patch changes the ICF implementation so that safe thunks point to
`Symbol`s rather than `InputSection`s.

## Testing
The existing `arm64-thunks.s` test is modified to pass
`--icf=safe_thunks`, explicitly verifying the interaction between ICF and
branch range extension thunks. Two functions were added that will be
merged together via a thunk. Before this patch, this test would trigger
an assertion failure - now this scenario is handled correctly.
This commit is contained in:
alx32
2025-02-13 11:07:12 -08:00
committed by GitHub
parent c2e96778e0
commit 4ac79a8c98
4 changed files with 80 additions and 25 deletions

View File

@@ -43,8 +43,8 @@ struct ARM64 : ARM64Common {
void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
void initICFSafeThunkBody(InputSection *thunk,
InputSection *branchTarget) const override;
InputSection *getThunkBranchTarget(InputSection *thunk) const override;
Symbol *targetSym) const override;
Symbol *getThunkBranchTarget(InputSection *thunk) const override;
uint32_t getICFSafeThunkSize() const override;
};
@@ -185,8 +185,7 @@ static constexpr uint32_t icfSafeThunkCode[] = {
0x14000000, // 08: b target
};
void ARM64::initICFSafeThunkBody(InputSection *thunk,
InputSection *branchTarget) const {
void ARM64::initICFSafeThunkBody(InputSection *thunk, Symbol *targetSym) const {
// The base data here will not be itself modified, we'll just be adding a
// reloc below. So we can directly use the constexpr above as the data.
thunk->data = {reinterpret_cast<const uint8_t *>(icfSafeThunkCode),
@@ -195,17 +194,17 @@ void ARM64::initICFSafeThunkBody(InputSection *thunk,
thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_BRANCH26,
/*pcrel=*/true, /*length=*/2,
/*offset=*/0, /*addend=*/0,
/*referent=*/branchTarget);
/*referent=*/targetSym);
}
InputSection *ARM64::getThunkBranchTarget(InputSection *thunk) const {
Symbol *ARM64::getThunkBranchTarget(InputSection *thunk) const {
assert(thunk->relocs.size() == 1 &&
"expected a single reloc on ARM64 ICF thunk");
auto &reloc = thunk->relocs[0];
assert(isa<InputSection *>(reloc.referent) &&
"ARM64 thunk reloc is expected to point to an InputSection");
assert(isa<Symbol *>(reloc.referent) &&
"ARM64 thunk reloc is expected to point to a Symbol");
return cast<InputSection *>(reloc.referent);
return cast<Symbol *>(reloc.referent);
}
uint32_t ARM64::getICFSafeThunkSize() const { return sizeof(icfSafeThunkCode); }

View File

@@ -27,6 +27,8 @@ using namespace lld;
using namespace lld::macho;
static constexpr bool verboseDiagnostics = false;
// This counter is used to generate unique thunk names.
static uint64_t icfThunkCounter = 0;
class ICF {
public:
@@ -263,6 +265,31 @@ void ICF::forEachClassRange(size_t begin, size_t end,
}
}
// Return the symbol that ICF safe thunks for `isec` should branch to.
//
// If `isec` already has a Defined symbol at offset 0, that symbol is returned.
// Otherwise a new non-external Defined covering the whole section is created
// at offset 0, appended to isec->symbols, and returned.
static Symbol *getThunkTargetSymbol(ConcatInputSection *isec) {
  for (Symbol *sym : isec->symbols)
    if (auto *d = dyn_cast<Defined>(sym))
      if (d->value == 0)
        return sym;

  // No symbol at offset 0, so synthesize a name for a new one. When the
  // section already carries symbols (at other offsets), icfThunkCounter is
  // used to keep the generated names distinct from each other.
  std::string thunkName = isec->getName().str();
  if (isec->symbols.empty())
    thunkName += ".icf.0";
  else
    thunkName += ".icf.thunk.target" + std::to_string(icfThunkCounter++);

  // NOTE(review): symbols are expected to hold a non-owning reference to their
  // name rather than a copy - confirm against the Symbol constructor. Under
  // that assumption a function-local std::string would dangle as soon as we
  // return, so move the name into arena storage that outlives the symbol.
  const std::string &savedName = *make<std::string>(std::move(thunkName));

  auto *sym = make<Defined>(savedName, /*file=*/nullptr, isec,
                            /*value=*/0, /*size=*/isec->getSize(),
                            /*isWeakDef=*/false, /*isExternal=*/false,
                            /*isPrivateExtern=*/false, /*isThumb=*/false,
                            /*isReferencedDynamically=*/false,
                            /*noDeadStrip=*/false);
  isec->symbols.push_back(sym);
  return sym;
}
// Given a range of identical icfInputs, replace address significant functions
// with a thunk that is just a direct branch to the first function in the
// series. This way we keep only one main body of the function but we still
@@ -280,6 +307,9 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
// all thunks will branch to.
ConcatInputSection *masterIsec = icfInputs[begin];
// Get the symbol that all thunks will branch to.
Symbol *masterSym = getThunkTargetSymbol(masterIsec);
for (size_t i = begin + 1; i < end; ++i) {
ConcatInputSection *isec = icfInputs[i];
// When we're done processing keepUnique entries, we can stop. Sorting
@@ -291,7 +321,7 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
makeSyntheticInputSection(isec->getSegName(), isec->getName());
addInputSection(thunk);
target->initICFSafeThunkBody(thunk, masterIsec);
target->initICFSafeThunkBody(thunk, masterSym);
thunk->foldIdentical(isec, Symbol::ICFFoldKind::Thunk);
// Since we're folding the target function into a thunk, we need to adjust
@@ -495,18 +525,11 @@ Defined *macho::getBodyForThunkFoldedSym(Defined *foldedSym) {
// the actual body of the function.
InputSection *thunkBody = foldedSec->replacement;
// The actual (merged) body of the function that the thunk jumps to. This will
// end up in the final binary.
InputSection *functionBody = target->getThunkBranchTarget(thunkBody);
// The symbol of the merged body of the function that the thunk jumps to. This
// will end up in the final binary.
Symbol *targetSym = target->getThunkBranchTarget(thunkBody);
for (Symbol *sym : functionBody->symbols) {
Defined *d = dyn_cast<Defined>(sym);
// The symbol needs to be at the start of the InputSection
if (d && d->value == 0)
return d;
}
llvm_unreachable("could not find body symbol for ICF-generated thunk");
return cast<Defined>(targetSym);
}
void macho::foldIdenticalSections(bool onlyCfStrings) {
TimeTraceScope timeScope("Fold Identical Code Sections");
@@ -526,6 +549,8 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
// ICF::segregate()
std::vector<ConcatInputSection *> foldable;
uint64_t icfUniqueID = inputSections.size();
// Reset the thunk counter for each run of ICF.
icfThunkCounter = 0;
for (ConcatInputSection *isec : inputSections) {
bool isFoldableWithAddendsRemoved = isCfStringSection(isec) ||
isClassRefsSection(isec) ||

View File

@@ -76,13 +76,13 @@ public:
// Init 'thunk' so that it is a direct jump to 'targetSym'.
virtual void initICFSafeThunkBody(InputSection *thunk,
InputSection *branchTarget) const {
Symbol *targetSym) const {
llvm_unreachable("target does not support ICF safe thunks");
}
// Given a thunk for which `initICFSafeThunkBody` was called, return the
// target symbol it was initialized with.
virtual InputSection *getThunkBranchTarget(InputSection *thunk) const {
virtual Symbol *getThunkBranchTarget(InputSection *thunk) const {
llvm_unreachable("target does not support ICF safe thunks");
}

View File

@@ -14,12 +14,16 @@
# RUN: rm -rf %t; mkdir %t
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o
## Use --icf=safe_thunks to test that branch extension algo is compatible
## with safe_thunks ICF.
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o --icf=safe_thunks
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
# RUN: FileCheck %s --input-file %t/thunk.map --check-prefix=MAP
# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _b
# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _a
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.0
@@ -35,10 +39,13 @@
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _h
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _main
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_high_addr
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.1
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.1
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.1
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} ltmp0.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _z
@@ -200,8 +207,21 @@
# CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>:
.subsections_via_symbols
.addrsig
.addrsig_sym _fold_func_low_addr
.addrsig_sym _fold_func_high_addr
.text
.globl _fold_func_low_addr
.p2align 2
_fold_func_low_addr:
add x0, x0, x0
add x1, x0, x1
add x2, x0, x2
ret
.globl _a
.p2align 2
_a:
@@ -329,9 +349,20 @@ _main:
bl _f
bl _g
bl _h
bl _fold_func_low_addr
bl _fold_func_high_addr
bl ___nan
ret
.globl _fold_func_high_addr
.p2align 2
_fold_func_high_addr:
add x0, x0, x0
add x1, x0, x1
add x2, x0, x2
ret
.section __TEXT,__cstring
# The .space below has to be composed of non-zero characters. Otherwise, the
# linker will create a symbol for every '0' in the section, leading to