[lld-macho] Use Symbols as branch target for safe_thunks ICF (#126835)
## Problem The `safe_thunks` ICF optimization in `lld-macho` was creating thunks that pointed to `InputSection`s instead of `Symbol`s. While, generally, branch relocations can point to symbols or input sections, in this case we need them to point to symbols as subsequently the branch extension algorithm expects branches to always point to `Symbol`'s. ## Solution This patch changes the ICF implementation so that safe thunks point to `Symbol`'s rather than `InputSection`s. ## Testing The existing `arm64-thunks.s` test is modified to include `--icf=safe_thunks` to explicitly verify the interaction between ICF and branch range extension thunks. Two functions were added that will be merged together via a thunk. Before this patch, this test would generate an assert - now this scenario is correctly handled.
This commit is contained in:
@@ -43,8 +43,8 @@ struct ARM64 : ARM64Common {
|
||||
void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
|
||||
|
||||
void initICFSafeThunkBody(InputSection *thunk,
|
||||
InputSection *branchTarget) const override;
|
||||
InputSection *getThunkBranchTarget(InputSection *thunk) const override;
|
||||
Symbol *targetSym) const override;
|
||||
Symbol *getThunkBranchTarget(InputSection *thunk) const override;
|
||||
uint32_t getICFSafeThunkSize() const override;
|
||||
};
|
||||
|
||||
@@ -185,8 +185,7 @@ static constexpr uint32_t icfSafeThunkCode[] = {
|
||||
0x14000000, // 08: b target
|
||||
};
|
||||
|
||||
void ARM64::initICFSafeThunkBody(InputSection *thunk,
|
||||
InputSection *branchTarget) const {
|
||||
void ARM64::initICFSafeThunkBody(InputSection *thunk, Symbol *targetSym) const {
|
||||
// The base data here will not be itself modified, we'll just be adding a
|
||||
// reloc below. So we can directly use the constexpr above as the data.
|
||||
thunk->data = {reinterpret_cast<const uint8_t *>(icfSafeThunkCode),
|
||||
@@ -195,17 +194,17 @@ void ARM64::initICFSafeThunkBody(InputSection *thunk,
|
||||
thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_BRANCH26,
|
||||
/*pcrel=*/true, /*length=*/2,
|
||||
/*offset=*/0, /*addend=*/0,
|
||||
/*referent=*/branchTarget);
|
||||
/*referent=*/targetSym);
|
||||
}
|
||||
|
||||
InputSection *ARM64::getThunkBranchTarget(InputSection *thunk) const {
|
||||
Symbol *ARM64::getThunkBranchTarget(InputSection *thunk) const {
|
||||
assert(thunk->relocs.size() == 1 &&
|
||||
"expected a single reloc on ARM64 ICF thunk");
|
||||
auto &reloc = thunk->relocs[0];
|
||||
assert(isa<InputSection *>(reloc.referent) &&
|
||||
"ARM64 thunk reloc is expected to point to an InputSection");
|
||||
assert(isa<Symbol *>(reloc.referent) &&
|
||||
"ARM64 thunk reloc is expected to point to a Symbol");
|
||||
|
||||
return cast<InputSection *>(reloc.referent);
|
||||
return cast<Symbol *>(reloc.referent);
|
||||
}
|
||||
|
||||
uint32_t ARM64::getICFSafeThunkSize() const { return sizeof(icfSafeThunkCode); }
|
||||
|
||||
@@ -27,6 +27,8 @@ using namespace lld;
|
||||
using namespace lld::macho;
|
||||
|
||||
static constexpr bool verboseDiagnostics = false;
|
||||
// This counter is used to generate unique thunk names.
|
||||
static uint64_t icfThunkCounter = 0;
|
||||
|
||||
class ICF {
|
||||
public:
|
||||
@@ -263,6 +265,31 @@ void ICF::forEachClassRange(size_t begin, size_t end,
|
||||
}
|
||||
}
|
||||
|
||||
// Find or create a symbol at offset 0 in the given section
|
||||
static Symbol *getThunkTargetSymbol(ConcatInputSection *isec) {
|
||||
for (Symbol *sym : isec->symbols)
|
||||
if (auto *d = dyn_cast<Defined>(sym))
|
||||
if (d->value == 0)
|
||||
return sym;
|
||||
|
||||
std::string thunkName;
|
||||
if (isec->symbols.size() == 0)
|
||||
thunkName = isec->getName().str() + ".icf.0";
|
||||
else
|
||||
thunkName = isec->getName().str() + "icf.thunk.target" +
|
||||
std::to_string(icfThunkCounter++);
|
||||
|
||||
// If no symbol found at offset 0, create one
|
||||
auto *sym = make<Defined>(thunkName, /*file=*/nullptr, isec,
|
||||
/*value=*/0, /*size=*/isec->getSize(),
|
||||
/*isWeakDef=*/false, /*isExternal=*/false,
|
||||
/*isPrivateExtern=*/false, /*isThumb=*/false,
|
||||
/*isReferencedDynamically=*/false,
|
||||
/*noDeadStrip=*/false);
|
||||
isec->symbols.push_back(sym);
|
||||
return sym;
|
||||
}
|
||||
|
||||
// Given a range of identical icfInputs, replace address significant functions
|
||||
// with a thunk that is just a direct branch to the first function in the
|
||||
// series. This way we keep only one main body of the function but we still
|
||||
@@ -280,6 +307,9 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
|
||||
// all thunks will branch to.
|
||||
ConcatInputSection *masterIsec = icfInputs[begin];
|
||||
|
||||
// Get the symbol that all thunks will branch to.
|
||||
Symbol *masterSym = getThunkTargetSymbol(masterIsec);
|
||||
|
||||
for (size_t i = begin + 1; i < end; ++i) {
|
||||
ConcatInputSection *isec = icfInputs[i];
|
||||
// When we're done processing keepUnique entries, we can stop. Sorting
|
||||
@@ -291,7 +321,7 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
|
||||
makeSyntheticInputSection(isec->getSegName(), isec->getName());
|
||||
addInputSection(thunk);
|
||||
|
||||
target->initICFSafeThunkBody(thunk, masterIsec);
|
||||
target->initICFSafeThunkBody(thunk, masterSym);
|
||||
thunk->foldIdentical(isec, Symbol::ICFFoldKind::Thunk);
|
||||
|
||||
// Since we're folding the target function into a thunk, we need to adjust
|
||||
@@ -495,18 +525,11 @@ Defined *macho::getBodyForThunkFoldedSym(Defined *foldedSym) {
|
||||
// the actual body of the function.
|
||||
InputSection *thunkBody = foldedSec->replacement;
|
||||
|
||||
// The actual (merged) body of the function that the thunk jumps to. This will
|
||||
// end up in the final binary.
|
||||
InputSection *functionBody = target->getThunkBranchTarget(thunkBody);
|
||||
// The symbol of the merged body of the function that the thunk jumps to. This
|
||||
// will end up in the final binary.
|
||||
Symbol *targetSym = target->getThunkBranchTarget(thunkBody);
|
||||
|
||||
for (Symbol *sym : functionBody->symbols) {
|
||||
Defined *d = dyn_cast<Defined>(sym);
|
||||
// The symbol needs to be at the start of the InputSection
|
||||
if (d && d->value == 0)
|
||||
return d;
|
||||
}
|
||||
|
||||
llvm_unreachable("could not find body symbol for ICF-generated thunk");
|
||||
return cast<Defined>(targetSym);
|
||||
}
|
||||
void macho::foldIdenticalSections(bool onlyCfStrings) {
|
||||
TimeTraceScope timeScope("Fold Identical Code Sections");
|
||||
@@ -526,6 +549,8 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
|
||||
// ICF::segregate()
|
||||
std::vector<ConcatInputSection *> foldable;
|
||||
uint64_t icfUniqueID = inputSections.size();
|
||||
// Reset the thunk counter for each run of ICF.
|
||||
icfThunkCounter = 0;
|
||||
for (ConcatInputSection *isec : inputSections) {
|
||||
bool isFoldableWithAddendsRemoved = isCfStringSection(isec) ||
|
||||
isClassRefsSection(isec) ||
|
||||
|
||||
@@ -76,13 +76,13 @@ public:
|
||||
|
||||
// Init 'thunk' so that it be a direct jump to 'branchTarget'.
|
||||
virtual void initICFSafeThunkBody(InputSection *thunk,
|
||||
InputSection *branchTarget) const {
|
||||
Symbol *targetSym) const {
|
||||
llvm_unreachable("target does not support ICF safe thunks");
|
||||
}
|
||||
|
||||
// Given a thunk for which `initICFSafeThunkBody` was called, return the
|
||||
// branchTarget it was initialized with.
|
||||
virtual InputSection *getThunkBranchTarget(InputSection *thunk) const {
|
||||
virtual Symbol *getThunkBranchTarget(InputSection *thunk) const {
|
||||
llvm_unreachable("target does not support ICF safe thunks");
|
||||
}
|
||||
|
||||
|
||||
@@ -14,12 +14,16 @@
|
||||
|
||||
# RUN: rm -rf %t; mkdir %t
|
||||
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
|
||||
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o
|
||||
## Use --icf=safe_thunks to test that branch extension algo is compatible
|
||||
## with safe_thunks ICF.
|
||||
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o --icf=safe_thunks
|
||||
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
|
||||
|
||||
# RUN: FileCheck %s --input-file %t/thunk.map --check-prefix=MAP
|
||||
|
||||
# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _b
|
||||
# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _a
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.0
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.0
|
||||
@@ -35,10 +39,13 @@
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b.thunk.0
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _h
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _main
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_high_addr
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c.thunk.0
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.1
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.1
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.1
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr.thunk.0
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} ltmp0.thunk.0
|
||||
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _z
|
||||
|
||||
|
||||
@@ -200,8 +207,21 @@
|
||||
# CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>:
|
||||
|
||||
.subsections_via_symbols
|
||||
|
||||
.addrsig
|
||||
.addrsig_sym _fold_func_low_addr
|
||||
.addrsig_sym _fold_func_high_addr
|
||||
|
||||
.text
|
||||
|
||||
.globl _fold_func_low_addr
|
||||
.p2align 2
|
||||
_fold_func_low_addr:
|
||||
add x0, x0, x0
|
||||
add x1, x0, x1
|
||||
add x2, x0, x2
|
||||
ret
|
||||
|
||||
.globl _a
|
||||
.p2align 2
|
||||
_a:
|
||||
@@ -329,9 +349,20 @@ _main:
|
||||
bl _f
|
||||
bl _g
|
||||
bl _h
|
||||
bl _fold_func_low_addr
|
||||
bl _fold_func_high_addr
|
||||
bl ___nan
|
||||
ret
|
||||
|
||||
.globl _fold_func_high_addr
|
||||
.p2align 2
|
||||
_fold_func_high_addr:
|
||||
add x0, x0, x0
|
||||
add x1, x0, x1
|
||||
add x2, x0, x2
|
||||
ret
|
||||
|
||||
|
||||
.section __TEXT,__cstring
|
||||
# The .space below has to be composed of non-zero characters. Otherwise, the
|
||||
# linker will create a symbol for every '0' in the section, leading to
|
||||
|
||||
Reference in New Issue
Block a user