[lld-macho] Fix branch extension logic compatibility with __objc_stubs (#137913)

Enhance branch extension logic to handle __objc_stubs identically to
__stubs

The branch extension algorithm currently has specific handling for the
`__stubs` section:
1. It ensures all `__stubs` content is directly reachable via branches
from the text section.
2. It calculates the latest text section address that might require
thunks to reach the end of `__stubs`.

The `__objc_stubs` section requires precisely the same handling due to
its similar nature, but this was not implemented.

This commit generalizes the existing logic so it applies consistently to
both the `__stubs` and `__objc_stubs` sections, ensuring correct
reachability and thunk placement for both. Without this change, a
sufficiently large `__objc_stubs` section can fall outside direct branch
range of some call sites, producing relocation errors during linking.
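
To make the intent concrete, here is a minimal, self-contained C++ sketch of the generalized check. The helper and its parameters are hypothetical stand-ins for illustration, not lld's actual interface; the real logic lives in `TextOutputSection::needsThunks()` in the diff below.

```cpp
#include <algorithm>
#include <cstdint>

// Hypothetical stand-in for lld's check, for illustration only. Both __stubs
// and __objc_stubs are laid out after __text, so a call site near the start
// of the segment must be able to reach past the end of the *last* of them.
bool mayNeedThunks(uint64_t textSegmentSize, uint64_t stubsSize,
                   uint64_t objcStubsSize, // pass 0 if __objc_stubs is absent
                   uint64_t forwardBranchRange, uint64_t backwardBranchRange) {
  // Previously only stubsSize was counted; the fix adds objcStubsSize too.
  uint64_t branchTargetsSize = stubsSize + objcStubsSize;
  return textSegmentSize + branchTargetsSize >
         std::min(backwardBranchRange, forwardBranchRange);
}
```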
alx32 authored 2025-05-01 10:29:07 -07:00, committed by GitHub
parent b6d4b7eba1 · commit deae5eef71
3 changed files with 99 additions and 33 deletions

lld/MachO/ConcatOutputSection.cpp

@@ -133,8 +133,16 @@ bool TextOutputSection::needsThunks() const {
   // we've already processed in this segment needs thunks, so do the
   // rest.
   bool needsThunks = parent && parent->needsThunks;
+
+  // Calculate the total size of all branch target sections
+  uint64_t branchTargetsSize = in.stubs->getSize();
+  // Add the size of __objc_stubs section if it exists
+  if (in.objcStubs && in.objcStubs->isNeeded())
+    branchTargetsSize += in.objcStubs->getSize();
+
   if (!needsThunks &&
-      isecAddr - addr + in.stubs->getSize() <=
+      isecAddr - addr + branchTargetsSize <=
          std::min(target->backwardBranchRange, target->forwardBranchRange))
     return false;
+
   // Yes, this program is large enough to need thunks.
@@ -148,11 +156,11 @@ bool TextOutputSection::needsThunks() const {
       auto *sym = cast<Symbol *>(r.referent);
       // Pre-populate the thunkMap and memoize call site counts for every
       // InputSection and ThunkInfo. We do this for the benefit of
-      // estimateStubsInRangeVA().
+      // estimateBranchTargetThresholdVA().
       ThunkInfo &thunkInfo = thunkMap[sym];
       // Knowing ThunkInfo call site count will help us know whether or not we
       // might need to create more for this referent at the time we are
-      // estimating distance to __stubs in estimateStubsInRangeVA().
+      // estimating distance to __stubs in estimateBranchTargetThresholdVA().
       ++thunkInfo.callSiteCount;
       // We can avoid work on InputSections that have no BRANCH relocs.
       isec->hasCallSites = true;
@@ -161,10 +169,11 @@ bool TextOutputSection::needsThunks() const {
   return true;
 }
 
-// Since __stubs is placed after __text, we must estimate the address
-// beyond which stubs are within range of a simple forward branch.
-// This is called exactly once, when the last input section has been finalized.
-uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
+// Estimate the address beyond which branch targets (like __stubs and
+// __objc_stubs) are within range of a simple forward branch. This is called
+// exactly once, when the last input section has been finalized.
+uint64_t
+TextOutputSection::estimateBranchTargetThresholdVA(size_t callIdx) const {
   // Tally the functions which still have call sites remaining to process,
   // which yields the maximum number of thunks we might yet place.
   size_t maxPotentialThunks = 0;
@@ -172,8 +181,8 @@ uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
     ThunkInfo &ti = tp.second;
     // This overcounts: Only sections that are in forward jump range from the
     // currently-active section get finalized, and all input sections are
-    // finalized when estimateStubsInRangeVA() is called. So only backward
-    // jumps will need thunks, but we count all jumps.
+    // finalized when estimateBranchTargetThresholdVA() is called. So only
+    // backward jumps will need thunks, but we count all jumps.
     if (ti.callSitesUsed < ti.callSiteCount)
       maxPotentialThunks += 1;
   }
@@ -200,7 +209,8 @@ uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
   assert(isecEnd - isecVA <= forwardBranchRange &&
          "should only finalize sections in jump range");
 
-  // Estimate the maximum size of the code, right before the stubs section.
+  // Estimate the maximum size of the code, right before the branch target
+  // sections.
   uint64_t maxTextSize = 0;
   // Add the size of all the inputs, including the unprocessed ones.
   maxTextSize += isecEnd;
@@ -214,21 +224,35 @@ uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
   // 'maxPotentialThunks' overcounts, this is an estimate of the upper limit.
   maxTextSize += maxPotentialThunks * target->thunkSize;
 
-  // Estimated maximum VA of last stub.
-  uint64_t maxVAOfLastStub = maxTextSize + in.stubs->getSize();
+  // Calculate the total size of all late branch target sections
+  uint64_t branchTargetsSize = 0;
+  // Add the size of __stubs section
+  branchTargetsSize += in.stubs->getSize();
+  // Add the size of __objc_stubs section if it exists
+  if (in.objcStubs && in.objcStubs->isNeeded())
+    branchTargetsSize += in.objcStubs->getSize();
+
+  // Estimated maximum VA of the last branch target.
+  uint64_t maxVAOfLastBranchTarget = maxTextSize + branchTargetsSize;
 
-  // Estimate the address after which call sites can safely call stubs
+  // Estimate the address after which call sites can safely call branch targets
   // directly rather than through intermediary thunks.
-  uint64_t stubsInRangeVA = maxVAOfLastStub - forwardBranchRange;
+  uint64_t branchTargetThresholdVA =
+      maxVAOfLastBranchTarget - forwardBranchRange;
 
   log("thunks = " + std::to_string(thunkMap.size()) +
       ", potential = " + std::to_string(maxPotentialThunks) +
-      ", stubs = " + std::to_string(in.stubs->getSize()) + ", isecVA = " +
-      utohexstr(isecVA) + ", threshold = " + utohexstr(stubsInRangeVA) +
-      ", isecEnd = " + utohexstr(isecEnd) +
+      ", stubs = " + std::to_string(in.stubs->getSize()) +
+      (in.objcStubs && in.objcStubs->isNeeded()
+           ? ", objc_stubs = " + std::to_string(in.objcStubs->getSize())
+           : "") +
+      ", isecVA = " + utohexstr(isecVA) + ", threshold = " +
+      utohexstr(branchTargetThresholdVA) + ", isecEnd = " + utohexstr(isecEnd) +
       ", tail = " + utohexstr(isecEnd - isecVA) +
       ", slop = " + utohexstr(forwardBranchRange - (isecEnd - isecVA)));
-  return stubsInRangeVA;
+  return branchTargetThresholdVA;
 }
 
 void ConcatOutputSection::finalizeOne(ConcatInputSection *isec) {
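
The estimate boils down to: last-branch-target VA minus forward branch range. A standalone numeric illustration (all constants invented for the example; 0x8000000 happens to match arm64's 128 MiB forward `b`/`bl` reach, but do not treat these as lld's real values):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Invented, illustrative values only.
  uint64_t maxTextSize = 0x9000000;        // worst-case __text size, thunks included
  uint64_t branchTargetsSize = 0x200000;   // __stubs + __objc_stubs combined
  uint64_t forwardBranchRange = 0x8000000; // forward reach of a direct branch

  // Estimated VA of the last branch target, measured from __text's start.
  uint64_t maxVAOfLastBranchTarget = maxTextSize + branchTargetsSize;
  // Call sites at or past this address reach every stub with one forward branch.
  uint64_t thresholdVA = maxVAOfLastBranchTarget - forwardBranchRange;
  std::printf("threshold = 0x%llx\n", (unsigned long long)thresholdVA); // 0x1200000
  return 0;
}
```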
@@ -254,7 +278,7 @@ void TextOutputSection::finalize() {
   uint64_t forwardBranchRange = target->forwardBranchRange;
   uint64_t backwardBranchRange = target->backwardBranchRange;
-  uint64_t stubsInRangeVA = TargetInfo::outOfRangeVA;
+  uint64_t branchTargetThresholdVA = TargetInfo::outOfRangeVA;
   size_t thunkSize = target->thunkSize;
   size_t relocCount = 0;
   size_t callSiteCount = 0;
@@ -297,16 +321,18 @@ void TextOutputSection::finalize() {
     if (!isec->hasCallSites)
       continue;
 
-    if (finalIdx == endIdx && stubsInRangeVA == TargetInfo::outOfRangeVA) {
-      // When we have finalized all input sections, __stubs (destined
-      // to follow __text) comes within range of forward branches and
-      // we can estimate the threshold address after which we can
-      // reach any stub with a forward branch. Note that although it
-      // sits in the middle of a loop, this code executes only once.
+    if (finalIdx == endIdx &&
+        branchTargetThresholdVA == TargetInfo::outOfRangeVA) {
+      // When we have finalized all input sections, branch target sections (like
+      // __stubs and __objc_stubs) (destined to follow __text) come within range
+      // of forward branches and we can estimate the threshold address after
+      // which we can reach any branch target with a forward branch. Note that
+      // although it sits in the middle of a loop, this code executes only once.
       // It is in the loop because we need to call it at the proper
       // time: the earliest call site from which the end of __text
-      // (and start of __stubs) comes within range of a forward branch.
-      stubsInRangeVA = estimateStubsInRangeVA(callIdx);
+      // (and start of branch target sections) comes within range of a forward
+      // branch.
+      branchTargetThresholdVA = estimateBranchTargetThresholdVA(callIdx);
     }
     // Process relocs by ascending address, i.e., ascending offset within isec
     std::vector<Reloc> &relocs = isec->relocs;
@@ -328,10 +354,14 @@ void TextOutputSection::finalize() {
       auto *funcSym = cast<Symbol *>(r.referent);
       ThunkInfo &thunkInfo = thunkMap[funcSym];
       // The referent is not reachable, so we need to use a thunk ...
-      if (funcSym->isInStubs() && callVA >= stubsInRangeVA) {
+      if ((funcSym->isInStubs() ||
+           (in.objcStubs && in.objcStubs->isNeeded() &&
+            ObjCStubsSection::isObjCStubSymbol(funcSym))) &&
+          callVA >= branchTargetThresholdVA) {
         assert(callVA != TargetInfo::outOfRangeVA);
-        // ... Oh, wait! We are close enough to the end that __stubs
-        // are now within range of a simple forward branch.
+        // ... Oh, wait! We are close enough to the end that branch target
+        // sections (__stubs, __objc_stubs) are now within range of a simple
+        // forward branch.
         continue;
       }
       uint64_t funcVA = funcSym->resolveBranchVA();
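
With the threshold in hand, the per-call-site decision in `finalize()` reduces to a small predicate. A hedged restatement (function and parameter names are mine, not lld's):

```cpp
#include <cstdint>

// Illustrative restatement of the patched condition: a branch needs no thunk
// when its target lives in a late branch-target section (__stubs or
// __objc_stubs) and the call site has already crossed the estimated threshold.
bool canSkipThunk(bool targetInStubs, bool targetIsObjCStub, uint64_t callVA,
                  uint64_t branchTargetThresholdVA) {
  return (targetInStubs || targetIsObjCStub) &&
         callVA >= branchTargetThresholdVA;
}
```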

lld/MachO/ConcatOutputSection.h

@@ -80,7 +80,7 @@ public:
   }
 
 private:
-  uint64_t estimateStubsInRangeVA(size_t callIdx) const;
+  uint64_t estimateBranchTargetThresholdVA(size_t callIdx) const;
   std::vector<ConcatInputSection *> thunks;
 };

lld/test/MachO/arm64-thunks.s

@@ -18,9 +18,28 @@
 ## with safe_thunks ICF.
 # RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o --icf=safe_thunks
 # RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
 
+# RUN: llvm-objdump --macho --section-headers %t/thunk > %t/headers.txt
+# RUN: llvm-otool -vs __DATA __objc_selrefs %t/thunk >> %t/headers.txt
+# RUN: llvm-otool -vs __TEXT __objc_stubs %t/thunk >> %t/headers.txt
+# RUN: FileCheck %s --check-prefix=OBJC < %t/headers.txt
+
 # RUN: FileCheck %s --input-file %t/thunk.map --check-prefix=MAP
 
+# OBJC: Sections:
+# OBJC: __text
+# OBJC-NEXT: __lcxx_override
+# OBJC-NEXT: __stubs
+# OBJC-NEXT: __stub_helper
+# OBJC-NEXT: __objc_stubs
+
+# OBJC: Contents of (__DATA,__objc_selrefs) section
+# OBJC-NEXT: {{[0-9a-f]*}} __TEXT:__objc_methname:foo
+# OBJC-NEXT: {{[0-9a-f]*}} __TEXT:__objc_methname:bar
+
+# OBJC: Contents of (__TEXT,__objc_stubs) section
+# OBJC: _objc_msgSend$bar:
+# OBJC: _objc_msgSend$foo:
 # MAP: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr
 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _a
 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b
@@ -45,7 +64,6 @@
 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.1
 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.1
 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr.thunk.0
-# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} ltmp0.thunk.0
 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _z
@@ -206,6 +224,22 @@
 # CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>:
 
+.section __TEXT,__objc_methname,cstring_literals
+lselref1:
+  .asciz "foo"
+lselref2:
+  .asciz "bar"
+
+.section __DATA,__objc_selrefs,literal_pointers,no_dead_strip
+.p2align 3
+.quad lselref1
+.quad lselref2
+
+.text
+.globl _objc_msgSend
+_objc_msgSend:
+  ret
+
 .subsections_via_symbols
 .addrsig
@@ -352,6 +386,8 @@ _main:
   bl _fold_func_low_addr
   bl _fold_func_high_addr
   bl ___nan
+  bl _objc_msgSend$foo
+  bl _objc_msgSend$bar
   ret
 
 .globl _fold_func_high_addr