From deae5eef7147c5ad3acff612ab0ed00e2186d9a5 Mon Sep 17 00:00:00 2001
From: alx32 <103613512+alx32@users.noreply.github.com>
Date: Thu, 1 May 2025 10:29:07 -0700
Subject: [PATCH] [lld-macho] Fix branch extension logic compatibility with
 __objc_stubs (#137913)

Enhance the branch extension logic to handle `__objc_stubs` identically
to `__stubs`.

The branch extension algorithm currently has specific handling for the
`__stubs` section:

1. It ensures all `__stubs` content is directly reachable via branches
   from the text section.
2. It calculates the latest text-section address that might require
   thunks to reach the end of `__stubs`.

The `__objc_stubs` section requires precisely the same handling, since
it is likewise laid out after `__text` and reached by branches from it,
but this was not implemented.

This commit generalizes the existing logic so it applies consistently
to both the `__stubs` and `__objc_stubs` sections, ensuring correct
reachability and thunk placement for both.

Without this change, it's possible to get relocation errors during
linking when the `__objc_stubs` section grows large enough that parts
of it fall out of direct-branch range.
---
 lld/MachO/ConcatOutputSection.cpp | 90 ++++++++++++++++++++-----------
 lld/MachO/ConcatOutputSection.h   |  2 +-
 lld/test/MachO/arm64-thunks.s     | 40 +++++++++++++-
 3 files changed, 99 insertions(+), 33 deletions(-)
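A note on the mechanics before the diff: the gate that decides whether any
thunking pass runs at all treats __text plus every stub-like section placed
after it as one span that must fit within the smaller of the two branch
ranges. A minimal standalone sketch of that predicate follows; the names
(fitsWithoutThunks, textSize, branchTargetsSize) are invented here and are
not the lld API:

    #include <algorithm>
    #include <cstdint>

    // A branch at the start of __text must be able to reach the end of the
    // last stub-like section that follows it, so the whole span has to fit
    // in the smaller of the forward/backward ranges. On arm64, `bl` encodes
    // a 26-bit word offset, i.e. +/-128 MiB.
    bool fitsWithoutThunks(uint64_t textSize, uint64_t branchTargetsSize,
                           uint64_t forwardRange, uint64_t backwardRange) {
      return textSize + branchTargetsSize <=
             std::min(forwardRange, backwardRange);
    }

This is the shape of the updated needsThunks() check below: branchTargetsSize
now covers __objc_stubs as well as __stubs, where previously only
in.stubs->getSize() was counted.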
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index d64816ec695a..48bc06194eef 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -133,8 +133,16 @@ bool TextOutputSection::needsThunks() const {
   // we've already processed in this segment needs thunks, so do the
   // rest.
   bool needsThunks = parent && parent->needsThunks;
+
+  // Calculate the total size of all branch target sections
+  uint64_t branchTargetsSize = in.stubs->getSize();
+
+  // Add the size of __objc_stubs section if it exists
+  if (in.objcStubs && in.objcStubs->isNeeded())
+    branchTargetsSize += in.objcStubs->getSize();
+
   if (!needsThunks &&
-      isecAddr - addr + in.stubs->getSize() <=
+      isecAddr - addr + branchTargetsSize <=
           std::min(target->backwardBranchRange, target->forwardBranchRange))
     return false;
   // Yes, this program is large enough to need thunks.
@@ -148,11 +156,11 @@ bool TextOutputSection::needsThunks() const {
       auto *sym = cast<Symbol *>(r.referent);
       // Pre-populate the thunkMap and memoize call site counts for every
       // InputSection and ThunkInfo. We do this for the benefit of
-      // estimateStubsInRangeVA().
+      // estimateBranchTargetThresholdVA().
       ThunkInfo &thunkInfo = thunkMap[sym];
       // Knowing ThunkInfo call site count will help us know whether or not we
      // might need to create more for this referent at the time we are
-      // estimating distance to __stubs in estimateStubsInRangeVA().
+      // estimating distance to __stubs in estimateBranchTargetThresholdVA().
       ++thunkInfo.callSiteCount;
       // We can avoid work on InputSections that have no BRANCH relocs.
       isec->hasCallSites = true;
@@ -161,10 +169,11 @@ bool TextOutputSection::needsThunks() const {
   return true;
 }
 
-// Since __stubs is placed after __text, we must estimate the address
-// beyond which stubs are within range of a simple forward branch.
-// This is called exactly once, when the last input section has been finalized.
-uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
+// Estimate the address beyond which branch targets (like __stubs and
+// __objc_stubs) are within range of a simple forward branch. This is called
+// exactly once, when the last input section has been finalized.
+uint64_t
+TextOutputSection::estimateBranchTargetThresholdVA(size_t callIdx) const {
   // Tally the functions which still have call sites remaining to process,
   // which yields the maximum number of thunks we might yet place.
   size_t maxPotentialThunks = 0;
@@ -172,8 +181,8 @@ uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
     ThunkInfo &ti = tp.second;
     // This overcounts: Only sections that are in forward jump range from the
     // currently-active section get finalized, and all input sections are
-    // finalized when estimateStubsInRangeVA() is called. So only backward
-    // jumps will need thunks, but we count all jumps.
+    // finalized when estimateBranchTargetThresholdVA() is called. So only
+    // backward jumps will need thunks, but we count all jumps.
     if (ti.callSitesUsed < ti.callSiteCount)
       maxPotentialThunks += 1;
   }
@@ -200,7 +209,8 @@ uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
   assert(isecEnd - isecVA <= forwardBranchRange &&
          "should only finalize sections in jump range");
 
-  // Estimate the maximum size of the code, right before the stubs section.
+  // Estimate the maximum size of the code, right before the branch target
+  // sections.
   uint64_t maxTextSize = 0;
   // Add the size of all the inputs, including the unprocessed ones.
   maxTextSize += isecEnd;
@@ -214,21 +224,35 @@ uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
   // 'maxPotentialThunks' overcounts, this is an estimate of the upper limit.
   maxTextSize += maxPotentialThunks * target->thunkSize;
 
-  // Estimated maximum VA of last stub.
-  uint64_t maxVAOfLastStub = maxTextSize + in.stubs->getSize();
+  // Calculate the total size of all late branch target sections
+  uint64_t branchTargetsSize = 0;
 
-  // Estimate the address after which call sites can safely call stubs
+  // Add the size of __stubs section
+  branchTargetsSize += in.stubs->getSize();
+
+  // Add the size of __objc_stubs section if it exists
+  if (in.objcStubs && in.objcStubs->isNeeded())
+    branchTargetsSize += in.objcStubs->getSize();
+
+  // Estimated maximum VA of the last branch target.
+  uint64_t maxVAOfLastBranchTarget = maxTextSize + branchTargetsSize;
+
+  // Estimate the address after which call sites can safely call branch targets
   // directly rather than through intermediary thunks.
-  uint64_t stubsInRangeVA = maxVAOfLastStub - forwardBranchRange;
+  uint64_t branchTargetThresholdVA =
+      maxVAOfLastBranchTarget - forwardBranchRange;
   log("thunks = " + std::to_string(thunkMap.size()) +
       ", potential = " + std::to_string(maxPotentialThunks) +
-      ", stubs = " + std::to_string(in.stubs->getSize()) + ", isecVA = " +
-      utohexstr(isecVA) + ", threshold = " + utohexstr(stubsInRangeVA) +
-      ", isecEnd = " + utohexstr(isecEnd) +
+      ", stubs = " + std::to_string(in.stubs->getSize()) +
+      (in.objcStubs && in.objcStubs->isNeeded()
+           ? ", objc_stubs = " + std::to_string(in.objcStubs->getSize())
+           : "") +
+      ", isecVA = " + utohexstr(isecVA) + ", threshold = " +
+      utohexstr(branchTargetThresholdVA) + ", isecEnd = " + utohexstr(isecEnd) +
       ", tail = " + utohexstr(isecEnd - isecVA) +
       ", slop = " + utohexstr(forwardBranchRange - (isecEnd - isecVA)));
-  return stubsInRangeVA;
+  return branchTargetThresholdVA;
 }
 
 void ConcatOutputSection::finalizeOne(ConcatInputSection *isec) {
", objc_stubs = " + std::to_string(in.objcStubs->getSize()) + : "") + + ", isecVA = " + utohexstr(isecVA) + ", threshold = " + + utohexstr(branchTargetThresholdVA) + ", isecEnd = " + utohexstr(isecEnd) + ", tail = " + utohexstr(isecEnd - isecVA) + ", slop = " + utohexstr(forwardBranchRange - (isecEnd - isecVA))); - return stubsInRangeVA; + return branchTargetThresholdVA; } void ConcatOutputSection::finalizeOne(ConcatInputSection *isec) { @@ -254,7 +278,7 @@ void TextOutputSection::finalize() { uint64_t forwardBranchRange = target->forwardBranchRange; uint64_t backwardBranchRange = target->backwardBranchRange; - uint64_t stubsInRangeVA = TargetInfo::outOfRangeVA; + uint64_t branchTargetThresholdVA = TargetInfo::outOfRangeVA; size_t thunkSize = target->thunkSize; size_t relocCount = 0; size_t callSiteCount = 0; @@ -297,16 +321,18 @@ void TextOutputSection::finalize() { if (!isec->hasCallSites) continue; - if (finalIdx == endIdx && stubsInRangeVA == TargetInfo::outOfRangeVA) { - // When we have finalized all input sections, __stubs (destined - // to follow __text) comes within range of forward branches and - // we can estimate the threshold address after which we can - // reach any stub with a forward branch. Note that although it - // sits in the middle of a loop, this code executes only once. + if (finalIdx == endIdx && + branchTargetThresholdVA == TargetInfo::outOfRangeVA) { + // When we have finalized all input sections, branch target sections (like + // __stubs and __objc_stubs) (destined to follow __text) come within range + // of forward branches and we can estimate the threshold address after + // which we can reach any branch target with a forward branch. Note that + // although it sits in the middle of a loop, this code executes only once. // It is in the loop because we need to call it at the proper // time: the earliest call site from which the end of __text - // (and start of __stubs) comes within range of a forward branch. - stubsInRangeVA = estimateStubsInRangeVA(callIdx); + // (and start of branch target sections) comes within range of a forward + // branch. + branchTargetThresholdVA = estimateBranchTargetThresholdVA(callIdx); } // Process relocs by ascending address, i.e., ascending offset within isec std::vector &relocs = isec->relocs; @@ -328,10 +354,14 @@ void TextOutputSection::finalize() { auto *funcSym = cast(r.referent); ThunkInfo &thunkInfo = thunkMap[funcSym]; // The referent is not reachable, so we need to use a thunk ... - if (funcSym->isInStubs() && callVA >= stubsInRangeVA) { + if ((funcSym->isInStubs() || + (in.objcStubs && in.objcStubs->isNeeded() && + ObjCStubsSection::isObjCStubSymbol(funcSym))) && + callVA >= branchTargetThresholdVA) { assert(callVA != TargetInfo::outOfRangeVA); - // ... Oh, wait! We are close enough to the end that __stubs - // are now within range of a simple forward branch. + // ... Oh, wait! We are close enough to the end that branch target + // sections (__stubs, __objc_stubs) are now within range of a simple + // forward branch. 
diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h
index 8131c48d3111..1c68018f9755 100644
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -80,7 +80,7 @@ public:
   }
 
 private:
-  uint64_t estimateStubsInRangeVA(size_t callIdx) const;
+  uint64_t estimateBranchTargetThresholdVA(size_t callIdx) const;
 
   std::vector<ConcatInputSection *> thunks;
 };
diff --git a/lld/test/MachO/arm64-thunks.s b/lld/test/MachO/arm64-thunks.s
index b48abec71f63..cd3842895c47 100644
--- a/lld/test/MachO/arm64-thunks.s
+++ b/lld/test/MachO/arm64-thunks.s
@@ -18,9 +18,28 @@
 ## with safe_thunks ICF.
 # RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o --icf=safe_thunks
 # RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
+# RUN: llvm-objdump --macho --section-headers %t/thunk > %t/headers.txt
+# RUN: llvm-otool -vs __DATA __objc_selrefs %t/thunk >> %t/headers.txt
+# RUN: llvm-otool -vs __TEXT __objc_stubs %t/thunk >> %t/headers.txt
+# RUN: FileCheck %s --check-prefix=OBJC < %t/headers.txt
 
 # RUN: FileCheck %s --input-file %t/thunk.map --check-prefix=MAP
-
+
+# OBJC: Sections:
+# OBJC: __text
+# OBJC-NEXT: __lcxx_override
+# OBJC-NEXT: __stubs
+# OBJC-NEXT: __stub_helper
+# OBJC-NEXT: __objc_stubs
+
+# OBJC: Contents of (__DATA,__objc_selrefs) section
+# OBJC-NEXT: {{[0-9a-f]*}} __TEXT:__objc_methname:foo
+# OBJC-NEXT: {{[0-9a-f]*}} __TEXT:__objc_methname:bar
+
+# OBJC: Contents of (__TEXT,__objc_stubs) section
+# OBJC: _objc_msgSend$bar:
+# OBJC: _objc_msgSend$foo:
+
 # MAP: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr
 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _a
 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b
@@ -45,7 +64,6 @@
 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.1
 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.1
 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr.thunk.0
-# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} ltmp0.thunk.0
 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _z
 
@@ -206,6 +224,22 @@
 
 # CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>:
 
+.section __TEXT,__objc_methname,cstring_literals
+lselref1:
+  .asciz "foo"
+lselref2:
+  .asciz "bar"
+
+.section __DATA,__objc_selrefs,literal_pointers,no_dead_strip
+.p2align 3
+.quad lselref1
+.quad lselref2
+
+.text
+.globl _objc_msgSend
+_objc_msgSend:
+  ret
+
 .subsections_via_symbols
 .addrsig
 
@@ -352,6 +386,8 @@ _main:
   bl _fold_func_low_addr
   bl _fold_func_high_addr
   bl ___nan
+  bl _objc_msgSend$foo
+  bl _objc_msgSend$bar
   ret
 
 .globl _fold_func_high_addr
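The test exercises the new path end to end: the OBJC prefix pins the section
order (__objc_stubs placed last in __TEXT, after __stubs and __stub_helper),
and the two new bl _objc_msgSend$... call sites force one synthesized
selector stub each. The reason __objc_stubs can get big enough to matter is
that it grows linearly with the number of distinct selectors called through
objc_msgSend; a back-of-the-envelope sketch, where the 32-byte per-stub size
is an assumption about the arm64 fast-path stub layout rather than something
this patch specifies:

    #include <cstdint>

    // Each distinct selector called via objc_msgSend gets one fixed-size
    // stub, so the section size scales with selector count: roughly 100k
    // selectors at an assumed 32 bytes each is ~3 MiB of extra branch
    // targets that the range math above must account for.
    uint64_t objcStubsSize(uint64_t numSelectors, uint64_t perStubSize = 32) {
      return numSelectors * perStubSize;
    }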