[lld-macho] Include branch extension thunks in linker map (#120496)

This patch extends the MachO linker's map file generation to include
branch extension thunk symbols. Previously, thunks were omitted from the
map file, making it difficult to understand the final layout of the
binary, especially when debugging issues related to long branch thunks.
This change ensures thunks are included and correctly interleaved with
other symbols based on their address, providing an accurate
representation of the linked output.
This commit is contained in:
alx32
2025-01-07 21:07:51 -08:00
committed by GitHub
parent b4ae419298
commit 162814a7ec
4 changed files with 64 additions and 6 deletions

View File

@@ -25,8 +25,9 @@ class Defined;
// in the final binary.
class ConcatOutputSection : public OutputSection {
public:
explicit ConcatOutputSection(StringRef name)
: OutputSection(ConcatKind, name) {}
explicit ConcatOutputSection(StringRef name,
OutputSection::Kind kind = ConcatKind)
: OutputSection(kind, name) {}
const ConcatInputSection *firstSection() const { return inputs.front(); }
const ConcatInputSection *lastSection() const { return inputs.back(); }
@@ -46,7 +47,7 @@ public:
void writeTo(uint8_t *buf) const override;
static bool classof(const OutputSection *sec) {
return sec->kind() == ConcatKind;
return sec->kind() == ConcatKind || sec->kind() == TextKind;
}
static ConcatOutputSection *getOrCreateForInput(const InputSection *);
@@ -66,12 +67,18 @@ private:
// support thunk insertion.
class TextOutputSection : public ConcatOutputSection {
public:
explicit TextOutputSection(StringRef name) : ConcatOutputSection(name) {}
explicit TextOutputSection(StringRef name)
: ConcatOutputSection(name, TextKind) {}
void finalizeContents() override {}
void finalize() override;
bool needsThunks() const;
ArrayRef<ConcatInputSection *> getThunks() const { return thunks; }
void writeTo(uint8_t *buf) const override;
// LLVM-style RTTI hook (used by dyn_cast<TextOutputSection>): returns true
// only when `sec` carries the TextKind tag.
static bool classof(const OutputSection *sec) {
return sec->kind() == TextKind;
}
private:
uint64_t estimateStubsInRangeVA(size_t callIdx) const;

View File

@@ -161,6 +161,20 @@ static uint64_t getSymSizeForMap(Defined *sym) {
return sym->size;
}
// Combines two lists of input sections into a single list ordered by
// ascending outSecOff. Both inputs are expected to already be ordered by
// outSecOff (a precondition of std::merge); on equal offsets, entries from
// `inputs1` are placed before entries from `inputs2`.
// The result is a freshly allocated (temporary) vector. Avoiding that copy
// would be preferable, but doing so leads to a lot of code duplication.
static std::vector<ConcatInputSection *>
mergeOrderedInputs(ArrayRef<ConcatInputSection *> inputs1,
                   ArrayRef<ConcatInputSection *> inputs2) {
  // Strict-weak ordering on the section's offset within its output section.
  const auto byOutSecOff = [](const ConcatInputSection *lhs,
                              const ConcatInputSection *rhs) {
    return lhs->outSecOff < rhs->outSecOff;
  };

  // Pre-size the destination so std::merge can write through begin().
  const size_t totalCount = inputs1.size() + inputs2.size();
  std::vector<ConcatInputSection *> merged(totalCount);

  std::merge(inputs1.begin(), inputs1.end(), inputs2.begin(), inputs2.end(),
             merged.begin(), byOutSecOff);
  return merged;
}
void macho::writeMapFile() {
if (config->mapFile.empty())
return;
@@ -220,7 +234,11 @@ void macho::writeMapFile() {
os << "# Address\tSize \tFile Name\n";
for (const OutputSegment *seg : outputSegments) {
for (const OutputSection *osec : seg->getSections()) {
if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
if (auto *textOsec = dyn_cast<TextOutputSection>(osec)) {
auto inputsAndThunks =
mergeOrderedInputs(textOsec->inputs, textOsec->getThunks());
printIsecArrSyms(inputsAndThunks);
} else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
printIsecArrSyms(concatOsec->inputs);
} else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);

View File

@@ -37,6 +37,7 @@ public:
// Tag identifying the concrete OutputSection subclass. It is supplied to the
// OutputSection constructor and compared against in subclasses' classof().
enum Kind {
// Default kind passed by ConcatOutputSection's constructor.
ConcatKind,
// NOTE(review): users of this kind are not visible in this hunk — presumably
// synthetic (linker-generated) sections; confirm.
SyntheticKind,
// Passed by TextOutputSection; ConcatOutputSection::classof accepts it too.
TextKind,
};
// Records the section's name and the Kind tag identifying its subclass.
OutputSection(Kind kind, StringRef name) : name(name), sectionKind(kind) {}

View File

@@ -8,14 +8,46 @@
## (4) early calls to a dylib stub use a thunk, and later calls the stub
## directly
## (5) Thunks are created for all sections in the text segment with branches.
## (6) Thunks are in the linker map file.
## Notes:
## 0x4000000 = 64 Mi = half the magnitude of the forward-branch range
# RUN: rm -rf %t; mkdir %t
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -o %t/thunk %t/input.o
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
## Check that the thunks appear in the map file and that everything is sorted by address
# Because of the `.space` directives, the linker map ends up containing a lot of dead symbols
# (the full linker map is ~2.7GB). To keep the test from (slowly) matching regexes across all
# ~2.7GB of it, generate a copy of the linker map that stops before the dead-stripped symbols.
# RUN: awk '/# Dead Stripped Symbols:/ {exit} {print}' %t/thunk.map > %t/thunk_no_dead_syms.map
# RUN: FileCheck %s --input-file %t/thunk_no_dead_syms.map --check-prefix=MAP
# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _b
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _g.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _h.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} ___nan.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _g
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _a.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _h
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _main
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c.thunk.0
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.1
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.1
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.1
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _z
# CHECK: Disassembly of section __TEXT,__text:
# CHECK: [[#%.13x, A_PAGE:]][[#%.3x, A_OFFSET:]] <_a>: