[BOLT][AArch64] Add support for short LLD thunks/veneers (#118422)

When a callee function is closer than 256MB from its call site, the LLD
linker can strategically create a short thunk for the function consisting
of a single branch instruction (which covers +/-128MB). Detect such thunks
in BOLT and convert calls to them into direct calls.
This commit is contained in:
Maksim Panchenko
2024-12-03 13:44:51 -08:00
committed by GitHub
parent b5b15c1973
commit d5956fb8f9
2 changed files with 47 additions and 23 deletions

View File

@@ -46,16 +46,17 @@ Error VeneerElimination::runOnFunctions(BinaryContext &BC) {
if (BF.isIgnored())
continue;
MCInst &FirstInstruction = *(BF.begin()->begin());
const MCSymbol *VeneerTargetSymbol = 0;
uint64_t TargetAddress;
if (BC.MIB->matchAbsLongVeneer(BF, TargetAddress)) {
if (BC.MIB->isTailCall(FirstInstruction)) {
VeneerTargetSymbol = BC.MIB->getTargetSymbol(FirstInstruction);
} else if (BC.MIB->matchAbsLongVeneer(BF, TargetAddress)) {
if (BinaryFunction *TargetBF =
BC.getBinaryFunctionAtAddress(TargetAddress))
VeneerTargetSymbol = TargetBF->getSymbol();
} else {
MCInst &FirstInstruction = *(BF.begin()->begin());
if (BC.MIB->hasAnnotation(FirstInstruction, "AArch64Veneer"))
VeneerTargetSymbol = BC.MIB->getTargetSymbol(FirstInstruction, 1);
} else if (BC.MIB->hasAnnotation(FirstInstruction, "AArch64Veneer")) {
VeneerTargetSymbol = BC.MIB->getTargetSymbol(FirstInstruction, 1);
}
if (!VeneerTargetSymbol)

View File

@@ -1,5 +1,5 @@
## Check that llvm-bolt correctly recognizes long absolute thunks generated
## by LLD.
## Check that llvm-bolt correctly recognizes veneers/thunks for absolute code
## generated by LLD.
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
# RUN: %clang %cflags -fno-PIC -no-pie %t.o -o %t.exe -nostdlib \
@@ -12,40 +12,63 @@
.text
.balign 4
.global foo
.type foo, %function
foo:
adrp x1, foo
.global far_function
.type far_function, %function
far_function:
ret
.size foo, .-foo
.size far_function, .-far_function
.global near_function
.type near_function, %function
near_function:
ret
.size near_function, .-near_function
## Force relocations against .text.
.reloc 0, R_AARCH64_NONE
.section ".mytext", "ax"
.balign 4
.global __AArch64AbsLongThunk_foo
.type __AArch64AbsLongThunk_foo, %function
__AArch64AbsLongThunk_foo:
## This version of a thunk is always generated by LLD for function calls
## spanning more than 256MB.
.global __AArch64AbsLongThunk_far_function
.type __AArch64AbsLongThunk_far_function, %function
__AArch64AbsLongThunk_far_function:
ldr x16, .L1
br x16
# CHECK-INPUT-LABEL: <__AArch64AbsLongThunk_foo>:
# CHECK-INPUT-LABEL: <__AArch64AbsLongThunk_far_function>:
# CHECK-INPUT-NEXT: ldr
# CHECK-INPUT-NEXT: br
.L1:
.quad foo
.size __AArch64AbsLongThunk_foo, .-__AArch64AbsLongThunk_foo
.quad far_function
.size __AArch64AbsLongThunk_far_function, .-__AArch64AbsLongThunk_far_function
## Check that the thunk was removed from .text and _start() calls foo()
## If a callee is closer than 256MB away, LLD may generate a thunk with a direct
## jump to the callee. Note that the thunk's name might still include "AbsLong".
.global __AArch64AbsLongThunk_near_function
.type __AArch64AbsLongThunk_near_function, %function
__AArch64AbsLongThunk_near_function:
b near_function
# CHECK-INPUT-LABEL: <__AArch64AbsLongThunk_near_function>:
# CHECK-INPUT-NEXT: b {{.*}} <near_function>
.size __AArch64AbsLongThunk_near_function, .-__AArch64AbsLongThunk_near_function
## Check that thunks were removed from .text, and _start calls functions
## directly.
# CHECK-OUTPUT-NOT: __AArch64AbsLongThunk_foo
# CHECK-OUTPUT-NOT: __AArch64AbsLongThunk_{{.*}}
.global _start
.type _start, %function
_start:
# CHECK-INPUT-LABEL: <_start>:
# CHECK-OUTPUT-LABEL: <_start>:
bl __AArch64AbsLongThunk_foo
# CHECK-INPUT-NEXT: bl {{.*}} <__AArch64AbsLongThunk_foo>
# CHECK-OUTPUT-NEXT: bl {{.*}} <foo>
bl __AArch64AbsLongThunk_far_function
bl __AArch64AbsLongThunk_near_function
# CHECK-INPUT-NEXT: bl {{.*}} <__AArch64AbsLongThunk_far_function>
# CHECK-INPUT-NEXT: bl {{.*}} <__AArch64AbsLongThunk_near_function>
# CHECK-OUTPUT-NEXT: bl {{.*}} <far_function>
# CHECK-OUTPUT-NEXT: bl {{.*}} <near_function>
ret
.size _start, .-_start