[AArch64] fix trampoline implementation: use X15 (#126743)
AAPCS64 reserves any of X9-X15 for a compiler to choose to use for this purpose, and says not to use X16 or X18 as GCC (and the previous implementation) chose to do. The X18 register may need to be used by the kernel in some circumstances, as specified by the platform ABI, so it is generally an unwise choice. Simply choosing a different register fixes the problem of this being broken on any platform that actually follows the platform ABI (which is all of them except EABI, if I am reading this Linux kernel bug correctly: https://lkml2.uits.iu.edu/hypermail/linux/kernel/2001.2/01502.html). As a side benefit, this change also generates slightly better code and avoids needing compiler-rt to be present. I did that by following the XCore implementation instead of the PPC one (although in hindsight, following the RISC-V one might have been slightly more readable). That X18 is wrong to use for this purpose has been known for many years (e.g. https://www.mail-archive.com/gcc@gcc.gnu.org/msg76934.html), and it has also been known that fixing this to use one of the correct registers is not an ABI break, since this only appears inside a translation unit. Some of the other temporary registers (e.g. X9) are already reserved inside LLVM for internal use as a generic temporary register in the prologue before saving registers, while X15 was already used in rare cases as a scratch register in the prologue as well, so it seemed the most logical choice here.
This commit is contained in:
@@ -272,11 +272,6 @@ switch32
|
||||
switch8
|
||||
switchu8
|
||||
|
||||
// This function generates a custom trampoline function with the specific
|
||||
// realFunc and localsPtr values.
|
||||
void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
|
||||
const void* realFunc, void* localsPtr);
|
||||
|
||||
// There is no C interface to the *_vfp_d8_d15_regs functions. They are
|
||||
// called in the prolog and epilog of Thumb1 functions. When the C++ ABI uses
|
||||
// SJLJ for exceptions, each function with a catch clause or destructors needs
|
||||
|
||||
@@ -41,45 +41,3 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
|
||||
__clear_cache(trampOnStack, &trampOnStack[10]);
|
||||
}
|
||||
#endif // __powerpc__ && !defined(__powerpc64__)
|
||||
|
||||
// The AArch64 compiler generates calls to __trampoline_setup() when creating
|
||||
// trampoline functions on the stack for use with nested functions.
|
||||
// This function creates a custom 36-byte trampoline function on the stack
|
||||
// which loads x18 with a pointer to the outer function's locals
|
||||
// and then jumps to the target nested function.
|
||||
// Note: x18 is a reserved platform register on Windows and macOS.
|
||||
|
||||
#if defined(__aarch64__) && defined(__ELF__)
|
||||
COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
|
||||
int trampSizeAllocated,
|
||||
const void *realFunc, void *localsPtr) {
|
||||
// This should never happen, but if compiler did not allocate
|
||||
// enough space on stack for the trampoline, abort.
|
||||
if (trampSizeAllocated < 36)
|
||||
compilerrt_abort();
|
||||
|
||||
// create trampoline
|
||||
// Load realFunc into x17. mov/movk 16 bits at a time.
|
||||
trampOnStack[0] =
|
||||
0xd2800000u | ((((uint64_t)realFunc >> 0) & 0xffffu) << 5) | 0x11;
|
||||
trampOnStack[1] =
|
||||
0xf2a00000u | ((((uint64_t)realFunc >> 16) & 0xffffu) << 5) | 0x11;
|
||||
trampOnStack[2] =
|
||||
0xf2c00000u | ((((uint64_t)realFunc >> 32) & 0xffffu) << 5) | 0x11;
|
||||
trampOnStack[3] =
|
||||
0xf2e00000u | ((((uint64_t)realFunc >> 48) & 0xffffu) << 5) | 0x11;
|
||||
// Load localsPtr into x18
|
||||
trampOnStack[4] =
|
||||
0xd2800000u | ((((uint64_t)localsPtr >> 0) & 0xffffu) << 5) | 0x12;
|
||||
trampOnStack[5] =
|
||||
0xf2a00000u | ((((uint64_t)localsPtr >> 16) & 0xffffu) << 5) | 0x12;
|
||||
trampOnStack[6] =
|
||||
0xf2c00000u | ((((uint64_t)localsPtr >> 32) & 0xffffu) << 5) | 0x12;
|
||||
trampOnStack[7] =
|
||||
0xf2e00000u | ((((uint64_t)localsPtr >> 48) & 0xffffu) << 5) | 0x12;
|
||||
trampOnStack[8] = 0xd61f0220; // br x17
|
||||
|
||||
// Clear instruction cache.
|
||||
__clear_cache(trampOnStack, &trampOnStack[9]);
|
||||
}
|
||||
#endif // defined(__aarch64__) && defined(__ELF__)
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
/*
|
||||
* Tests nested functions
|
||||
* The ppc and aarch64 compilers generate a call to __trampoline_setup
|
||||
* The ppc compiler generates a call to __trampoline_setup
|
||||
* The i386 and x86_64 compilers generate a call to ___enable_execute_stack
|
||||
*/
|
||||
|
||||
|
||||
Reference in New Issue
Block a user