[BOLT] Enable hugify for AArch64 (#117158)

Add required hugify instrumentation and runtime libraries support for AArch64.
Fixes #58226
Unblocks #62695
This commit is contained in:
alekuz01
2025-04-15 12:59:05 +01:00
committed by GitHub
parent 7eae1a4d1f
commit 38faf32d23
7 changed files with 67 additions and 21 deletions

View File

@@ -136,7 +136,7 @@ if (LLVM_INCLUDE_TESTS)
endif()
if (BOLT_ENABLE_RUNTIME)
message(STATUS "Building BOLT runtime libraries for X86")
message(STATUS "Building BOLT runtime libraries for ${CMAKE_SYSTEM_PROCESSOR}")
set(extra_args "")
if(CMAKE_SYSROOT)
list(APPEND extra_args -DCMAKE_SYSROOT=${CMAKE_SYSROOT})

View File

@@ -597,8 +597,9 @@ Error RewriteInstance::discoverStorage() {
// Hugify: Additional huge page from left side due to
// weird ASLR mapping addresses (4KB aligned)
if (opts::Hugify && !BC->HasFixedLoadAddress)
if (opts::Hugify && !BC->HasFixedLoadAddress) {
NextAvailableAddress += BC->PageAlign;
}
if (!opts::UseGnuStack && !BC->IsLinuxKernel) {
// This is where the black magic happens. Creating PHDR table in a segment
@@ -5885,17 +5886,28 @@ void RewriteInstance::rewriteFile() {
// Write all allocatable sections - reloc-mode text is written here as well
for (BinarySection &Section : BC->allocatableSections()) {
if (!Section.isFinalized() || !Section.getOutputData())
if (!Section.isFinalized() || !Section.getOutputData()) {
// Debug-only trace for sections skipped by the guard above, i.e. sections
// that are not finalized or have no output data. The wording matches the
// condition: the section is skipped because it is NOT finalized.
LLVM_DEBUG(if (opts::Verbosity > 1) {
  dbgs() << "BOLT-INFO: new section is not finalized or !getOutputData, skip "
         << Section.getName() << '\n';
});
continue;
if (Section.isLinkOnly())
}
if (Section.isLinkOnly()) {
LLVM_DEBUG(if (opts::Verbosity > 1) {
dbgs() << "BOLT-INFO: new section is link only, skip "
<< Section.getName() << '\n';
});
continue;
}
if (opts::Verbosity >= 1)
BC->outs() << "BOLT: writing new section " << Section.getName()
<< "\n data at 0x"
<< Twine::utohexstr(Section.getAllocAddress()) << "\n of size "
<< Section.getOutputSize() << "\n at offset "
<< Section.getOutputFileOffset() << '\n';
<< Section.getOutputFileOffset() << " with content size "
<< Section.getOutputContents().size() << '\n';
OS.seek(Section.getOutputFileOffset());
Section.write(OS);
}

View File

@@ -151,10 +151,12 @@ struct timespec {
uint64_t tv_nsec; /* nanoseconds */
};
#if defined(__aarch64__)
#if defined(__aarch64__) || defined(__arm64__)
#include "sys_aarch64.h"
#else
#elif defined(__x86_64__)
#include "sys_x86_64.h"
#else
#error "For AArch64/ARM64 and X86_64 only."
#endif
constexpr uint32_t BufSize = 10240;

View File

@@ -6,7 +6,8 @@
//
//===---------------------------------------------------------------------===//
#if defined (__x86_64__) && !defined(__APPLE__)
// Build the hugify runtime only for non-Apple x86-64 and AArch64 targets.
// The architecture tests are grouped explicitly: `&&` binds tighter than `||`,
// so without the outer parentheses the Apple exclusion would apply to the
// AArch64 alternative only and x86-64 Apple builds would compile this file.
#if (defined(__x86_64__) || defined(__aarch64__) || defined(__arm64__)) &&     \
    !defined(__APPLE__)
#include "common.h"
@@ -73,8 +74,10 @@ static bool hasPagecacheTHPSupport() {
if (Res < 0)
return false;
if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]"))
if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]")) {
DEBUG(report("[hugify] THP support is not enabled.\n");)
return false;
}
struct KernelVersionTy {
uint32_t major;
@@ -167,12 +170,20 @@ extern "C" void __bolt_hugify_self_impl() {
/// Hook placed at the ELF entry point, so it must preserve all machine state.
///
/// Declared with the `naked` attribute; the body consists of inline assembly
/// that expands SAVE_ALL, calls __bolt_hugify_self_impl, expands RESTORE_ALL,
/// and then transfers control to __bolt_hugify_start_program.
extern "C" __attribute((naked)) void __bolt_hugify_self() {
// clang-format off
#if defined(__x86_64__)
// x86-64: call the implementation, then jump directly to
// __bolt_hugify_start_program.
__asm__ __volatile__(SAVE_ALL "call __bolt_hugify_self_impl\n" RESTORE_ALL
"jmp __bolt_hugify_start_program\n" ::
:);
"jmp __bolt_hugify_start_program\n"
:::);
#elif defined(__aarch64__) || defined(__arm64__)
// AArch64: call the implementation with `bl`; after RESTORE_ALL, form the
// address of __bolt_hugify_start_program in x16 (adrp plus an add of the
// :lo12: part) and branch through that register.
__asm__ __volatile__(SAVE_ALL "bl __bolt_hugify_self_impl\n" RESTORE_ALL
"adrp x16, __bolt_hugify_start_program\n"
"add x16, x16, #:lo12:__bolt_hugify_start_program\n"
"br x16\n"
:::);
#else
// Any other architecture: no hook sequence is provided, exit with status 1.
exit(1);
__exit(1);
#endif
// clang-format on
}
#endif

View File

@@ -11,17 +11,28 @@ int main(int argc, char **argv) {
REQUIRES: system-linux,bolt-runtime
RUN: %clang %cflags -no-pie %s -o %t.nopie.exe -Wl,-q
RUN: %clang %cflags -fpic -pie %s -o %t.pie.exe -Wl,-q
RUN: %clang %cflags -fpic %s -o %t.pie.exe -Wl,-q
RUN: llvm-bolt %t.nopie.exe --lite=0 -o %t.nopie --hugify
RUN: llvm-bolt %t.pie.exe --lite=0 -o %t.pie --hugify
RUN: llvm-nm --numeric-sort --print-armap %t.nopie | \
RUN: FileCheck %s -check-prefix=CHECK-NM
RUN: %t.nopie | FileCheck %s -check-prefix=CHECK-NOPIE
CHECK-NOPIE: Hello world
RUN: llvm-nm --numeric-sort --print-armap %t.pie | \
RUN: FileCheck %s -check-prefix=CHECK-NM
RUN: %t.pie | FileCheck %s -check-prefix=CHECK-PIE
CHECK-NM: W __hot_start
CHECK-NM-NEXT: T _start
CHECK-NM: T main
CHECK-NM: W __hot_end
CHECK-NM: t __bolt_hugify_start_program
CHECK-NM-NEXT: W __bolt_runtime_start
CHECK-NOPIE: Hello world
CHECK-PIE: Hello world
*/

View File

@@ -5,9 +5,7 @@
*/
#include <stdio.h>
int foo(int x) {
return x + 1;
}
int foo(int x) { return x + 1; }
int fib(int x) {
if (x < 2)
@@ -15,9 +13,7 @@ int fib(int x) {
return fib(x - 1) + fib(x - 2);
}
int bar(int x) {
return x - 1;
}
int bar(int x) { return x - 1; }
int main(int argc, char **argv) {
printf("fib(%d) = %d\n", argc, fib(argc));
@@ -31,14 +27,28 @@ RUN: %clang %cflags -no-pie %s -o %t.exe -Wl,-q
RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \
RUN: --hugify --function-order=%p/Inputs/user_func_order.txt -o %t
RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \
RUN: --function-order=%p/Inputs/user_func_order.txt -o %t.nohugify
RUN: llvm-nm --numeric-sort --print-armap %t | \
RUN: FileCheck %s -check-prefix=CHECK-NM
RUN: %t 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT
RUN: llvm-nm --numeric-sort --print-armap %t.nohugify | \
RUN: FileCheck %s -check-prefix=CHECK-NM-NOHUGIFY
RUN: %t.nohugify 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT-NOHUGIFY
CHECK-NM: W __hot_start
CHECK-NM: T main
CHECK-NM-NEXT: T fib
CHECK-NM-NEXT: W __hot_end
CHECK-NM: t __bolt_hugify_start_program
CHECK-NM-NEXT: W __bolt_runtime_start
CHECK-NM-NOHUGIFY: W __hot_start
CHECK-NM-NOHUGIFY: T main
CHECK-NM-NOHUGIFY-NEXT: T fib
CHECK-NM-NOHUGIFY-NEXT: W __hot_end
CHECK-OUTPUT: fib(4) = 3
CHECK-OUTPUT-NOHUGIFY: fib(4) = 3
*/