[profile] Use base+vaddr for __llvm_write_binary_ids note pointers (#114907)

This function is always examining its own ELF headers in memory, but it
was trying to use conditions between examining files or memory, and it
wasn't accounting for LOAD offsets at runtime. This is especially bad if
a loaded segment has additional padding that's not in the file offsets.

Now we do a first scan of the program headers to figure out the runtime
base address based on `PT_PHDR` and/or `PT_DYNAMIC` (else assume zero),
similar to libc's `do_start`. Then each `PT_NOTE` pointer is simply the
base plus the segments's `pt_vaddr`, which includes LOAD offsets.

Fixes #114605
This commit is contained in:
Josh Stone
2024-11-21 10:14:29 -08:00
committed by GitHub
parent 391bf068f2
commit 667e1fadcf
2 changed files with 49 additions and 24 deletions

View File

@@ -194,41 +194,33 @@ static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
*/
COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
extern const ElfW(Ehdr) __ehdr_start __attribute__((visibility("hidden")));
extern ElfW(Dyn) _DYNAMIC[] __attribute__((weak, visibility("hidden")));
const ElfW(Ehdr) *ElfHeader = &__ehdr_start;
const ElfW(Phdr) *ProgramHeader =
(const ElfW(Phdr) *)((uintptr_t)ElfHeader + ElfHeader->e_phoff);
/* Compute the added base address in case of position-independent code. */
uintptr_t Base = 0;
for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
if (ProgramHeader[I].p_type == PT_PHDR)
Base = (uintptr_t)ProgramHeader - ProgramHeader[I].p_vaddr;
if (ProgramHeader[I].p_type == PT_DYNAMIC && _DYNAMIC)
Base = (uintptr_t)_DYNAMIC - ProgramHeader[I].p_vaddr;
}
int TotalBinaryIdsSize = 0;
uint32_t I;
/* Iterate through entries in the program header. */
for (I = 0; I < ElfHeader->e_phnum; I++) {
for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
/* Look for the notes segment in program header entries. */
if (ProgramHeader[I].p_type != PT_NOTE)
continue;
/* There can be multiple notes segment, and examine each of them. */
const ElfW(Nhdr) * Note;
const ElfW(Nhdr) * NotesEnd;
/*
* When examining notes in file, use p_offset, which is the offset within
* the elf file, to find the start of notes.
*/
if (ProgramHeader[I].p_memsz == 0 ||
ProgramHeader[I].p_memsz == ProgramHeader[I].p_filesz) {
Note = (const ElfW(Nhdr) *)((uintptr_t)ElfHeader +
ProgramHeader[I].p_offset);
NotesEnd = (const ElfW(Nhdr) *)((const char *)(Note) +
ProgramHeader[I].p_filesz);
} else {
/*
* When examining notes in memory, use p_vaddr, which is the address of
* section after loaded to memory, to find the start of notes.
*/
Note =
(const ElfW(Nhdr) *)((uintptr_t)ElfHeader + ProgramHeader[I].p_vaddr);
NotesEnd =
(const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz);
}
const ElfW(Nhdr) *Note =
(const ElfW(Nhdr) *)(Base + ProgramHeader[I].p_vaddr);
const ElfW(Nhdr) *NotesEnd =
(const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz);
int BinaryIdsSize = WriteBinaryIds(Writer, Note, NotesEnd);
if (TotalBinaryIdsSize == -1)

View File

@@ -0,0 +1,33 @@
// REQUIRES: linux
//
// Make sure the build-id can be found in both EXEC and DYN (PIE) files,
// even when the note's section-start is forced to a weird address.
// (The DYN case would also apply to libraries, not explicitly tested here.)
// DEFINE: %{cflags} =
// DEFINE: %{check} = ( \
// DEFINE: %clang_profgen -Wl,--build-id -o %t %s %{cflags} && \
// DEFINE: env LLVM_PROFILE_FILE=%t.profraw %run %t && \
// DEFINE: llvm-readelf --notes %t && \
// DEFINE: llvm-profdata show --binary-ids %t.profraw \
// DEFINE: ) | FileCheck %s
// REDEFINE: %{cflags} = -no-pie
// RUN: %{check}
// REDEFINE: %{cflags} = -pie -fPIE
// RUN: %{check}
// REDEFINE: %{cflags} = -no-pie -Wl,--section-start=.note.gnu.build-id=0x1000000
// RUN: %{check}
// REDEFINE: %{cflags} = -pie -fPIE -Wl,--section-start=.note.gnu.build-id=0x1000000
// RUN: %{check}
// CHECK-LABEL{LITERAL}: .note.gnu.build-id
// CHECK: Build ID: [[ID:[0-9a-f]+]]
// CHECK-LABEL{LITERAL}: Binary IDs:
// CHECK-NEXT: [[ID]]
int main() { return 0; }