[BOLT] Ignore AArch64 markers outside their sections. (#74106)

AArch64 uses $d and $x symbols to delimit data embedded in code.
However, sometimes we see $d symbols, typically in .eh_frame, with
addresses that belong to different sections. These occasionally fall
inside .text functions and cause BOLT to stop disassembling, which in
turn causes DWARF CFA processing to fail.

As a workaround, we just ignore symbols with addresses outside the
section they belong to. This behaviour is consistent with objdump and
similar tools.
This commit is contained in:
Jacob Bramley
2024-11-07 12:16:14 +00:00
committed by GitHub
parent 3d0b283dcd
commit 16cd5cdf4d
4 changed files with 155 additions and 3 deletions

View File

@@ -0,0 +1,56 @@
# yaml2obj input: a minimal AArch64 executable with one two-instruction
# function (`func`) and a `$d` mapping symbol whose address lies outside the
# section it is defined in.
--- !ELF
FileHeader:
  Class: ELFCLASS64
  Data: ELFDATA2LSB
  Type: ET_EXEC
  Machine: EM_AARCH64
  Entry: 0x2a0000
ProgramHeaders:
  - Type: PT_PHDR
    Flags: [ PF_R ]
    VAddr: 0x40
    Align: 0x8
    FileSize: 0xa8
    MemSize: 0xa8
    Offset: 0x40
  - Type: PT_LOAD
    Flags: [ PF_R ]
    VAddr: 0x0
    Align: 0x10000
    FileSize: 0xf8
    MemSize: 0xf8
    Offset: 0x0
  - Type: PT_LOAD
    Flags: [ PF_X, PF_R ]
    VAddr: 0x2a0000
    Align: 0x10000
    FirstSec: .text
    LastSec: .ignored
Sections:
  # Two little-endian instruction words: d2800540 (mov) and d65f03c0 (ret).
  - Name: .text
    Type: SHT_PROGBITS
    Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
    Address: 0x2a0000
    AddressAlign: 0x4
    Content: 400580d2c0035fd6
  # Covers 0x2a0008-0x2a000f; exists only to own the `$d.42` symbol below.
  - Name: .ignored
    Type: SHT_PROGBITS
    Flags: [ SHF_ALLOC ]
    Address: 0x2a0008
    AddressAlign: 0x8
    Size: 0x8
  # One CIE followed by one FDE describing `func`.
  - Name: .eh_frame
    Type: SHT_PROGBITS
    Flags: [ SHF_ALLOC ]
    Address: 0x2a0010
    AddressAlign: 0x8
    Content: 1000000000000000017a520004781e010b0c1f00140000001800000000002a0008000000000e01410e010000
Symbols:
  - Name: func
    Section: .text
    Value: 0x2a0000
    Size: 0x8
  # Spurious marker: defined in .ignored, but 0x2a0004 is below that section's
  # start address and falls inside `func` in .text.
  - Name: '$d.42'
    Section: .ignored
    Value: 0x2a0004
...

View File

@@ -0,0 +1,61 @@
// Check that marker symbols ($d, $x) denoting data embedded in code are ignored
// if they fall outside their respective sections.
// RUN: yaml2obj %S/Inputs/spurious-marker-symbol.yaml -o %t.exe
// RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
// CHECK: 1 out of 1 functions were overwritten
// RUN: llvm-objdump -j .text -d %t.bolt | FileCheck %s -check-prefix=CHECK-DISASM
// CHECK-DISASM: func
// CHECK-DISASM: 2a0000: d2800540 mov
// CHECK-DISASM: 2a0004: d65f03c0 ret
// The YAML encodes the following assembly and debug information:
// Reference listing only: the test binary is produced from the YAML input by
// yaml2obj, so this text is never assembled. It must keep matching the bytes
// in the YAML .text section.
.text
.globl func
.type func, %function
func:
mov x0, #42 // 0x2a0000: d2800540
// $d.42: (symbol in .ignored, with an address in .text)
// Its value is 0x2a0004, i.e. the address of the `ret` below.
ret // 0x2a0004: d65f03c0
// .eh_frame contains minimal DWARF with a CFA operation on the `ret`. BOLT
// should ignore the spurious `$d.42`. If it doesn't, then it will stop
// disassembling after the `mov` and will fail to process the second
// DW_CFA_def_cfa_offset.
//
// CIE
// length: 00000010
// CIE_id: 00000000
// version: 01
// augmentation:
// "zR" 7a 52 00
// - read augmentation data
// - read FDE pointer encoding
// code_alignment_factor: 04
// data_alignment_factor: 78 (-8)
// return_address_register: 1e (r30 / lr)
//
// augmentation data:
// length: 01
// FDE pointers are absptr+sdata4 0b
//
// initial_instructions:
// DW_CFA_def_cfa (31, 0): 0c 1f 00
//
// Encoding: 10000000'00000000'01'7a5200'04'78'1e'01'0b'0c1f00
//
// FDE
// length: 00000014
// CIE_pointer: 00000018 (backwards offset from here to CIE)
// initial_location: 002a0000 (`func` as absptr+sdata4)
// address_range: 00000008
// augmentation data:
// length: 00
// instructions:
// DW_CFA_def_cfa_offset (1) 0e 01
// DW_CFA_advance_loc (1) 41 (`ret` at 0x2a0004)
// DW_CFA_def_cfa_offset (1) 0e 01 Fails unless $d.42 is ignored.
// DW_CFA_nop 00 00
//
// Encoding: 14000000'18000000'00002a00'08000000'000e0141'0e010000