[llvm-objdump] Support --symbolize-operands on AArch64

Similar to the existing implementations for X86 and PPC, support
symbolizing branch targets for AArch64. For ADRP, keep printing the numeric
address: ADRP is typically paired with an ADD or an immediate-offset LDR/STR,
so a label placed at its target would likely be at the wrong location.

Pull Request: https://github.com/llvm/llvm-project/pull/145009
This commit is contained in:
Alexis Engelke
2025-06-25 17:09:25 +02:00
committed by GitHub
parent 10edc3df99
commit 36819eaed1
5 changed files with 159 additions and 2 deletions

View File

@@ -278,7 +278,7 @@ OPTIONS
any analysis with a special representation (i.e. BlockFrequency,
BranchProbability, etc) are printed as raw hex values.
Only works with PowerPC objects or X86 linked images.
Only supported for AArch64, BPF, PowerPC, and X86.
Example:
A non-symbolized branch instruction with a local target and pc-relative memory access like

View File

@@ -1784,6 +1784,10 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address,
unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
// Do not print the numeric target address when symbolizing.
if (SymbolizeOperands)
return;
const MCOperand &Op = MI->getOperand(OpNum);
// If the label has already been resolved to an immediate offset (say, when
@@ -1813,6 +1817,12 @@ void AArch64InstPrinter::printAdrAdrpLabel(const MCInst *MI, uint64_t Address,
unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
// Do not print the numeric target address when symbolizing.
// However, do print for ADRP, as this is typically used together with an ADD
// or an immediate-offset ldr/str and the label is likely at the wrong point.
if (SymbolizeOperands && MI->getOpcode() != AArch64::ADRP)
return;
const MCOperand &Op = MI->getOperand(OpNum);
// If the label has already been resolved to an immediate offset (say, when

View File

@@ -0,0 +1,67 @@
# RUN: yaml2obj %s -o %t
# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x6000
# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \
# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x8000
## Expect to find the branch labels and global variable name.
# CHECK: <_start>:
# CHECK-NEXT: ldr x0, <symbol>
# CHECK-NEXT: <L0>:
# CHECK-NEXT: adrp x1, [[ABS_ADRP_VAL]] <symbol+0xff4>
# CHECK-NEXT: adr x2, <symbol>
# CHECK-NEXT: cmp x1, x2
# CHECK-NEXT: b.eq <L1>
# CHECK-NEXT: b <L0>
# CHECK-NEXT: <L1>:
# CHECK-NEXT: cbz x2, <L0>
# CHECK-NEXT: ret
## Machine code generated with:
# llvm-mc --arch=aarch64 --filetype=obj -o tmp.o <<EOF
# .text
# .p2align 14
# .globl _start
# _start:
# ldr x0, symbol
# 1:
# adrp x1, symbol + 0x1000
# adr x2, symbol
# cmp x1, x2
# b.eq 2f
# b 1b
# 2:
# cbz x2, 1b
# ret
#
# .data
# .p2align 12
# .skip 12
# symbol:
# EOF
# ld.lld -shared --nmagic -o tmp.so tmp.o
# llvm-objdump -s tmp.so --section=.text
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
Type: ET_EXEC
Machine: EM_AARCH64
Sections:
- Name: .text
Type: SHT_PROGBITS
Address: 0x4000
Flags: [SHF_ALLOC, SHF_EXECINSTR]
Content: '60800058010000d0228000103f0002eb40000054fcffff1762ffffb4c0035fd6'
- Name: .data
Type: SHT_PROGBITS
Flags: [SHF_ALLOC, SHF_WRITE]
Address: 0x5000
Symbols:
- Name: _start
Section: .text
Value: 0x4000
- Name: symbol
Section: .data
Value: 0x500c

View File

@@ -0,0 +1,79 @@
# RUN: llvm-mc --triple=aarch64-elf --filetype=obj < %s | \
# RUN: llvm-objdump -d -r --symbolize-operands --no-show-raw-insn --no-leading-addr - | \
# RUN: FileCheck %s --match-full-lines
# CHECK: <fn1>:
# CHECK-NEXT: b <L0>
# CHECK-NEXT: tbz x0, #0x2c, <L2>
# CHECK-NEXT: <L0>:
# CHECK-NEXT: b.eq <L1>
# CHECK-NEXT: <L1>:
# CHECK-NEXT: cbz x1, <L0>
# CHECK-NEXT: <L2>:
# CHECK-NEXT: nop
# CHECK-NEXT: <L3>:
# CHECK-NEXT: bl <L3>
# CHECK-NEXT: R_AARCH64_CALL26 fn2
# CHECK-NEXT: bl <fn2>
# CHECK-NEXT: adr x0, <L2>
# CHECK-NEXT: <L4>:
# CHECK-NEXT: adr x1, <L4>
# CHECK-NEXT: R_AARCH64_ADR_PREL_LO21 fn2
# CHECK-NEXT: adr x2, <fn2>
# CHECK-NEXT: ldr w0, <L2>
# CHECK-NEXT: <L5>:
# CHECK-NEXT: ldr w0, <L5>
# CHECK-NEXT: R_AARCH64_LD_PREL_LO19 fn2
# CHECK-NEXT: ret
# CHECK-NEXT: nop
# CHECK-NEXT: nop
# CHECK-NEXT: nop
# CHECK-EMPTY:
# CHECK-NEXT: <fn2>:
# CHECK-NEXT: bl <L0>
# CHECK-NEXT: adrp x3, 0x0 <fn1>
# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2
# CHECK-NEXT: add x3, x3, #0x0
# CHECK-NEXT: R_AARCH64_ADD_ABS_LO12_NC fn2
# CHECK-NEXT: adrp x3, 0x0 <fn1>
# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2
# CHECK-NEXT: ldr x0, [x3]
# CHECK-NEXT: R_AARCH64_LDST64_ABS_LO12_NC fn2
# CHECK-NEXT: ret
# CHECK-NEXT: nop
# CHECK-NEXT: nop
# CHECK-NEXT: <L0>:
# CHECK-NEXT: ret
## fn1 exercises each pc-relative instruction kind covered by this test
## (b, tbz, b.eq, cbz, bl, adr, literal ldr) against three kinds of target:
## numeric local labels, the global symbol fn2, and the local label .Lfn2.
.p2align 4
.global fn1
fn1:
b 0f
tbz x0, 44, 2f
0: b.eq 1f
1: cbz x1, 0b
2: nop
## bl and adr reference the same target once as fn2 and once as .Lfn2; the
## literal ldr only uses fn2. The expected output above shows a relocation
## against fn2 for the fn2 references, with the operand printed as a label on
## the instruction itself, while the .Lfn2 references print as <fn2>.
bl fn2
bl .Lfn2
adr x0, 2b
adr x1, fn2
adr x2, .Lfn2
ldr w0, 2b
ldr w0, fn2
ret
## fn2 covers a call to a private function (.Lfn3) and the two adrp pairings
## (adrp + add, adrp + ldr). For adrp the expected output above keeps the
## numeric operand (0x0 <fn1>) instead of printing only a label.
.p2align 4
.global fn2
fn2:
.Lfn2: ## Local label for non-interposable call.
bl .Lfn3
## In the future, we might identify the pairs and symbolize the operands properly.
adrp x3, fn2
add x3, x3, :lo12:fn2
adrp x3, fn2
ldr x0, [x3, :lo12:fn2]
ret
## Target of the `bl .Lfn3` in fn2. The expected output above names this
## location with a synthesized <L0> label rather than a symbol name.
.p2align 4
.Lfn3: ## Private function
ret

View File

@@ -1495,8 +1495,9 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
// Supported by certain targets.
const bool isPPC = STI->getTargetTriple().isPPC();
const bool isX86 = STI->getTargetTriple().isX86();
const bool isAArch64 = STI->getTargetTriple().isAArch64();
const bool isBPF = STI->getTargetTriple().isBPF();
if (!isPPC && !isX86 && !isBPF)
if (!isPPC && !isX86 && !isAArch64 && !isBPF)
return;
if (MIA)