[BOLT][AArch64] Add symbolizer for AArch64 disassembler. NFCI (#127969)

Add AArch64MCSymbolizer that symbolizes `MCInst` operands during
disassembly. The symbolization was previously done in
`BinaryFunction::disassemble()`, but it is also required by
`scanExternalRefs()` for "lite" mode functionality. Hence, similar to
x86, I've implemented the symbolizer interface that uses
`BinaryFunction` relocations to properly create instruction operands. I
expect the result of the disassembly to be identical after the change.

AArch64 disassembler was not calling `tryAddingSymbolicOperand()` for
`MOV` instructions. Fix that. Additionally, the disassembler marks `ldr`
instructions as branches by setting `IsBranch` parameter to true. Ignore
the parameter and rely on `MCPlusBuilder` interface instead.

I've modified `--check-encoding` flag to check symolization of operands
of instructions that have relocations against them.
This commit is contained in:
Maksim Panchenko
2025-03-03 12:44:28 -08:00
committed by GitHub
parent 39402cde61
commit b971d4d7c8
6 changed files with 162 additions and 23 deletions

View File

@@ -1333,6 +1333,22 @@ Error BinaryFunction::disassemble() {
BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr);
BC.errs() << '\n';
}
// Verify that we've symbolized an operand if the instruction has a
// relocation against it.
if (getRelocationInRange(Offset, Offset + Size)) {
bool HasSymbolicOp = false;
for (MCOperand &Op : Instruction) {
if (Op.isExpr()) {
HasSymbolicOp = true;
break;
}
}
if (!HasSymbolicOp)
return createFatalBOLTError(
"expected symbolized operand for instruction at 0x" +
Twine::utohexstr(AbsoluteInstrAddr));
}
}
// Special handling for AVX-512 instructions.
@@ -1440,9 +1456,8 @@ Error BinaryFunction::disassemble() {
if (BC.isAArch64())
handleAArch64IndirectCall(Instruction, Offset);
}
} else if (BC.isAArch64() || BC.isRISCV()) {
} else if (BC.isRISCV()) {
// Check if there's a relocation associated with this instruction.
bool UsedReloc = false;
for (auto Itr = Relocations.lower_bound(Offset),
ItrE = Relocations.lower_bound(Offset + Size);
Itr != ItrE; ++Itr) {
@@ -1467,24 +1482,6 @@ Error BinaryFunction::disassemble() {
Relocation.Type);
(void)Result;
assert(Result && "cannot replace immediate with relocation");
// For aarch64, if we replaced an immediate with a symbol from a
// relocation, we mark it so we do not try to further process a
// pc-relative operand. All we need is the symbol.
UsedReloc = true;
}
if (!BC.isRISCV() && MIB->hasPCRelOperand(Instruction) && !UsedReloc) {
if (auto NewE = handleErrors(
handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size),
[&](const BOLTError &E) -> Error {
if (E.isFatal())
return Error(std::make_unique<BOLTError>(std::move(E)));
if (!E.getMessage().empty())
E.log(BC.errs());
return Error::success();
}))
return Error(std::move(NewE));
}
}
@@ -1580,8 +1577,9 @@ bool BinaryFunction::scanExternalRefs() {
assert(FunctionData.size() == getMaxSize() &&
"function size does not match raw data size");
BC.SymbolicDisAsm->setSymbolizer(
BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
if (BC.isX86())
BC.SymbolicDisAsm->setSymbolizer(
BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
// Disassemble contents of the function. Detect code entry points and create
// relocations for references to code that will be moved.

View File

@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
#include "AArch64MCSymbolizer.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCExpr.h"
@@ -134,6 +135,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
public:
using MCPlusBuilder::MCPlusBuilder;
std::unique_ptr<MCSymbolizer>
createTargetSymbolizer(BinaryFunction &Function,
bool CreateNewSymbols) const override {
return std::make_unique<AArch64MCSymbolizer>(Function, CreateNewSymbols);
}
MCPhysReg getStackPointer() const override { return AArch64::SP; }
MCPhysReg getFramePointer() const override { return AArch64::FP; }

View File

@@ -0,0 +1,85 @@
//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AArch64MCSymbolizer.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/Core/Relocation.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "bolt-symbolizer"
namespace llvm {
namespace bolt {
AArch64MCSymbolizer::~AArch64MCSymbolizer() {}
bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress,
bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) {
BinaryContext &BC = Function.getBinaryContext();
MCContext *Ctx = BC.Ctx.get();
// NOTE: the callee may incorrectly set IsBranch.
if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst))
return false;
// TODO: add handling for linker "relaxation". At the moment, relocations
// corresponding to "relaxed" instructions are excluded from BinaryFunction
// relocation list.
const uint64_t InstOffset = InstAddress - Function.getAddress();
const Relocation *Relocation = Function.getRelocationAt(InstOffset);
/// Add symbolic operand to the instruction with an optional addend.
auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend,
uint64_t RelType) {
const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, *Ctx);
if (Addend)
Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Addend, *Ctx),
*Ctx);
Inst.addOperand(MCOperand::createExpr(
BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType)));
};
if (Relocation) {
addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type);
return true;
}
if (!BC.MIB->hasPCRelOperand(Inst))
return false;
Value += InstAddress;
const MCSymbol *TargetSymbol;
uint64_t TargetOffset;
if (!CreateNewSymbols) {
if (BinaryData *BD = BC.getBinaryDataContainingAddress(Value)) {
TargetSymbol = BD->getSymbol();
TargetOffset = Value - BD->getAddress();
} else {
return false;
}
} else {
std::tie(TargetSymbol, TargetOffset) =
BC.handleAddressRef(Value, Function, /*IsPCRel*/ true);
}
addOperand(TargetSymbol, TargetOffset, 0);
return true;
}
void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
int64_t Value,
uint64_t Address) {}
} // namespace bolt
} // namespace llvm

View File

@@ -0,0 +1,44 @@
//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H
#define BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H
#include "bolt/Core/BinaryFunction.h"
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
namespace llvm {
namespace bolt {
class AArch64MCSymbolizer : public MCSymbolizer {
protected:
BinaryFunction &Function;
bool CreateNewSymbols{true};
public:
AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),
Function(Function), CreateNewSymbols(CreateNewSymbols) {}
AArch64MCSymbolizer(const AArch64MCSymbolizer &) = delete;
AArch64MCSymbolizer &operator=(const AArch64MCSymbolizer &) = delete;
virtual ~AArch64MCSymbolizer();
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream,
int64_t Value, uint64_t Address, bool IsBranch,
uint64_t Offset, uint64_t OpSize,
uint64_t InstSize) override;
void tryAddingPcLoadReferenceComment(raw_ostream &CStream, int64_t Value,
uint64_t Address) override;
};
} // namespace bolt
} // namespace llvm
#endif

View File

@@ -1,5 +1,6 @@
set(LLVM_LINK_COMPONENTS
MC
MCDisassembler
Support
AArch64Desc
)
@@ -18,6 +19,7 @@ endif()
add_llvm_library(LLVMBOLTTargetAArch64
AArch64MCPlusBuilder.cpp
AArch64MCSymbolizer.cpp
NO_EXPORT
DISABLE_LLVM_LINK_LLVM_DYLIB

View File

@@ -744,7 +744,10 @@ static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn,
Inst.getOpcode() == AArch64::MOVKXi)
Inst.addOperand(Inst.getOperand(0));
Inst.addOperand(MCOperand::createImm(imm));
if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, /*IsBranch*/ false, 0,
0, 4))
Inst.addOperand(MCOperand::createImm(imm));
Inst.addOperand(MCOperand::createImm(shift));
return Success;
}