[BOLT][AArch64] Add symbolizer for AArch64 disassembler. NFCI (#127969)
Add AArch64MCSymbolizer that symbolizes `MCInst` operands during disassembly. The symbolization was previously done in `BinaryFunction::disassemble()`, but it is also required by `scanExternalRefs()` for "lite" mode functionality. Hence, similar to x86, I've implemented the symbolizer interface that uses `BinaryFunction` relocations to properly create instruction operands. I expect the result of the disassembly to be identical after the change. AArch64 disassembler was not calling `tryAddingSymbolicOperand()` for `MOV` instructions. Fix that. Additionally, the disassembler marks `ldr` instructions as branches by setting `IsBranch` parameter to true. Ignore the parameter and rely on `MCPlusBuilder` interface instead. I've modified `--check-encoding` flag to check symolization of operands of instructions that have relocations against them.
This commit is contained in:
@@ -1333,6 +1333,22 @@ Error BinaryFunction::disassemble() {
|
||||
BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr);
|
||||
BC.errs() << '\n';
|
||||
}
|
||||
|
||||
// Verify that we've symbolized an operand if the instruction has a
|
||||
// relocation against it.
|
||||
if (getRelocationInRange(Offset, Offset + Size)) {
|
||||
bool HasSymbolicOp = false;
|
||||
for (MCOperand &Op : Instruction) {
|
||||
if (Op.isExpr()) {
|
||||
HasSymbolicOp = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!HasSymbolicOp)
|
||||
return createFatalBOLTError(
|
||||
"expected symbolized operand for instruction at 0x" +
|
||||
Twine::utohexstr(AbsoluteInstrAddr));
|
||||
}
|
||||
}
|
||||
|
||||
// Special handling for AVX-512 instructions.
|
||||
@@ -1440,9 +1456,8 @@ Error BinaryFunction::disassemble() {
|
||||
if (BC.isAArch64())
|
||||
handleAArch64IndirectCall(Instruction, Offset);
|
||||
}
|
||||
} else if (BC.isAArch64() || BC.isRISCV()) {
|
||||
} else if (BC.isRISCV()) {
|
||||
// Check if there's a relocation associated with this instruction.
|
||||
bool UsedReloc = false;
|
||||
for (auto Itr = Relocations.lower_bound(Offset),
|
||||
ItrE = Relocations.lower_bound(Offset + Size);
|
||||
Itr != ItrE; ++Itr) {
|
||||
@@ -1467,24 +1482,6 @@ Error BinaryFunction::disassemble() {
|
||||
Relocation.Type);
|
||||
(void)Result;
|
||||
assert(Result && "cannot replace immediate with relocation");
|
||||
|
||||
// For aarch64, if we replaced an immediate with a symbol from a
|
||||
// relocation, we mark it so we do not try to further process a
|
||||
// pc-relative operand. All we need is the symbol.
|
||||
UsedReloc = true;
|
||||
}
|
||||
|
||||
if (!BC.isRISCV() && MIB->hasPCRelOperand(Instruction) && !UsedReloc) {
|
||||
if (auto NewE = handleErrors(
|
||||
handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size),
|
||||
[&](const BOLTError &E) -> Error {
|
||||
if (E.isFatal())
|
||||
return Error(std::make_unique<BOLTError>(std::move(E)));
|
||||
if (!E.getMessage().empty())
|
||||
E.log(BC.errs());
|
||||
return Error::success();
|
||||
}))
|
||||
return Error(std::move(NewE));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1580,8 +1577,9 @@ bool BinaryFunction::scanExternalRefs() {
|
||||
assert(FunctionData.size() == getMaxSize() &&
|
||||
"function size does not match raw data size");
|
||||
|
||||
BC.SymbolicDisAsm->setSymbolizer(
|
||||
BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
|
||||
if (BC.isX86())
|
||||
BC.SymbolicDisAsm->setSymbolizer(
|
||||
BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
|
||||
|
||||
// Disassemble contents of the function. Detect code entry points and create
|
||||
// relocations for references to code that will be moved.
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AArch64MCSymbolizer.h"
|
||||
#include "MCTargetDesc/AArch64AddressingModes.h"
|
||||
#include "MCTargetDesc/AArch64FixupKinds.h"
|
||||
#include "MCTargetDesc/AArch64MCExpr.h"
|
||||
@@ -134,6 +135,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
|
||||
public:
|
||||
using MCPlusBuilder::MCPlusBuilder;
|
||||
|
||||
std::unique_ptr<MCSymbolizer>
|
||||
createTargetSymbolizer(BinaryFunction &Function,
|
||||
bool CreateNewSymbols) const override {
|
||||
return std::make_unique<AArch64MCSymbolizer>(Function, CreateNewSymbols);
|
||||
}
|
||||
|
||||
MCPhysReg getStackPointer() const override { return AArch64::SP; }
|
||||
MCPhysReg getFramePointer() const override { return AArch64::FP; }
|
||||
|
||||
|
||||
85
bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
Normal file
85
bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
Normal file
@@ -0,0 +1,85 @@
|
||||
//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp ------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AArch64MCSymbolizer.h"
|
||||
#include "bolt/Core/BinaryContext.h"
|
||||
#include "bolt/Core/BinaryFunction.h"
|
||||
#include "bolt/Core/MCPlusBuilder.h"
|
||||
#include "bolt/Core/Relocation.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
#define DEBUG_TYPE "bolt-symbolizer"
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
AArch64MCSymbolizer::~AArch64MCSymbolizer() {}
|
||||
|
||||
bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
|
||||
MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress,
|
||||
bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) {
|
||||
BinaryContext &BC = Function.getBinaryContext();
|
||||
MCContext *Ctx = BC.Ctx.get();
|
||||
|
||||
// NOTE: the callee may incorrectly set IsBranch.
|
||||
if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst))
|
||||
return false;
|
||||
|
||||
// TODO: add handling for linker "relaxation". At the moment, relocations
|
||||
// corresponding to "relaxed" instructions are excluded from BinaryFunction
|
||||
// relocation list.
|
||||
|
||||
const uint64_t InstOffset = InstAddress - Function.getAddress();
|
||||
const Relocation *Relocation = Function.getRelocationAt(InstOffset);
|
||||
|
||||
/// Add symbolic operand to the instruction with an optional addend.
|
||||
auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend,
|
||||
uint64_t RelType) {
|
||||
const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, *Ctx);
|
||||
if (Addend)
|
||||
Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Addend, *Ctx),
|
||||
*Ctx);
|
||||
Inst.addOperand(MCOperand::createExpr(
|
||||
BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType)));
|
||||
};
|
||||
|
||||
if (Relocation) {
|
||||
addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!BC.MIB->hasPCRelOperand(Inst))
|
||||
return false;
|
||||
|
||||
Value += InstAddress;
|
||||
const MCSymbol *TargetSymbol;
|
||||
uint64_t TargetOffset;
|
||||
if (!CreateNewSymbols) {
|
||||
if (BinaryData *BD = BC.getBinaryDataContainingAddress(Value)) {
|
||||
TargetSymbol = BD->getSymbol();
|
||||
TargetOffset = Value - BD->getAddress();
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
std::tie(TargetSymbol, TargetOffset) =
|
||||
BC.handleAddressRef(Value, Function, /*IsPCRel*/ true);
|
||||
}
|
||||
|
||||
addOperand(TargetSymbol, TargetOffset, 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
|
||||
int64_t Value,
|
||||
uint64_t Address) {}
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
44
bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
Normal file
44
bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
Normal file
@@ -0,0 +1,44 @@
|
||||
//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp --------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H
|
||||
#define BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H
|
||||
|
||||
#include "bolt/Core/BinaryFunction.h"
|
||||
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
class AArch64MCSymbolizer : public MCSymbolizer {
|
||||
protected:
|
||||
BinaryFunction &Function;
|
||||
bool CreateNewSymbols{true};
|
||||
|
||||
public:
|
||||
AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
|
||||
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),
|
||||
Function(Function), CreateNewSymbols(CreateNewSymbols) {}
|
||||
|
||||
AArch64MCSymbolizer(const AArch64MCSymbolizer &) = delete;
|
||||
AArch64MCSymbolizer &operator=(const AArch64MCSymbolizer &) = delete;
|
||||
virtual ~AArch64MCSymbolizer();
|
||||
|
||||
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream,
|
||||
int64_t Value, uint64_t Address, bool IsBranch,
|
||||
uint64_t Offset, uint64_t OpSize,
|
||||
uint64_t InstSize) override;
|
||||
|
||||
void tryAddingPcLoadReferenceComment(raw_ostream &CStream, int64_t Value,
|
||||
uint64_t Address) override;
|
||||
};
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
||||
@@ -1,5 +1,6 @@
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
MC
|
||||
MCDisassembler
|
||||
Support
|
||||
AArch64Desc
|
||||
)
|
||||
@@ -18,6 +19,7 @@ endif()
|
||||
|
||||
add_llvm_library(LLVMBOLTTargetAArch64
|
||||
AArch64MCPlusBuilder.cpp
|
||||
AArch64MCSymbolizer.cpp
|
||||
|
||||
NO_EXPORT
|
||||
DISABLE_LLVM_LINK_LLVM_DYLIB
|
||||
|
||||
@@ -744,7 +744,10 @@ static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn,
|
||||
Inst.getOpcode() == AArch64::MOVKXi)
|
||||
Inst.addOperand(Inst.getOperand(0));
|
||||
|
||||
Inst.addOperand(MCOperand::createImm(imm));
|
||||
if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, /*IsBranch*/ false, 0,
|
||||
0, 4))
|
||||
Inst.addOperand(MCOperand::createImm(imm));
|
||||
|
||||
Inst.addOperand(MCOperand::createImm(shift));
|
||||
return Success;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user