[BOLT] Pass unfiltered relocations to disassembler. NFCI (#131202)

Instead of filtering and modifying relocations in readRelocations(),
preserve the relocation info and use it in the symbolizing disassembler.
This change mostly affects AArch64, where we need to look at original
linker relocations in order to properly symbolize instruction operands.
This commit is contained in:
Maksim Panchenko
2025-03-14 18:44:33 -07:00
committed by GitHub
parent d52ec1e9dd
commit bac21719a8
8 changed files with 114 additions and 134 deletions

View File

@@ -637,6 +637,16 @@ public:
return false;
}
virtual bool isAddXri(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
}
virtual bool isMOVW(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
}
virtual bool isMoveMem2Reg(const MCInst &Inst) const { return false; }
virtual bool mayLoad(const MCInst &Inst) const {

View File

@@ -64,11 +64,6 @@ struct Relocation {
/// Skip relocations that we don't want to handle in BOLT
static bool skipRelocationType(uint32_t Type);
/// Handle special cases when relocation should not be processed by BOLT or
/// change relocation \p Type to proper one before continuing if \p Contents
/// and \P Type mismatch occurred.
static bool skipRelocationProcess(uint32_t &Type, uint64_t Contents);
/// Adjust value depending on relocation type (make it PC relative or not).
static uint64_t encodeValue(uint32_t Type, uint64_t Value, uint64_t PC);

View File

@@ -1473,10 +1473,19 @@ Error BinaryFunction::disassemble() {
}
}
uint64_t Addend = Relocation.Addend;
// For GOT relocations, create a reference against GOT entry ignoring
// the relocation symbol.
if (Relocation::isGOT(Relocation.Type)) {
assert(Relocation::isPCRelative(Relocation.Type) &&
"GOT relocation must be PC-relative on RISC-V");
Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
Addend = Relocation.Value + Relocation.Offset + getAddress();
}
int64_t Value = Relocation.Value;
const bool Result = BC.MIB->replaceImmWithSymbolRef(
Instruction, Symbol, Relocation.Addend, Ctx.get(), Value,
Relocation.Type);
Instruction, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
(void)Result;
assert(Result && "cannot replace immediate with relocation");
}

View File

@@ -257,78 +257,6 @@ static bool skipRelocationTypeRISCV(uint32_t Type) {
}
}
static bool skipRelocationProcessX86(uint32_t &Type, uint64_t Contents) {
return false;
}
static bool skipRelocationProcessAArch64(uint32_t &Type, uint64_t Contents) {
auto IsMov = [](uint64_t Contents) -> bool {
// The bits 28-23 are 0b100101
return (Contents & 0x1f800000) == 0x12800000;
};
auto IsB = [](uint64_t Contents) -> bool {
// The bits 31-26 are 0b000101
return (Contents & 0xfc000000) == 0x14000000;
};
auto IsAddImm = [](uint64_t Contents) -> bool {
// The bits 30-23 are 0b00100010
return (Contents & 0x7F800000) == 0x11000000;
};
// The linker might relax ADRP+LDR instruction sequence for loading symbol
// address from GOT table to ADRP+ADD sequence that would point to the
// binary-local symbol. Change relocation type in order to process it right.
if (Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && IsAddImm(Contents)) {
Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
return false;
}
// The linker might perform TLS relocations relaxations, such as
// changed TLS access model (e.g. changed global dynamic model
// to initial exec), thus changing the instructions. The static
// relocations might be invalid at this point and we might no
// need to process these relocations anymore.
// More information could be found by searching
// elfNN_aarch64_tls_relax in bfd
switch (Type) {
default:
break;
case ELF::R_AARCH64_TLSDESC_LD64_LO12:
case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: {
if (IsMov(Contents))
return true;
}
}
// The linker might replace load/store instruction with jump and
// veneer due to errata 843419
// https://documentation-service.arm.com/static/5fa29fddb209f547eebd361d
// Thus load/store relocations for these instructions must be ignored
// NOTE: We only process GOT and TLS relocations this way since the
// addend used in load/store instructions won't change after bolt
// (it is important since the instruction in veneer won't have relocation)
switch (Type) {
default:
break;
case ELF::R_AARCH64_LD64_GOT_LO12_NC:
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
case ELF::R_AARCH64_TLSDESC_LD64_LO12: {
if (IsB(Contents))
return true;
}
}
return false;
}
static bool skipRelocationProcessRISCV(uint32_t &Type, uint64_t Contents) {
return false;
}
static uint64_t encodeValueX86(uint32_t Type, uint64_t Value, uint64_t PC) {
switch (Type) {
default:
@@ -798,19 +726,6 @@ bool Relocation::skipRelocationType(uint32_t Type) {
}
}
bool Relocation::skipRelocationProcess(uint32_t &Type, uint64_t Contents) {
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return skipRelocationProcessAArch64(Type, Contents);
case Triple::riscv64:
return skipRelocationProcessRISCV(Type, Contents);
case Triple::x86_64:
return skipRelocationProcessX86(Type, Contents);
}
}
uint64_t Relocation::encodeValue(uint32_t Type, uint64_t Value, uint64_t PC) {
switch (Arch) {
default:

View File

@@ -2229,8 +2229,6 @@ bool RewriteInstance::analyzeRelocation(
ErrorOr<uint64_t> Value =
BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
assert(Value && "failed to extract relocated value");
if ((Skip = Relocation::skipRelocationProcess(RType, *Value)))
return true;
ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset());
Addend = getRelocationAddend(InputFile, Rel);
@@ -2283,17 +2281,14 @@ bool RewriteInstance::analyzeRelocation(
}
}
// If no symbol has been found or if it is a relocation requiring the
// creation of a GOT entry, do not link against the symbol but against
// whatever address was extracted from the instruction itself. We are
// not creating a GOT entry as this was already processed by the linker.
// For GOT relocs, do not subtract addend as the addend does not refer
// to this instruction's target, but it refers to the target in the GOT
// entry.
if (Relocation::isGOT(RType)) {
Addend = 0;
SymbolAddress = ExtractedValue + PCRelOffset;
} else if (Relocation::isTLS(RType)) {
// GOT relocation can cause the underlying instruction to be modified by the
// linker, resulting in the extracted value being different from the actual
// symbol. It's also possible to have a GOT entry for a symbol defined in the
// binary. In the latter case, the instruction can be using the GOT version
// causing the extracted value mismatch. Similar cases can happen for TLS.
// Pass the relocation information as is to the disassembler and let it decide
// how to use it for the operand symbolization.
if (Relocation::isGOT(RType) || Relocation::isTLS(RType)) {
SkipVerification = true;
} else if (!SymbolAddress) {
assert(!IsSectionRelocation);
@@ -2666,11 +2661,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
MCSymbol *ReferencedSymbol = nullptr;
if (!IsSectionRelocation) {
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName)) {
ReferencedSymbol = BD->getSymbol();
else if (BC->isGOTSymbol(SymbolName))
} else if (BC->isGOTSymbol(SymbolName)) {
if (BinaryData *BD = BC->getGOTSymbol())
ReferencedSymbol = BD->getSymbol();
} else if (BinaryData *BD = BC->getBinaryDataAtAddress(SymbolAddress)) {
ReferencedSymbol = BD->getSymbol();
}
}
ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address};
@@ -2798,15 +2796,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
}
}
if (ForceRelocation) {
std::string Name =
Relocation::isGOT(RType) ? "__BOLT_got_zero" : SymbolName;
ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
SymbolAddress = 0;
if (Relocation::isGOT(RType))
Addend = Address;
if (ForceRelocation && !ReferencedBF) {
// Create the relocation symbol if it's not defined in the binary.
if (SymbolAddress == 0)
ReferencedSymbol = BC->registerNameAtAddress(SymbolName, 0, 0, 0);
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
<< SymbolName << " with addend " << Addend << '\n');
<< ReferencedSymbol->getName() << " with addend "
<< Addend << '\n');
} else if (ReferencedBF) {
ReferencedSymbol = ReferencedBF->getSymbol();
uint64_t RefFunctionOffset = 0;

View File

@@ -281,7 +281,7 @@ public:
return Inst.getOpcode() == AArch64::ADR;
}
bool isAddXri(const MCInst &Inst) const {
bool isAddXri(const MCInst &Inst) const override {
return Inst.getOpcode() == AArch64::ADDXri;
}
@@ -318,7 +318,7 @@ public:
Inst.getOpcode() == AArch64::CBZX);
}
bool isMOVW(const MCInst &Inst) const {
bool isMOVW(const MCInst &Inst) const override {
return (Inst.getOpcode() == AArch64::MOVKWi ||
Inst.getOpcode() == AArch64::MOVKXi ||
Inst.getOpcode() == AArch64::MOVNWi ||

View File

@@ -45,24 +45,15 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType)));
};
// The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
// NOP+ADR. After the conversion, the linker might keep the relocations and
// if we try to symbolize ADR's operand using outdated relocations, we might
// get unexpected results. Hence, we check for the conversion/relaxation, and
// ignore the relocation. The symbolization is done based on the PC-relative
// value of the operand instead.
if (Relocation && BC.MIB->isADR(Inst)) {
if (Relocation->Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
Relocation->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
<< Twine::utohexstr(InstAddress) << '\n');
Relocation = nullptr;
}
}
if (Relocation) {
addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type);
return true;
auto AdjustedRel = adjustRelocation(*Relocation, Inst);
if (AdjustedRel) {
addOperand(AdjustedRel->Symbol, AdjustedRel->Addend, AdjustedRel->Type);
return true;
}
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
<< Twine::utohexstr(InstAddress) << '\n');
}
if (!BC.MIB->hasPCRelOperand(Inst))
@@ -88,6 +79,61 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
return true;
}
std::optional<Relocation>
AArch64MCSymbolizer::adjustRelocation(const Relocation &Rel,
const MCInst &Inst) const {
BinaryContext &BC = Function.getBinaryContext();
// The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
// NOP+ADR. After the conversion, the linker might keep the relocations and
// if we try to symbolize ADR's operand using outdated relocations, we might
// get unexpected results. Hence, we check for the conversion/relaxation, and
// ignore the relocation. The symbolization is done based on the PC-relative
// value of the operand instead.
if (BC.MIB->isADR(Inst) && (Rel.Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC))
return std::nullopt;
// The linker might perform TLS relocations relaxations, such as changed TLS
// access model (e.g. changed global dynamic model to initial exec), thus
// changing the instructions. The static relocations might be invalid at this
// point and we don't have to process these relocations anymore. More
// information could be found by searching elfNN_aarch64_tls_relax in bfd.
if (BC.MIB->isMOVW(Inst)) {
switch (Rel.Type) {
default:
break;
case ELF::R_AARCH64_TLSDESC_LD64_LO12:
case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
return std::nullopt;
}
}
if (!Relocation::isGOT(Rel.Type))
return Rel;
Relocation AdjustedRel = Rel;
if (Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && BC.MIB->isAddXri(Inst)) {
// The ADRP+LDR sequence was converted into ADRP+ADD. We are looking at the
// second instruction and have to use the relocation type for ADD.
AdjustedRel.Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
} else {
// For instructions that reference GOT, ignore the referenced symbol and
// use value at the relocation site. FixRelaxationPass will look at
// instruction pairs and will perform necessary adjustments.
ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(*Rel.Symbol);
assert(SymbolValue && "Symbol value should be set");
const uint64_t SymbolPageAddr = *SymbolValue & ~0xfffULL;
AdjustedRel.Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
AdjustedRel.Addend = Rel.Value;
}
return AdjustedRel;
}
void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
int64_t Value,
uint64_t Address) {}

View File

@@ -11,6 +11,7 @@
#include "bolt/Core/BinaryFunction.h"
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
#include <optional>
namespace llvm {
namespace bolt {
@@ -20,6 +21,13 @@ protected:
BinaryFunction &Function;
bool CreateNewSymbols{true};
/// Modify relocation \p Rel based on type of the relocation and the
/// instruction it was applied to. Return the new relocation info, or
/// std::nullopt if the relocation should be ignored, e.g. in the case the
/// instruction was modified by the linker.
std::optional<Relocation> adjustRelocation(const Relocation &Rel,
const MCInst &Inst) const;
public:
AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),