diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 0927b01cd421..b285138b77fe 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -637,6 +637,16 @@ public: return false; } + virtual bool isAddXri(const MCInst &Inst) const { + llvm_unreachable("not implemented"); + return false; + } + + virtual bool isMOVW(const MCInst &Inst) const { + llvm_unreachable("not implemented"); + return false; + } + virtual bool isMoveMem2Reg(const MCInst &Inst) const { return false; } virtual bool mayLoad(const MCInst &Inst) const { diff --git a/bolt/include/bolt/Core/Relocation.h b/bolt/include/bolt/Core/Relocation.h index 7b468566a1d3..1f18dad008ca 100644 --- a/bolt/include/bolt/Core/Relocation.h +++ b/bolt/include/bolt/Core/Relocation.h @@ -64,11 +64,6 @@ struct Relocation { /// Skip relocations that we don't want to handle in BOLT static bool skipRelocationType(uint32_t Type); - /// Handle special cases when relocation should not be processed by BOLT or - /// change relocation \p Type to proper one before continuing if \p Contents - /// and \P Type mismatch occurred. - static bool skipRelocationProcess(uint32_t &Type, uint64_t Contents); - /// Adjust value depending on relocation type (make it PC relative or not). static uint64_t encodeValue(uint32_t Type, uint64_t Value, uint64_t PC); diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index a5dc188c9860..acf04cee769c 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1473,10 +1473,19 @@ Error BinaryFunction::disassemble() { } } + uint64_t Addend = Relocation.Addend; + + // For GOT relocations, create a reference against GOT entry ignoring + // the relocation symbol. + if (Relocation::isGOT(Relocation.Type)) { + assert(Relocation::isPCRelative(Relocation.Type) && + "GOT relocation must be PC-relative on RISC-V"); + Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0); + Addend = Relocation.Value + Relocation.Offset + getAddress(); + } int64_t Value = Relocation.Value; const bool Result = BC.MIB->replaceImmWithSymbolRef( - Instruction, Symbol, Relocation.Addend, Ctx.get(), Value, - Relocation.Type); + Instruction, Symbol, Addend, Ctx.get(), Value, Relocation.Type); (void)Result; assert(Result && "cannot replace immediate with relocation"); } diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp index e91b6702c8d6..1a142c7d9716 100644 --- a/bolt/lib/Core/Relocation.cpp +++ b/bolt/lib/Core/Relocation.cpp @@ -257,78 +257,6 @@ static bool skipRelocationTypeRISCV(uint32_t Type) { } } -static bool skipRelocationProcessX86(uint32_t &Type, uint64_t Contents) { - return false; -} - -static bool skipRelocationProcessAArch64(uint32_t &Type, uint64_t Contents) { - auto IsMov = [](uint64_t Contents) -> bool { - // The bits 28-23 are 0b100101 - return (Contents & 0x1f800000) == 0x12800000; - }; - - auto IsB = [](uint64_t Contents) -> bool { - // The bits 31-26 are 0b000101 - return (Contents & 0xfc000000) == 0x14000000; - }; - - auto IsAddImm = [](uint64_t Contents) -> bool { - // The bits 30-23 are 0b00100010 - return (Contents & 0x7F800000) == 0x11000000; - }; - - // The linker might relax ADRP+LDR instruction sequence for loading symbol - // address from GOT table to ADRP+ADD sequence that would point to the - // binary-local symbol. Change relocation type in order to process it right. - if (Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && IsAddImm(Contents)) { - Type = ELF::R_AARCH64_ADD_ABS_LO12_NC; - return false; - } - - // The linker might perform TLS relocations relaxations, such as - // changed TLS access model (e.g. changed global dynamic model - // to initial exec), thus changing the instructions. The static - // relocations might be invalid at this point and we might no - // need to process these relocations anymore. - // More information could be found by searching - // elfNN_aarch64_tls_relax in bfd - switch (Type) { - default: - break; - case ELF::R_AARCH64_TLSDESC_LD64_LO12: - case ELF::R_AARCH64_TLSDESC_ADR_PAGE21: - case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: - case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: { - if (IsMov(Contents)) - return true; - } - } - - // The linker might replace load/store instruction with jump and - // veneer due to errata 843419 - // https://documentation-service.arm.com/static/5fa29fddb209f547eebd361d - // Thus load/store relocations for these instructions must be ignored - // NOTE: We only process GOT and TLS relocations this way since the - // addend used in load/store instructions won't change after bolt - // (it is important since the instruction in veneer won't have relocation) - switch (Type) { - default: - break; - case ELF::R_AARCH64_LD64_GOT_LO12_NC: - case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: - case ELF::R_AARCH64_TLSDESC_LD64_LO12: { - if (IsB(Contents)) - return true; - } - } - - return false; -} - -static bool skipRelocationProcessRISCV(uint32_t &Type, uint64_t Contents) { - return false; -} - static uint64_t encodeValueX86(uint32_t Type, uint64_t Value, uint64_t PC) { switch (Type) { default: @@ -798,19 +726,6 @@ bool Relocation::skipRelocationType(uint32_t Type) { } } -bool Relocation::skipRelocationProcess(uint32_t &Type, uint64_t Contents) { - switch (Arch) { - default: - llvm_unreachable("Unsupported architecture"); - case Triple::aarch64: - return skipRelocationProcessAArch64(Type, Contents); - case Triple::riscv64: - return skipRelocationProcessRISCV(Type, Contents); - case Triple::x86_64: - return skipRelocationProcessX86(Type, Contents); - } -} - uint64_t Relocation::encodeValue(uint32_t Type, uint64_t Value, uint64_t PC) { switch (Arch) { default: diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index c50850e9090c..560b30c6c676 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2229,8 +2229,6 @@ bool RewriteInstance::analyzeRelocation( ErrorOr Value = BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize); assert(Value && "failed to extract relocated value"); - if ((Skip = Relocation::skipRelocationProcess(RType, *Value))) - return true; ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset()); Addend = getRelocationAddend(InputFile, Rel); @@ -2283,17 +2281,14 @@ bool RewriteInstance::analyzeRelocation( } } - // If no symbol has been found or if it is a relocation requiring the - // creation of a GOT entry, do not link against the symbol but against - // whatever address was extracted from the instruction itself. We are - // not creating a GOT entry as this was already processed by the linker. - // For GOT relocs, do not subtract addend as the addend does not refer - // to this instruction's target, but it refers to the target in the GOT - // entry. - if (Relocation::isGOT(RType)) { - Addend = 0; - SymbolAddress = ExtractedValue + PCRelOffset; - } else if (Relocation::isTLS(RType)) { + // GOT relocation can cause the underlying instruction to be modified by the + // linker, resulting in the extracted value being different from the actual + // symbol. It's also possible to have a GOT entry for a symbol defined in the + // binary. In the latter case, the instruction can be using the GOT version + // causing the extracted value mismatch. Similar cases can happen for TLS. + // Pass the relocation information as is to the disassembler and let it decide + // how to use it for the operand symbolization. + if (Relocation::isGOT(RType) || Relocation::isTLS(RType)) { SkipVerification = true; } else if (!SymbolAddress) { assert(!IsSectionRelocation); @@ -2666,11 +2661,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection, MCSymbol *ReferencedSymbol = nullptr; if (!IsSectionRelocation) { - if (BinaryData *BD = BC->getBinaryDataByName(SymbolName)) + if (BinaryData *BD = BC->getBinaryDataByName(SymbolName)) { ReferencedSymbol = BD->getSymbol(); - else if (BC->isGOTSymbol(SymbolName)) + } else if (BC->isGOTSymbol(SymbolName)) { if (BinaryData *BD = BC->getGOTSymbol()) ReferencedSymbol = BD->getSymbol(); + } else if (BinaryData *BD = BC->getBinaryDataAtAddress(SymbolAddress)) { + ReferencedSymbol = BD->getSymbol(); + } } ErrorOr ReferencedSection{std::errc::bad_address}; @@ -2798,15 +2796,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection, } } - if (ForceRelocation) { - std::string Name = - Relocation::isGOT(RType) ? "__BOLT_got_zero" : SymbolName; - ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0); - SymbolAddress = 0; - if (Relocation::isGOT(RType)) - Addend = Address; + if (ForceRelocation && !ReferencedBF) { + // Create the relocation symbol if it's not defined in the binary. + if (SymbolAddress == 0) + ReferencedSymbol = BC->registerNameAtAddress(SymbolName, 0, 0, 0); + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol " - << SymbolName << " with addend " << Addend << '\n'); + << ReferencedSymbol->getName() << " with addend " + << Addend << '\n'); } else if (ReferencedBF) { ReferencedSymbol = ReferencedBF->getSymbol(); uint64_t RefFunctionOffset = 0; diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index 73f27b00213d..c03b4aedbec6 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -281,7 +281,7 @@ public: return Inst.getOpcode() == AArch64::ADR; } - bool isAddXri(const MCInst &Inst) const { + bool isAddXri(const MCInst &Inst) const override { return Inst.getOpcode() == AArch64::ADDXri; } @@ -318,7 +318,7 @@ public: Inst.getOpcode() == AArch64::CBZX); } - bool isMOVW(const MCInst &Inst) const { + bool isMOVW(const MCInst &Inst) const override { return (Inst.getOpcode() == AArch64::MOVKWi || Inst.getOpcode() == AArch64::MOVKXi || Inst.getOpcode() == AArch64::MOVNWi || diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp index d08bca6e0fc3..231582b5aabe 100644 --- a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp @@ -45,24 +45,15 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand( BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType))); }; - // The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into - // NOP+ADR. After the conversion, the linker might keep the relocations and - // if we try to symbolize ADR's operand using outdated relocations, we might - // get unexpected results. Hence, we check for the conversion/relaxation, and - // ignore the relocation. The symbolization is done based on the PC-relative - // value of the operand instead. - if (Relocation && BC.MIB->isADR(Inst)) { - if (Relocation->Type == ELF::R_AARCH64_ADD_ABS_LO12_NC || - Relocation->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC) { - LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x" - << Twine::utohexstr(InstAddress) << '\n'); - Relocation = nullptr; - } - } - if (Relocation) { - addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type); - return true; + auto AdjustedRel = adjustRelocation(*Relocation, Inst); + if (AdjustedRel) { + addOperand(AdjustedRel->Symbol, AdjustedRel->Addend, AdjustedRel->Type); + return true; + } + + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x" + << Twine::utohexstr(InstAddress) << '\n'); } if (!BC.MIB->hasPCRelOperand(Inst)) @@ -88,6 +79,61 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand( return true; } +std::optional +AArch64MCSymbolizer::adjustRelocation(const Relocation &Rel, + const MCInst &Inst) const { + BinaryContext &BC = Function.getBinaryContext(); + + // The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into + // NOP+ADR. After the conversion, the linker might keep the relocations and + // if we try to symbolize ADR's operand using outdated relocations, we might + // get unexpected results. Hence, we check for the conversion/relaxation, and + // ignore the relocation. The symbolization is done based on the PC-relative + // value of the operand instead. + if (BC.MIB->isADR(Inst) && (Rel.Type == ELF::R_AARCH64_ADD_ABS_LO12_NC || + Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC)) + return std::nullopt; + + // The linker might perform TLS relocations relaxations, such as changed TLS + // access model (e.g. changed global dynamic model to initial exec), thus + // changing the instructions. The static relocations might be invalid at this + // point and we don't have to process these relocations anymore. More + // information could be found by searching elfNN_aarch64_tls_relax in bfd. + if (BC.MIB->isMOVW(Inst)) { + switch (Rel.Type) { + default: + break; + case ELF::R_AARCH64_TLSDESC_LD64_LO12: + case ELF::R_AARCH64_TLSDESC_ADR_PAGE21: + case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: + return std::nullopt; + } + } + + if (!Relocation::isGOT(Rel.Type)) + return Rel; + + Relocation AdjustedRel = Rel; + if (Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && BC.MIB->isAddXri(Inst)) { + // The ADRP+LDR sequence was converted into ADRP+ADD. We are looking at the + // second instruction and have to use the relocation type for ADD. + AdjustedRel.Type = ELF::R_AARCH64_ADD_ABS_LO12_NC; + } else { + // For instructions that reference GOT, ignore the referenced symbol and + // use value at the relocation site. FixRelaxationPass will look at + // instruction pairs and will perform necessary adjustments. + ErrorOr SymbolValue = BC.getSymbolValue(*Rel.Symbol); + assert(SymbolValue && "Symbol value should be set"); + const uint64_t SymbolPageAddr = *SymbolValue & ~0xfffULL; + + AdjustedRel.Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0); + AdjustedRel.Addend = Rel.Value; + } + + return AdjustedRel; +} + void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream, int64_t Value, uint64_t Address) {} diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h index 56ba4fbcaf27..7f06a4df1eb1 100644 --- a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h +++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h @@ -11,6 +11,7 @@ #include "bolt/Core/BinaryFunction.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" +#include namespace llvm { namespace bolt { @@ -20,6 +21,13 @@ protected: BinaryFunction &Function; bool CreateNewSymbols{true}; + /// Modify relocation \p Rel based on type of the relocation and the + /// instruction it was applied to. Return the new relocation info, or + /// std::nullopt if the relocation should be ignored, e.g. in the case the + /// instruction was modified by the linker. + std::optional adjustRelocation(const Relocation &Rel, + const MCInst &Inst) const; + public: AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true) : MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),