From 23c8d382583167812cfe59aed0d004bdbdcd9fba Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Mon, 21 Aug 2023 10:10:48 +0200 Subject: [PATCH] [BOLT] Calculate input to output address map using BOLTLinker BOLT uses MCAsmLayout to calculate the output values of basic blocks. This means output values are calculated based on a pre-linking state and any changes to symbol values during linking will cause incorrect values to be used. This issue was first addressed in D154604 by adding all basic block symbols to the symbol table for the linker to resolve them. However, the runtime overhead of handling this huge symbol table turned out to be prohibitively large. This patch solves the issue in a different way. First, a temporary section containing [input address, output symbol] pairs is emitted to the intermediary object file. The linker will resolve all these references so we end up with a section of [input address, output address] pairs. This section is then parsed and used to: - Replace BinaryBasicBlock::OffsetTranslationTable - Replace BinaryFunction::InputOffsetToAddressMap - Update BinaryBasicBlock::OutputAddressRange Note that the reason this is more performant than the previous attempt is that these symbol references do not cause entries to be added to the symbol table. Instead, section-relative references are used for the relocations. Reviewed By: maksfb Differential Revision: https://reviews.llvm.org/D155604 --- bolt/include/bolt/Core/AddressMap.h | 59 +++++++++++++++++++ bolt/include/bolt/Core/BinaryBasicBlock.h | 26 +-------- bolt/include/bolt/Core/BinaryContext.h | 10 ++++ bolt/include/bolt/Core/BinaryFunction.h | 15 ++--- bolt/include/bolt/Core/BinarySection.h | 4 ++ bolt/lib/Core/AddressMap.cpp | 63 +++++++++++++++++++++ bolt/lib/Core/BinaryBasicBlock.cpp | 22 ------- bolt/lib/Core/BinaryEmitter.cpp | 4 ++ bolt/lib/Core/BinaryFunction.cpp | 19 ++++--- bolt/lib/Core/CMakeLists.txt | 1 + bolt/lib/Profile/BoltAddressTranslation.cpp | 11 +++- bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 4 +- bolt/lib/Rewrite/RewriteInstance.cpp | 15 +++++ 13 files changed, 183 insertions(+), 70 deletions(-) create mode 100644 bolt/include/bolt/Core/AddressMap.h create mode 100644 bolt/lib/Core/AddressMap.cpp diff --git a/bolt/include/bolt/Core/AddressMap.h b/bolt/include/bolt/Core/AddressMap.h new file mode 100644 index 000000000000..16c2727b6943 --- /dev/null +++ b/bolt/include/bolt/Core/AddressMap.h @@ -0,0 +1,59 @@ +//===- bolt/Core/AddressMap.h - Input-output address map --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Helper class to create a mapping from input to output addresses needed for +// updating debugging symbols and BAT. We emit an MCSection containing +// pairs to the object file and JITLink will +// transform this in pairs. The linker output +// can then be parsed and used to establish the mapping. +// +//===----------------------------------------------------------------------===// +// +#ifndef BOLT_CORE_ADDRESS_MAP_H +#define BOLT_CORE_ADDRESS_MAP_H + +#include "llvm/ADT/StringRef.h" + +#include +#include + +namespace llvm { + +class MCStreamer; + +namespace bolt { + +class BinaryContext; + +class AddressMap { + using MapTy = std::unordered_multimap; + MapTy Map; + +public: + static const char *const SectionName; + + static void emit(MCStreamer &Streamer, BinaryContext &BC); + static AddressMap parse(StringRef Buffer, const BinaryContext &BC); + + std::optional lookup(uint64_t InputAddress) const { + auto It = Map.find(InputAddress); + if (It != Map.end()) + return It->second; + return std::nullopt; + } + + std::pair + lookupAll(uint64_t InputAddress) const { + return Map.equal_range(InputAddress); + } +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h index 02be9c1d4f11..bc95e2c4de3a 100644 --- a/bolt/include/bolt/Core/BinaryBasicBlock.h +++ b/bolt/include/bolt/Core/BinaryBasicBlock.h @@ -100,16 +100,6 @@ private: using LocSymsTy = std::vector>; std::unique_ptr LocSyms; - /// After output/codegen, map output offsets of instructions in this basic - /// block to instruction offsets in the original function. Note that the - /// output basic block could be different from the input basic block. - /// We only map instruction of interest, such as calls and markers. - /// - /// We store the offset array in a basic block to facilitate BAT tables - /// generation. Otherwise, the mapping could be done at function level. - using OffsetTranslationTableTy = std::vector>; - std::unique_ptr OffsetTranslationTable; - /// Alignment requirements for the block. uint32_t Alignment{1}; @@ -828,8 +818,7 @@ public: return OutputAddressRange; } - /// Update addresses of special instructions inside this basic block. - void updateOutputValues(const MCAsmLayout &Layout); + bool hasLocSyms() const { return LocSyms != nullptr; } /// Return mapping of input offsets to symbols in the output. LocSymsTy &getLocSyms() { @@ -841,19 +830,6 @@ public: return const_cast(this)->getLocSyms(); } - /// Return offset translation table for the basic block. - OffsetTranslationTableTy &getOffsetTranslationTable() { - return OffsetTranslationTable - ? *OffsetTranslationTable - : *(OffsetTranslationTable = - std::make_unique()); - } - - /// Return offset translation table for the basic block. - const OffsetTranslationTableTy &getOffsetTranslationTable() const { - return const_cast(this)->getOffsetTranslationTable(); - } - /// Return size of the basic block in the output binary. uint64_t getOutputSize() const { return OutputAddressRange.second - OutputAddressRange.first; diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 7bbfb2fc33ee..ef57ff3541dc 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -13,6 +13,7 @@ #ifndef BOLT_CORE_BINARY_CONTEXT_H #define BOLT_CORE_BINARY_CONTEXT_H +#include "bolt/Core/AddressMap.h" #include "bolt/Core/BinaryData.h" #include "bolt/Core/BinarySection.h" #include "bolt/Core/DebugData.h" @@ -221,6 +222,9 @@ class BinaryContext { bool ContainsDwarf5{false}; bool ContainsDwarfLegacy{false}; + /// Mapping from input to output addresses. + std::optional IOAddressMap; + /// Preprocess DWO debug information. void preprocessDWODebugInfo(); @@ -1343,6 +1347,12 @@ public: /* DWARFMustBeAtTheEnd */ false)); return Streamer; } + + void setIOAddressMap(AddressMap Map) { IOAddressMap = std::move(Map); } + const AddressMap &getIOAddressMap() const { + assert(IOAddressMap && "Address map not set yet"); + return *IOAddressMap; + } }; template > diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 359ecec905ee..9b45467a6a8f 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -577,9 +577,6 @@ private: /// Count the number of functions created. static uint64_t Count; - /// Map offsets of special instructions to addresses in the output. - InputOffsetToAddressMapTy InputOffsetToAddressMap; - /// Register alternative function name. void addAlternativeName(std::string NewName) { Aliases.push_back(std::move(NewName)); @@ -1226,13 +1223,6 @@ public: /// Update output values of the function based on the final \p Layout. void updateOutputValues(const MCAsmLayout &Layout); - /// Return mapping of input to output addresses. Most users should call - /// translateInputToOutputAddress() for address translation. - InputOffsetToAddressMapTy &getInputOffsetToAddressMap() { - assert(isEmitted() && "cannot use address mapping before code emission"); - return InputOffsetToAddressMap; - } - /// Register relocation type \p RelType at a given \p Address in the function /// against \p Symbol. /// Assert if the \p Address is not inside this function. @@ -2180,6 +2170,11 @@ public: /// its code emission. bool requiresAddressTranslation() const; + /// Return true if the linker needs to generate an address map for this + /// function. Used for keeping track of the mapping from input to out + /// addresses of basic blocks. + bool requiresAddressMap() const; + /// Adjust branch instructions to match the CFG. /// /// As it comes to internal branches, the CFG represents "the ultimate source diff --git a/bolt/include/bolt/Core/BinarySection.h b/bolt/include/bolt/Core/BinarySection.h index f1041777926f..326d088d1f04 100644 --- a/bolt/include/bolt/Core/BinarySection.h +++ b/bolt/include/bolt/Core/BinarySection.h @@ -97,6 +97,8 @@ class BinarySection { mutable bool IsReordered{false}; // Have the contents been reordered? bool IsAnonymous{false}; // True if the name should not be included // in the output file. + bool IsLinkOnly{false}; // True if the section should not be included + // in the output file. uint64_t hash(const BinaryData &BD, std::map &Cache) const; @@ -452,6 +454,8 @@ public: void setIndex(uint32_t I) { Index = I; } void setOutputName(const Twine &Name) { OutputName = Name.str(); } void setAnonymous(bool Flag) { IsAnonymous = Flag; } + bool isLinkOnly() const { return IsLinkOnly; } + void setLinkOnly() { IsLinkOnly = true; } /// Emit the section as data, possibly with relocations. /// Use name \p SectionName for the section during the emission. diff --git a/bolt/lib/Core/AddressMap.cpp b/bolt/lib/Core/AddressMap.cpp new file mode 100644 index 000000000000..76c5378a3eb1 --- /dev/null +++ b/bolt/lib/Core/AddressMap.cpp @@ -0,0 +1,63 @@ +#include "bolt/Core/AddressMap.h" +#include "bolt/Core/BinaryContext.h" +#include "bolt/Core/BinaryFunction.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/DataExtractor.h" + +namespace llvm { +namespace bolt { + +const char *const AddressMap::SectionName = ".bolt.address_map"; + +static void emitLabel(MCStreamer &Streamer, uint64_t InputAddress, + const MCSymbol *OutputLabel) { + Streamer.emitIntValue(InputAddress, 8); + Streamer.emitSymbolValue(OutputLabel, 8); +} + +void AddressMap::emit(MCStreamer &Streamer, BinaryContext &BC) { + Streamer.switchSection(BC.getDataSection(SectionName)); + + for (const auto &[BFAddress, BF] : BC.getBinaryFunctions()) { + if (!BF.requiresAddressMap()) + continue; + + for (const auto &BB : BF) { + if (!BB.getLabel()->isDefined()) + continue; + + emitLabel(Streamer, BFAddress + BB.getInputAddressRange().first, + BB.getLabel()); + + if (!BB.hasLocSyms()) + continue; + + for (auto [Offset, Symbol] : BB.getLocSyms()) + emitLabel(Streamer, BFAddress + Offset, Symbol); + } + } +} + +AddressMap AddressMap::parse(StringRef Buffer, const BinaryContext &BC) { + const auto EntrySize = 2 * BC.AsmInfo->getCodePointerSize(); + assert(Buffer.size() % EntrySize == 0 && "Unexpected address map size"); + + DataExtractor DE(Buffer, BC.AsmInfo->isLittleEndian(), + BC.AsmInfo->getCodePointerSize()); + DataExtractor::Cursor Cursor(0); + + AddressMap Parsed; + Parsed.Map.reserve(Buffer.size() / EntrySize); + + while (Cursor && !DE.eof(Cursor)) { + const auto Input = DE.getAddress(Cursor); + const auto Output = DE.getAddress(Cursor); + Parsed.Map.insert({Input, Output}); + } + + assert(Cursor && "Error reading address map section"); + return Parsed; +} + +} // namespace bolt +} // namespace llvm diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp index b271b86ec699..d764a874d08c 100644 --- a/bolt/lib/Core/BinaryBasicBlock.cpp +++ b/bolt/lib/Core/BinaryBasicBlock.cpp @@ -613,27 +613,5 @@ BinaryBasicBlock *BinaryBasicBlock::splitAt(iterator II) { return NewBlock; } -void BinaryBasicBlock::updateOutputValues(const MCAsmLayout &Layout) { - if (!LocSyms) - return; - - const uint64_t BBAddress = getOutputAddressRange().first; - const uint64_t BBOffset = Layout.getSymbolOffset(*getLabel()); - for (const auto &LocSymKV : *LocSyms) { - const uint32_t InputFunctionOffset = LocSymKV.first; - const uint32_t OutputOffset = static_cast( - Layout.getSymbolOffset(*LocSymKV.second) - BBOffset); - getOffsetTranslationTable().emplace_back( - std::make_pair(OutputOffset, InputFunctionOffset)); - - // Update reverse (relative to BAT) address lookup table for function. - if (getFunction()->requiresAddressTranslation()) { - getFunction()->getInputOffsetToAddressMap().emplace( - std::make_pair(InputFunctionOffset, OutputOffset + BBAddress)); - } - } - LocSyms.reset(nullptr); -} - } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index c4129615ac32..63446575f4b2 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -214,6 +214,10 @@ void BinaryEmitter::emitAll(StringRef OrgSecPrefix) { } emitDataSections(OrgSecPrefix); + + // TODO Enable for Mach-O once BinaryContext::getDataSection supports it. + if (BC.isELF()) + AddressMap::emit(Streamer, BC); } void BinaryEmitter::emitFunctions() { diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index b92e5982efda..9ae0c6303108 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -2855,6 +2855,14 @@ bool BinaryFunction::requiresAddressTranslation() const { return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe(); } +bool BinaryFunction::requiresAddressMap() const { + if (isInjected()) + return false; + + return opts::UpdateDebugSections || isMultiEntry() || + requiresAddressTranslation(); +} + uint64_t BinaryFunction::getInstructionCount() const { uint64_t Count = 0; for (const BinaryBasicBlock &BB : blocks()) @@ -4120,15 +4128,13 @@ void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) { assert(FragmentBaseAddress == getOutputAddress()); } - const uint64_t BBOffset = Layout.getSymbolOffset(*BB->getLabel()); - const uint64_t BBAddress = FragmentBaseAddress + BBOffset; + const uint64_t BBAddress = + *BC.getIOAddressMap().lookup(BB->getInputOffset() + getAddress()); BB->setOutputStartAddress(BBAddress); if (PrevBB) PrevBB->setOutputEndAddress(BBAddress); PrevBB = BB; - - BB->updateOutputValues(Layout); } PrevBB->setOutputEndAddress(PrevBB->isSplit() @@ -4181,9 +4187,8 @@ uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const { // Check if the address is associated with an instruction that is tracked // by address translation. - auto KV = InputOffsetToAddressMap.find(Address - getAddress()); - if (KV != InputOffsetToAddressMap.end()) - return KV->second; + if (auto OutputAddress = BC.getIOAddressMap().lookup(Address)) + return *OutputAddress; // FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay // intact. Instead we can use pseudo instructions and/or annotations. diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt index a4612fb93f8c..c913179ebcc5 100644 --- a/bolt/lib/Core/CMakeLists.txt +++ b/bolt/lib/Core/CMakeLists.txt @@ -11,6 +11,7 @@ set(LLVM_LINK_COMPONENTS ) add_llvm_library(LLVMBOLTCore + AddressMap.cpp BinaryBasicBlock.cpp BinaryContext.cpp BinaryData.cpp diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp index 57a850eb1723..e004309e0e21 100644 --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -46,9 +46,14 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map, // allowing it to overwrite the previously inserted key in the map. Map[BBOutputOffset] = BBInputOffset; - for (const auto &IOPair : BB.getOffsetTranslationTable()) { - const uint64_t OutputOffset = IOPair.first + BBOutputOffset; - const uint32_t InputOffset = IOPair.second; + const auto &IOAddressMap = + BB.getFunction()->getBinaryContext().getIOAddressMap(); + + for (const auto &[InputOffset, Sym] : BB.getLocSyms()) { + const auto InputAddress = BB.getFunction()->getAddress() + InputOffset; + const auto OutputAddress = IOAddressMap.lookup(InputAddress); + assert(OutputAddress && "Unknown instruction address"); + const auto OutputOffset = *OutputAddress - FuncAddress; // Is this the first instruction in the BB? No need to duplicate the entry. if (OutputOffset == BBOutputOffset) diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 64b8a8b6d400..316b83cfbd38 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -183,9 +183,7 @@ void PseudoProbeRewriter::updatePseudoProbes() { // A call probe may be duplicated due to ICP // Go through output of InputOffsetToAddressMap to collect all related // probes - const InputOffsetToAddressMapTy &Offset2Addr = - F->getInputOffsetToAddressMap(); - auto CallOutputAddresses = Offset2Addr.equal_range(Offset); + auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first); auto CallOutputAddress = CallOutputAddresses.first; if (CallOutputAddress == CallOutputAddresses.second) { Probe->setAddress(INT64_MAX); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 551527f4c678..31babd2de802 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "bolt/Rewrite/RewriteInstance.h" +#include "bolt/Core/AddressMap.h" #include "bolt/Core/BinaryContext.h" #include "bolt/Core/BinaryEmitter.h" #include "bolt/Core/BinaryFunction.h" @@ -3170,6 +3171,9 @@ void RewriteInstance::preregisterSections() { ROFlags); BC->registerOrUpdateSection(getNewSecPrefix() + ".rodata.cold", ELF::SHT_PROGBITS, ROFlags); + BC->registerOrUpdateSection(AddressMap::SectionName, ELF::SHT_PROGBITS, + ROFlags) + .setLinkOnly(); } void RewriteInstance::emitAndLink() { @@ -3575,6 +3579,9 @@ void RewriteInstance::mapAllocatableSections( } for (BinarySection &Section : BC->allocatableSections()) { + if (Section.isLinkOnly()) + continue; + if (!Section.hasValidSectionID()) continue; @@ -3637,6 +3644,12 @@ void RewriteInstance::mapAllocatableSections( } void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) { + if (auto MapSection = BC->getUniqueSectionByName(AddressMap::SectionName)) { + auto Map = AddressMap::parse(MapSection->getOutputContents(), *BC); + BC->setIOAddressMap(std::move(Map)); + BC->deregisterSection(*MapSection); + } + for (BinaryFunction *Function : BC->getAllBinaryFunctions()) Function->updateOutputValues(Layout); } @@ -5282,6 +5295,8 @@ void RewriteInstance::rewriteFile() { for (BinarySection &Section : BC->allocatableSections()) { if (!Section.isFinalized() || !Section.getOutputData()) continue; + if (Section.isLinkOnly()) + continue; if (opts::Verbosity >= 1) outs() << "BOLT: writing new section " << Section.getName()