[BOLT] Calculate input to output address map using BOLTLinker
BOLT uses MCAsmLayout to calculate the output values of basic blocks. This means output values are calculated based on a pre-linking state and any changes to symbol values during linking will cause incorrect values to be used. This issue was first addressed in D154604 by adding all basic block symbols to the symbol table for the linker to resolve them. However, the runtime overhead of handling this huge symbol table turned out to be prohibitively large. This patch solves the issue in a different way. First, a temporary section containing [input address, output symbol] pairs is emitted to the intermediary object file. The linker will resolve all these references so we end up with a section of [input address, output address] pairs. This section is then parsed and used to: - Replace BinaryBasicBlock::OffsetTranslationTable - Replace BinaryFunction::InputOffsetToAddressMap - Update BinaryBasicBlock::OutputAddressRange Note that the reason this is more performant than the previous attempt is that these symbol references do not cause entries to be added to the symbol table. Instead, section-relative references are used for the relocations. Reviewed By: maksfb Differential Revision: https://reviews.llvm.org/D155604
This commit is contained in:
59
bolt/include/bolt/Core/AddressMap.h
Normal file
59
bolt/include/bolt/Core/AddressMap.h
Normal file
@@ -0,0 +1,59 @@
|
||||
//===- bolt/Core/AddressMap.h - Input-output address map --------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Helper class to create a mapping from input to output addresses needed for
|
||||
// updating debugging symbols and BAT. We emit an MCSection containing
|
||||
// <Input address, Output MCSymbol> pairs to the object file and JITLink will
|
||||
// transform this in <Input address, Output address> pairs. The linker output
|
||||
// can then be parsed and used to establish the mapping.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
#ifndef BOLT_CORE_ADDRESS_MAP_H
|
||||
#define BOLT_CORE_ADDRESS_MAP_H
|
||||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
|
||||
#include <optional>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MCStreamer;
|
||||
|
||||
namespace bolt {
|
||||
|
||||
class BinaryContext;
|
||||
|
||||
class AddressMap {
|
||||
using MapTy = std::unordered_multimap<uint64_t, uint64_t>;
|
||||
MapTy Map;
|
||||
|
||||
public:
|
||||
static const char *const SectionName;
|
||||
|
||||
static void emit(MCStreamer &Streamer, BinaryContext &BC);
|
||||
static AddressMap parse(StringRef Buffer, const BinaryContext &BC);
|
||||
|
||||
std::optional<uint64_t> lookup(uint64_t InputAddress) const {
|
||||
auto It = Map.find(InputAddress);
|
||||
if (It != Map.end())
|
||||
return It->second;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::pair<MapTy::const_iterator, MapTy::const_iterator>
|
||||
lookupAll(uint64_t InputAddress) const {
|
||||
return Map.equal_range(InputAddress);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
||||
@@ -100,16 +100,6 @@ private:
|
||||
using LocSymsTy = std::vector<std::pair<uint32_t, const MCSymbol *>>;
|
||||
std::unique_ptr<LocSymsTy> LocSyms;
|
||||
|
||||
/// After output/codegen, map output offsets of instructions in this basic
|
||||
/// block to instruction offsets in the original function. Note that the
|
||||
/// output basic block could be different from the input basic block.
|
||||
/// We only map instruction of interest, such as calls and markers.
|
||||
///
|
||||
/// We store the offset array in a basic block to facilitate BAT tables
|
||||
/// generation. Otherwise, the mapping could be done at function level.
|
||||
using OffsetTranslationTableTy = std::vector<std::pair<uint32_t, uint32_t>>;
|
||||
std::unique_ptr<OffsetTranslationTableTy> OffsetTranslationTable;
|
||||
|
||||
/// Alignment requirements for the block.
|
||||
uint32_t Alignment{1};
|
||||
|
||||
@@ -828,8 +818,7 @@ public:
|
||||
return OutputAddressRange;
|
||||
}
|
||||
|
||||
/// Update addresses of special instructions inside this basic block.
|
||||
void updateOutputValues(const MCAsmLayout &Layout);
|
||||
bool hasLocSyms() const { return LocSyms != nullptr; }
|
||||
|
||||
/// Return mapping of input offsets to symbols in the output.
|
||||
LocSymsTy &getLocSyms() {
|
||||
@@ -841,19 +830,6 @@ public:
|
||||
return const_cast<BinaryBasicBlock *>(this)->getLocSyms();
|
||||
}
|
||||
|
||||
/// Return offset translation table for the basic block.
|
||||
OffsetTranslationTableTy &getOffsetTranslationTable() {
|
||||
return OffsetTranslationTable
|
||||
? *OffsetTranslationTable
|
||||
: *(OffsetTranslationTable =
|
||||
std::make_unique<OffsetTranslationTableTy>());
|
||||
}
|
||||
|
||||
/// Return offset translation table for the basic block.
|
||||
const OffsetTranslationTableTy &getOffsetTranslationTable() const {
|
||||
return const_cast<BinaryBasicBlock *>(this)->getOffsetTranslationTable();
|
||||
}
|
||||
|
||||
/// Return size of the basic block in the output binary.
|
||||
uint64_t getOutputSize() const {
|
||||
return OutputAddressRange.second - OutputAddressRange.first;
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#ifndef BOLT_CORE_BINARY_CONTEXT_H
|
||||
#define BOLT_CORE_BINARY_CONTEXT_H
|
||||
|
||||
#include "bolt/Core/AddressMap.h"
|
||||
#include "bolt/Core/BinaryData.h"
|
||||
#include "bolt/Core/BinarySection.h"
|
||||
#include "bolt/Core/DebugData.h"
|
||||
@@ -221,6 +222,9 @@ class BinaryContext {
|
||||
bool ContainsDwarf5{false};
|
||||
bool ContainsDwarfLegacy{false};
|
||||
|
||||
/// Mapping from input to output addresses.
|
||||
std::optional<AddressMap> IOAddressMap;
|
||||
|
||||
/// Preprocess DWO debug information.
|
||||
void preprocessDWODebugInfo();
|
||||
|
||||
@@ -1343,6 +1347,12 @@ public:
|
||||
/* DWARFMustBeAtTheEnd */ false));
|
||||
return Streamer;
|
||||
}
|
||||
|
||||
void setIOAddressMap(AddressMap Map) { IOAddressMap = std::move(Map); }
|
||||
const AddressMap &getIOAddressMap() const {
|
||||
assert(IOAddressMap && "Address map not set yet");
|
||||
return *IOAddressMap;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename = std::enable_if_t<sizeof(T) == 1>>
|
||||
|
||||
@@ -577,9 +577,6 @@ private:
|
||||
/// Count the number of functions created.
|
||||
static uint64_t Count;
|
||||
|
||||
/// Map offsets of special instructions to addresses in the output.
|
||||
InputOffsetToAddressMapTy InputOffsetToAddressMap;
|
||||
|
||||
/// Register alternative function name.
|
||||
void addAlternativeName(std::string NewName) {
|
||||
Aliases.push_back(std::move(NewName));
|
||||
@@ -1226,13 +1223,6 @@ public:
|
||||
/// Update output values of the function based on the final \p Layout.
|
||||
void updateOutputValues(const MCAsmLayout &Layout);
|
||||
|
||||
/// Return mapping of input to output addresses. Most users should call
|
||||
/// translateInputToOutputAddress() for address translation.
|
||||
InputOffsetToAddressMapTy &getInputOffsetToAddressMap() {
|
||||
assert(isEmitted() && "cannot use address mapping before code emission");
|
||||
return InputOffsetToAddressMap;
|
||||
}
|
||||
|
||||
/// Register relocation type \p RelType at a given \p Address in the function
|
||||
/// against \p Symbol.
|
||||
/// Assert if the \p Address is not inside this function.
|
||||
@@ -2180,6 +2170,11 @@ public:
|
||||
/// its code emission.
|
||||
bool requiresAddressTranslation() const;
|
||||
|
||||
/// Return true if the linker needs to generate an address map for this
|
||||
/// function. Used for keeping track of the mapping from input to out
|
||||
/// addresses of basic blocks.
|
||||
bool requiresAddressMap() const;
|
||||
|
||||
/// Adjust branch instructions to match the CFG.
|
||||
///
|
||||
/// As it comes to internal branches, the CFG represents "the ultimate source
|
||||
|
||||
@@ -97,6 +97,8 @@ class BinarySection {
|
||||
mutable bool IsReordered{false}; // Have the contents been reordered?
|
||||
bool IsAnonymous{false}; // True if the name should not be included
|
||||
// in the output file.
|
||||
bool IsLinkOnly{false}; // True if the section should not be included
|
||||
// in the output file.
|
||||
|
||||
uint64_t hash(const BinaryData &BD,
|
||||
std::map<const BinaryData *, uint64_t> &Cache) const;
|
||||
@@ -452,6 +454,8 @@ public:
|
||||
void setIndex(uint32_t I) { Index = I; }
|
||||
void setOutputName(const Twine &Name) { OutputName = Name.str(); }
|
||||
void setAnonymous(bool Flag) { IsAnonymous = Flag; }
|
||||
bool isLinkOnly() const { return IsLinkOnly; }
|
||||
void setLinkOnly() { IsLinkOnly = true; }
|
||||
|
||||
/// Emit the section as data, possibly with relocations.
|
||||
/// Use name \p SectionName for the section during the emission.
|
||||
|
||||
63
bolt/lib/Core/AddressMap.cpp
Normal file
63
bolt/lib/Core/AddressMap.cpp
Normal file
@@ -0,0 +1,63 @@
|
||||
#include "bolt/Core/AddressMap.h"
|
||||
#include "bolt/Core/BinaryContext.h"
|
||||
#include "bolt/Core/BinaryFunction.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/Support/DataExtractor.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
const char *const AddressMap::SectionName = ".bolt.address_map";
|
||||
|
||||
static void emitLabel(MCStreamer &Streamer, uint64_t InputAddress,
|
||||
const MCSymbol *OutputLabel) {
|
||||
Streamer.emitIntValue(InputAddress, 8);
|
||||
Streamer.emitSymbolValue(OutputLabel, 8);
|
||||
}
|
||||
|
||||
void AddressMap::emit(MCStreamer &Streamer, BinaryContext &BC) {
|
||||
Streamer.switchSection(BC.getDataSection(SectionName));
|
||||
|
||||
for (const auto &[BFAddress, BF] : BC.getBinaryFunctions()) {
|
||||
if (!BF.requiresAddressMap())
|
||||
continue;
|
||||
|
||||
for (const auto &BB : BF) {
|
||||
if (!BB.getLabel()->isDefined())
|
||||
continue;
|
||||
|
||||
emitLabel(Streamer, BFAddress + BB.getInputAddressRange().first,
|
||||
BB.getLabel());
|
||||
|
||||
if (!BB.hasLocSyms())
|
||||
continue;
|
||||
|
||||
for (auto [Offset, Symbol] : BB.getLocSyms())
|
||||
emitLabel(Streamer, BFAddress + Offset, Symbol);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
AddressMap AddressMap::parse(StringRef Buffer, const BinaryContext &BC) {
|
||||
const auto EntrySize = 2 * BC.AsmInfo->getCodePointerSize();
|
||||
assert(Buffer.size() % EntrySize == 0 && "Unexpected address map size");
|
||||
|
||||
DataExtractor DE(Buffer, BC.AsmInfo->isLittleEndian(),
|
||||
BC.AsmInfo->getCodePointerSize());
|
||||
DataExtractor::Cursor Cursor(0);
|
||||
|
||||
AddressMap Parsed;
|
||||
Parsed.Map.reserve(Buffer.size() / EntrySize);
|
||||
|
||||
while (Cursor && !DE.eof(Cursor)) {
|
||||
const auto Input = DE.getAddress(Cursor);
|
||||
const auto Output = DE.getAddress(Cursor);
|
||||
Parsed.Map.insert({Input, Output});
|
||||
}
|
||||
|
||||
assert(Cursor && "Error reading address map section");
|
||||
return Parsed;
|
||||
}
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
@@ -613,27 +613,5 @@ BinaryBasicBlock *BinaryBasicBlock::splitAt(iterator II) {
|
||||
return NewBlock;
|
||||
}
|
||||
|
||||
void BinaryBasicBlock::updateOutputValues(const MCAsmLayout &Layout) {
|
||||
if (!LocSyms)
|
||||
return;
|
||||
|
||||
const uint64_t BBAddress = getOutputAddressRange().first;
|
||||
const uint64_t BBOffset = Layout.getSymbolOffset(*getLabel());
|
||||
for (const auto &LocSymKV : *LocSyms) {
|
||||
const uint32_t InputFunctionOffset = LocSymKV.first;
|
||||
const uint32_t OutputOffset = static_cast<uint32_t>(
|
||||
Layout.getSymbolOffset(*LocSymKV.second) - BBOffset);
|
||||
getOffsetTranslationTable().emplace_back(
|
||||
std::make_pair(OutputOffset, InputFunctionOffset));
|
||||
|
||||
// Update reverse (relative to BAT) address lookup table for function.
|
||||
if (getFunction()->requiresAddressTranslation()) {
|
||||
getFunction()->getInputOffsetToAddressMap().emplace(
|
||||
std::make_pair(InputFunctionOffset, OutputOffset + BBAddress));
|
||||
}
|
||||
}
|
||||
LocSyms.reset(nullptr);
|
||||
}
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
||||
@@ -214,6 +214,10 @@ void BinaryEmitter::emitAll(StringRef OrgSecPrefix) {
|
||||
}
|
||||
|
||||
emitDataSections(OrgSecPrefix);
|
||||
|
||||
// TODO Enable for Mach-O once BinaryContext::getDataSection supports it.
|
||||
if (BC.isELF())
|
||||
AddressMap::emit(Streamer, BC);
|
||||
}
|
||||
|
||||
void BinaryEmitter::emitFunctions() {
|
||||
|
||||
@@ -2855,6 +2855,14 @@ bool BinaryFunction::requiresAddressTranslation() const {
|
||||
return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe();
|
||||
}
|
||||
|
||||
bool BinaryFunction::requiresAddressMap() const {
|
||||
if (isInjected())
|
||||
return false;
|
||||
|
||||
return opts::UpdateDebugSections || isMultiEntry() ||
|
||||
requiresAddressTranslation();
|
||||
}
|
||||
|
||||
uint64_t BinaryFunction::getInstructionCount() const {
|
||||
uint64_t Count = 0;
|
||||
for (const BinaryBasicBlock &BB : blocks())
|
||||
@@ -4120,15 +4128,13 @@ void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
|
||||
assert(FragmentBaseAddress == getOutputAddress());
|
||||
}
|
||||
|
||||
const uint64_t BBOffset = Layout.getSymbolOffset(*BB->getLabel());
|
||||
const uint64_t BBAddress = FragmentBaseAddress + BBOffset;
|
||||
const uint64_t BBAddress =
|
||||
*BC.getIOAddressMap().lookup(BB->getInputOffset() + getAddress());
|
||||
BB->setOutputStartAddress(BBAddress);
|
||||
|
||||
if (PrevBB)
|
||||
PrevBB->setOutputEndAddress(BBAddress);
|
||||
PrevBB = BB;
|
||||
|
||||
BB->updateOutputValues(Layout);
|
||||
}
|
||||
|
||||
PrevBB->setOutputEndAddress(PrevBB->isSplit()
|
||||
@@ -4181,9 +4187,8 @@ uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {
|
||||
|
||||
// Check if the address is associated with an instruction that is tracked
|
||||
// by address translation.
|
||||
auto KV = InputOffsetToAddressMap.find(Address - getAddress());
|
||||
if (KV != InputOffsetToAddressMap.end())
|
||||
return KV->second;
|
||||
if (auto OutputAddress = BC.getIOAddressMap().lookup(Address))
|
||||
return *OutputAddress;
|
||||
|
||||
// FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay
|
||||
// intact. Instead we can use pseudo instructions and/or annotations.
|
||||
|
||||
@@ -11,6 +11,7 @@ set(LLVM_LINK_COMPONENTS
|
||||
)
|
||||
|
||||
add_llvm_library(LLVMBOLTCore
|
||||
AddressMap.cpp
|
||||
BinaryBasicBlock.cpp
|
||||
BinaryContext.cpp
|
||||
BinaryData.cpp
|
||||
|
||||
@@ -46,9 +46,14 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
|
||||
// allowing it to overwrite the previously inserted key in the map.
|
||||
Map[BBOutputOffset] = BBInputOffset;
|
||||
|
||||
for (const auto &IOPair : BB.getOffsetTranslationTable()) {
|
||||
const uint64_t OutputOffset = IOPair.first + BBOutputOffset;
|
||||
const uint32_t InputOffset = IOPair.second;
|
||||
const auto &IOAddressMap =
|
||||
BB.getFunction()->getBinaryContext().getIOAddressMap();
|
||||
|
||||
for (const auto &[InputOffset, Sym] : BB.getLocSyms()) {
|
||||
const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
|
||||
const auto OutputAddress = IOAddressMap.lookup(InputAddress);
|
||||
assert(OutputAddress && "Unknown instruction address");
|
||||
const auto OutputOffset = *OutputAddress - FuncAddress;
|
||||
|
||||
// Is this the first instruction in the BB? No need to duplicate the entry.
|
||||
if (OutputOffset == BBOutputOffset)
|
||||
|
||||
@@ -183,9 +183,7 @@ void PseudoProbeRewriter::updatePseudoProbes() {
|
||||
// A call probe may be duplicated due to ICP
|
||||
// Go through output of InputOffsetToAddressMap to collect all related
|
||||
// probes
|
||||
const InputOffsetToAddressMapTy &Offset2Addr =
|
||||
F->getInputOffsetToAddressMap();
|
||||
auto CallOutputAddresses = Offset2Addr.equal_range(Offset);
|
||||
auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first);
|
||||
auto CallOutputAddress = CallOutputAddresses.first;
|
||||
if (CallOutputAddress == CallOutputAddresses.second) {
|
||||
Probe->setAddress(INT64_MAX);
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "bolt/Rewrite/RewriteInstance.h"
|
||||
#include "bolt/Core/AddressMap.h"
|
||||
#include "bolt/Core/BinaryContext.h"
|
||||
#include "bolt/Core/BinaryEmitter.h"
|
||||
#include "bolt/Core/BinaryFunction.h"
|
||||
@@ -3170,6 +3171,9 @@ void RewriteInstance::preregisterSections() {
|
||||
ROFlags);
|
||||
BC->registerOrUpdateSection(getNewSecPrefix() + ".rodata.cold",
|
||||
ELF::SHT_PROGBITS, ROFlags);
|
||||
BC->registerOrUpdateSection(AddressMap::SectionName, ELF::SHT_PROGBITS,
|
||||
ROFlags)
|
||||
.setLinkOnly();
|
||||
}
|
||||
|
||||
void RewriteInstance::emitAndLink() {
|
||||
@@ -3575,6 +3579,9 @@ void RewriteInstance::mapAllocatableSections(
|
||||
}
|
||||
|
||||
for (BinarySection &Section : BC->allocatableSections()) {
|
||||
if (Section.isLinkOnly())
|
||||
continue;
|
||||
|
||||
if (!Section.hasValidSectionID())
|
||||
continue;
|
||||
|
||||
@@ -3637,6 +3644,12 @@ void RewriteInstance::mapAllocatableSections(
|
||||
}
|
||||
|
||||
void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
|
||||
if (auto MapSection = BC->getUniqueSectionByName(AddressMap::SectionName)) {
|
||||
auto Map = AddressMap::parse(MapSection->getOutputContents(), *BC);
|
||||
BC->setIOAddressMap(std::move(Map));
|
||||
BC->deregisterSection(*MapSection);
|
||||
}
|
||||
|
||||
for (BinaryFunction *Function : BC->getAllBinaryFunctions())
|
||||
Function->updateOutputValues(Layout);
|
||||
}
|
||||
@@ -5282,6 +5295,8 @@ void RewriteInstance::rewriteFile() {
|
||||
for (BinarySection &Section : BC->allocatableSections()) {
|
||||
if (!Section.isFinalized() || !Section.getOutputData())
|
||||
continue;
|
||||
if (Section.isLinkOnly())
|
||||
continue;
|
||||
|
||||
if (opts::Verbosity >= 1)
|
||||
outs() << "BOLT: writing new section " << Section.getName()
|
||||
|
||||
Reference in New Issue
Block a user