[BOLT] Encode BAT using ULEB128 (#76899)

Reduces BAT section size, bytes:
- large binary: 38676872 -> 23262524 (0.60x),
- medium binary (trunk clang): 5938004 -> 3213504 (0.54x),
- small binary (X86/bolt-address-translation.test): 1436 -> 680 (0.47x).

Test Plan: Updated bolt/test/X86/bolt-address-translation.test
This commit is contained in:
Amir Ayupov
2023-12-21 12:16:59 -08:00
parent a7cf0a1f7f
commit 565f40d66b
4 changed files with 40 additions and 50 deletions

View File

@@ -10,6 +10,8 @@
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/LEB128.h"
#define DEBUG_TYPE "bolt-bat"
@@ -44,7 +46,7 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
// and this deleted block will both share the same output address (the same
// key), and we need to map back. We choose here to privilege the successor by
// allowing it to overwrite the previously inserted key in the map.
Map[BBOutputOffset] = BBInputOffset;
Map[BBOutputOffset] = BBInputOffset << 1;
const auto &IOAddressMap =
BB.getFunction()->getBinaryContext().getIOAddressMap();
@@ -61,8 +63,8 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(OutputOffset) << " Val: "
<< Twine::utohexstr(InputOffset) << " (branch)\n");
Map.insert(
std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset | BRANCHENTRY));
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset,
(InputOffset << 1) | BRANCHENTRY));
}
}
@@ -102,7 +104,7 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
}
const uint32_t NumFuncs = Maps.size();
OS.write(reinterpret_cast<const char *>(&NumFuncs), 4);
encodeULEB128(NumFuncs, OS);
LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << " functions for BAT.\n");
for (auto &MapEntry : Maps) {
const uint64_t Address = MapEntry.first;
@@ -110,20 +112,20 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
const uint32_t NumEntries = Map.size();
LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
<< Twine::utohexstr(Address) << ".\n");
OS.write(reinterpret_cast<const char *>(&Address), 8);
OS.write(reinterpret_cast<const char *>(&NumEntries), 4);
encodeULEB128(Address, OS);
encodeULEB128(NumEntries, OS);
for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
OS.write(reinterpret_cast<const char *>(&KeyVal.first), 4);
OS.write(reinterpret_cast<const char *>(&KeyVal.second), 4);
encodeULEB128(KeyVal.first, OS);
encodeULEB128(KeyVal.second, OS);
}
}
const uint32_t NumColdEntries = ColdPartSource.size();
LLVM_DEBUG(dbgs() << "Writing " << NumColdEntries
<< " cold part mappings.\n");
OS.write(reinterpret_cast<const char *>(&NumColdEntries), 4);
encodeULEB128(NumColdEntries, OS);
for (std::pair<const uint64_t, uint64_t> &ColdEntry : ColdPartSource) {
OS.write(reinterpret_cast<const char *>(&ColdEntry.first), 8);
OS.write(reinterpret_cast<const char *>(&ColdEntry.second), 8);
encodeULEB128(ColdEntry.first, OS);
encodeULEB128(ColdEntry.second, OS);
LLVM_DEBUG(dbgs() << " " << Twine::utohexstr(ColdEntry.first) << " -> "
<< Twine::utohexstr(ColdEntry.second) << "\n");
}
@@ -152,26 +154,19 @@ std::error_code BoltAddressTranslation::parse(StringRef Buf) {
if (Name.substr(0, 4) != "BOLT")
return make_error_code(llvm::errc::io_error);
if (Buf.size() - Offset < 4)
return make_error_code(llvm::errc::io_error);
const uint32_t NumFunctions = DE.getU32(&Offset);
Error Err(Error::success());
const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << " functions\n");
for (uint32_t I = 0; I < NumFunctions; ++I) {
if (Buf.size() - Offset < 12)
return make_error_code(llvm::errc::io_error);
const uint64_t Address = DE.getU64(&Offset);
const uint32_t NumEntries = DE.getU32(&Offset);
const uint64_t Address = DE.getULEB128(&Offset, &Err);
const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
MapTy Map;
LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
<< Twine::utohexstr(Address) << "\n");
if (Buf.size() - Offset < 8 * NumEntries)
return make_error_code(llvm::errc::io_error);
for (uint32_t J = 0; J < NumEntries; ++J) {
const uint32_t OutputAddr = DE.getU32(&Offset);
const uint32_t InputAddr = DE.getU32(&Offset);
const uint32_t OutputAddr = DE.getULEB128(&Offset, &Err);
const uint32_t InputAddr = DE.getULEB128(&Offset, &Err);
Map.insert(std::pair<uint32_t, uint32_t>(OutputAddr, InputAddr));
LLVM_DEBUG(dbgs() << Twine::utohexstr(OutputAddr) << " -> "
<< Twine::utohexstr(InputAddr) << "\n");
@@ -179,16 +174,11 @@ std::error_code BoltAddressTranslation::parse(StringRef Buf) {
Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
}
if (Buf.size() - Offset < 4)
return make_error_code(llvm::errc::io_error);
const uint32_t NumColdEntries = DE.getU32(&Offset);
const uint32_t NumColdEntries = DE.getULEB128(&Offset, &Err);
LLVM_DEBUG(dbgs() << "Parsing " << NumColdEntries << " cold part mappings\n");
for (uint32_t I = 0; I < NumColdEntries; ++I) {
if (Buf.size() - Offset < 16)
return make_error_code(llvm::errc::io_error);
const uint32_t ColdAddress = DE.getU64(&Offset);
const uint32_t HotAddress = DE.getU64(&Offset);
const uint32_t ColdAddress = DE.getULEB128(&Offset, &Err);
const uint32_t HotAddress = DE.getULEB128(&Offset, &Err);
ColdPartSource.insert(
std::pair<uint64_t, uint64_t>(ColdAddress, HotAddress));
LLVM_DEBUG(dbgs() << Twine::utohexstr(ColdAddress) << " -> "
@@ -198,7 +188,7 @@ std::error_code BoltAddressTranslation::parse(StringRef Buf) {
outs() << "BOLT-INFO: Parsed " << NumColdEntries
<< " BAT cold-to-hot entries\n";
return std::error_code();
return errorToErrorCode(std::move(Err));
}
void BoltAddressTranslation::dump(raw_ostream &OS) {
@@ -209,7 +199,7 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
OS << "BB mappings:\n";
for (const auto &Entry : MapEntry.second) {
const bool IsBranch = Entry.second & BRANCHENTRY;
const uint32_t Val = Entry.second & ~BRANCHENTRY;
const uint32_t Val = Entry.second >> 1; // dropping BRANCHENTRY bit
OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
<< "0x" << Twine::utohexstr(Val);
if (IsBranch)
@@ -244,7 +234,7 @@ uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress,
--KeyVal;
const uint32_t Val = KeyVal->second & ~BRANCHENTRY;
const uint32_t Val = KeyVal->second >> 1; // dropping BRANCHENTRY bit
// Branch source addresses are translated to the first instruction of the
// source BB to avoid accounting for modifications BOLT may have made in the
// BB regarding deletion/addition of instructions.