From 8f1d94aaea5c18b83cd3b0df3be3a48ef1d3833d Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Thu, 18 Jan 2024 13:49:44 -0800 Subject: [PATCH] [BOLT] Use continuous output addresses in delta encoding in BAT Make output function addresses be delta-encoded with respect to the last offset in the previous function. This reduces the deltas in function start addresses. Test Plan: Reduces BAT section size to: - large binary: 12218860 bytes (0.32x original), - medium binary: 1606580 bytes (0.27x original), - small binary: 404 bytes (0.28x original). Reviewers: rafaelauler Reviewed By: rafaelauler Pull Request: https://github.com/llvm/llvm-project/pull/76904 --- bolt/docs/BAT.md | 12 +++-- .../bolt/Profile/BoltAddressTranslation.h | 7 +-- bolt/lib/Profile/BoltAddressTranslation.cpp | 44 ++++++++++--------- 3 files changed, 36 insertions(+), 27 deletions(-) diff --git a/bolt/docs/BAT.md b/bolt/docs/BAT.md index 96a9a187e0ac..0a2c878ef4ae 100644 --- a/bolt/docs/BAT.md +++ b/bolt/docs/BAT.md @@ -69,11 +69,15 @@ Header: The header is followed by Functions table with `NumFuncs` entries. Output binary addresses are delta encoded, meaning that only the difference with -the previous output address is stored. Addresses implicitly start at zero. +the last previous output address is stored. Addresses implicitly start at zero. +Output addresses are continuous through function start addresses and function +internal offsets, and between hot and cold fragments, to better spread deltas +and save space. + Hot indices are delta encoded, implicitly starting at zero. | Entry | Encoding | Description | | ------ | ------| ----------- | -| `Address` | Delta, ULEB128 | Function address in the output binary | +| `Address` | Continuous, Delta, ULEB128 | Function address in the output binary | | `HotIndex` | Delta, ULEB128 | Cold functions only: index of corresponding hot function in hot functions table | | `NumEntries` | ULEB128 | Number of address translation entries for a function | @@ -82,10 +86,10 @@ function. 
### Address translation table Delta encoding means that only the difference with the previous corresponding -entry is encoded. Offsets implicitly start at zero. +entry is encoded. Input offsets implicitly start at zero. | Entry | Encoding | Description | | ------ | ------| ----------- | -| `OutputOffset` | Delta, ULEB128 | Function offset in output binary | +| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary | | `InputOffset` | Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit | `BRANCHENTRY` bit denotes whether a given offset pair is a control flow source diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h index 01d3be4ee59b..f6bd61bc8898 100644 --- a/bolt/include/bolt/Profile/BoltAddressTranslation.h +++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h @@ -121,13 +121,14 @@ private: /// Write the serialized address translation table for a function. template - void writeMaps(std::map &Maps, raw_ostream &OS); + void writeMaps(std::map &Maps, uint64_t &PrevAddress, + raw_ostream &OS); /// Read the serialized address translation table for a function. /// Return a parse error if failed. 
template - void parseMaps(std::vector &HotFuncs, DataExtractor &DE, - uint64_t &Offset, Error &Err); + void parseMaps(std::vector &HotFuncs, uint64_t &PrevAddress, + DataExtractor &DE, uint64_t &Offset, Error &Err); std::map Maps; diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp index 697ff1e5dd0d..d3c33d6e6bc7 100644 --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -102,15 +102,17 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) { } } - writeMaps(Maps, OS); - writeMaps(Maps, OS); + // Output addresses are delta-encoded + uint64_t PrevAddress = 0; + writeMaps(Maps, PrevAddress, OS); + writeMaps(Maps, PrevAddress, OS); outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n"; } template void BoltAddressTranslation::writeMaps(std::map &Maps, - raw_ostream &OS) { + uint64_t &PrevAddress, raw_ostream &OS) { const uint32_t NumFuncs = llvm::count_if(llvm::make_first_range(Maps), [&](const uint64_t Address) { return Cold == ColdPartSource.count(Address); @@ -119,8 +121,6 @@ void BoltAddressTranslation::writeMaps(std::map &Maps, LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << (Cold ? " cold" : "") << " functions for BAT.\n"); size_t PrevIndex = 0; - // Output addresses are delta-encoded - uint64_t PrevAddress = 0; for (auto &MapEntry : Maps) { const uint64_t Address = MapEntry.first; // Only process cold fragments in cold mode, and vice versa. 
@@ -139,12 +139,14 @@ void BoltAddressTranslation::writeMaps(std::map &Maps, PrevIndex = HotIndex; } encodeULEB128(NumEntries, OS); - uint64_t InOffset = 0, OutOffset = 0; + uint64_t InOffset = 0; // Output and Input addresses and delta-encoded for (std::pair &KeyVal : Map) { - encodeULEB128(KeyVal.first - OutOffset, OS); + const uint64_t OutputAddress = KeyVal.first + Address; + encodeULEB128(OutputAddress - PrevAddress, OS); + PrevAddress = OutputAddress; encodeSLEB128(KeyVal.second - InOffset, OS); - std::tie(OutOffset, InOffset) = KeyVal; + InOffset = KeyVal.second; } } } @@ -170,21 +172,21 @@ std::error_code BoltAddressTranslation::parse(StringRef Buf) { Error Err(Error::success()); std::vector HotFuncs; - parseMaps(HotFuncs, DE, Offset, Err); - parseMaps(HotFuncs, DE, Offset, Err); + uint64_t PrevAddress = 0; + parseMaps(HotFuncs, PrevAddress, DE, Offset, Err); + parseMaps(HotFuncs, PrevAddress, DE, Offset, Err); outs() << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n"; return errorToErrorCode(std::move(Err)); } template void BoltAddressTranslation::parseMaps(std::vector &HotFuncs, - DataExtractor &DE, uint64_t &Offset, - Error &Err) { + uint64_t &PrevAddress, DataExtractor &DE, + uint64_t &Offset, Error &Err) { const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err); LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << (Cold ? 
" cold" : "") << " functions\n"); size_t HotIndex = 0; - uint64_t PrevAddress = 0; for (uint32_t I = 0; I < NumFunctions; ++I) { const uint64_t Address = PrevAddress + DE.getULEB128(&Offset, &Err); PrevAddress = Address; @@ -199,18 +201,20 @@ void BoltAddressTranslation::parseMaps(std::vector &HotFuncs, LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x" << Twine::utohexstr(Address) << "\n"); - uint64_t InputOffset = 0, OutputOffset = 0; + uint64_t InputOffset = 0; for (uint32_t J = 0; J < NumEntries; ++J) { const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err); + const uint64_t OutputAddress = PrevAddress + OutputDelta; + const uint64_t OutputOffset = OutputAddress - Address; + PrevAddress = OutputAddress; const int64_t InputDelta = DE.getSLEB128(&Offset, &Err); - OutputOffset += OutputDelta; InputOffset += InputDelta; Map.insert(std::pair(OutputOffset, InputOffset)); - LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b)\n", - OutputOffset, InputOffset, OutputDelta, - encodeULEB128(OutputDelta, nulls()), - InputDelta, - encodeSLEB128(InputDelta, nulls()))); + LLVM_DEBUG( + dbgs() << formatv("{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}\n", + OutputOffset, InputOffset, OutputDelta, + encodeULEB128(OutputDelta, nulls()), InputDelta, + encodeSLEB128(InputDelta, nulls()), OutputAddress)); } Maps.insert(std::pair(Address, Map)); }