[BOLT] Use continuous output addresses in delta encoding in BAT

Make output function addresses be delta-encoded wrt last offset in the
previous function. This reduces the deltas in function start addresses.

Test Plan:
Reduces BAT section size to:
- large binary: 12218860 bytes (0.32x original),
- medium binary: 1606580 bytes (0.27x original),
- small binary: 404 bytes (0.28x original),

Reviewers: rafaelauler

Reviewed By: rafaelauler

Pull Request: https://github.com/llvm/llvm-project/pull/76904
This commit is contained in:
Amir Ayupov
2024-01-18 13:49:44 -08:00
committed by GitHub
parent 83365152a4
commit 8f1d94aaea
3 changed files with 36 additions and 27 deletions

View File

@@ -69,11 +69,15 @@ Header:
The header is followed by Functions table with `NumFuncs` entries.
Output binary addresses are delta encoded, meaning that only the difference with
the previous output address is stored. Addresses implicitly start at zero.
the last previous output address is stored. Addresses implicitly start at zero.
Output addresses are continuous through function start addresses and function
internal offsets, and between hot and cold fragments, to better spread deltas
and save space.
Hot indices are delta encoded, implicitly starting at zero.
| Entry | Encoding | Description |
| ------ | ------| ----------- |
| `Address` | Delta, ULEB128 | Function address in the output binary |
| `Address` | Continuous, Delta, ULEB128 | Function address in the output binary |
| `HotIndex` | Delta, ULEB128 | Cold functions only: index of corresponding hot function in hot functions table |
| `NumEntries` | ULEB128 | Number of address translation entries for a function |
@@ -82,10 +86,10 @@ function.
### Address translation table
Delta encoding means that only the difference with the previous corresponding
entry is encoded. Offsets implicitly start at zero.
entry is encoded. Input offsets implicitly start at zero.
| Entry | Encoding | Description |
| ------ | ------| ----------- |
| `OutputOffset` | Delta, ULEB128 | Function offset in output binary |
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary |
| `InputOffset` | Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit |
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source

View File

@@ -121,13 +121,14 @@ private:
/// Write the serialized address translation table for a function.
template <bool Cold>
void writeMaps(std::map<uint64_t, MapTy> &Maps, raw_ostream &OS);
void writeMaps(std::map<uint64_t, MapTy> &Maps, uint64_t &PrevAddress,
raw_ostream &OS);
/// Read the serialized address translation table for a function.
/// Return a parse error if failed.
template <bool Cold>
void parseMaps(std::vector<uint64_t> &HotFuncs, DataExtractor &DE,
uint64_t &Offset, Error &Err);
void parseMaps(std::vector<uint64_t> &HotFuncs, uint64_t &PrevAddress,
DataExtractor &DE, uint64_t &Offset, Error &Err);
std::map<uint64_t, MapTy> Maps;

View File

@@ -102,15 +102,17 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
}
}
writeMaps</*Cold=*/false>(Maps, OS);
writeMaps</*Cold=*/true>(Maps, OS);
// Output addresses are delta-encoded
uint64_t PrevAddress = 0;
writeMaps</*Cold=*/false>(Maps, PrevAddress, OS);
writeMaps</*Cold=*/true>(Maps, PrevAddress, OS);
outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
}
template <bool Cold>
void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
raw_ostream &OS) {
uint64_t &PrevAddress, raw_ostream &OS) {
const uint32_t NumFuncs =
llvm::count_if(llvm::make_first_range(Maps), [&](const uint64_t Address) {
return Cold == ColdPartSource.count(Address);
@@ -119,8 +121,6 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << (Cold ? " cold" : "")
<< " functions for BAT.\n");
size_t PrevIndex = 0;
// Output addresses are delta-encoded
uint64_t PrevAddress = 0;
for (auto &MapEntry : Maps) {
const uint64_t Address = MapEntry.first;
// Only process cold fragments in cold mode, and vice versa.
@@ -139,12 +139,14 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
PrevIndex = HotIndex;
}
encodeULEB128(NumEntries, OS);
uint64_t InOffset = 0, OutOffset = 0;
uint64_t InOffset = 0;
// Output and Input addresses and delta-encoded
for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
encodeULEB128(KeyVal.first - OutOffset, OS);
const uint64_t OutputAddress = KeyVal.first + Address;
encodeULEB128(OutputAddress - PrevAddress, OS);
PrevAddress = OutputAddress;
encodeSLEB128(KeyVal.second - InOffset, OS);
std::tie(OutOffset, InOffset) = KeyVal;
InOffset = KeyVal.second;
}
}
}
@@ -170,21 +172,21 @@ std::error_code BoltAddressTranslation::parse(StringRef Buf) {
Error Err(Error::success());
std::vector<uint64_t> HotFuncs;
parseMaps</*Cold=*/false>(HotFuncs, DE, Offset, Err);
parseMaps</*Cold=*/true>(HotFuncs, DE, Offset, Err);
uint64_t PrevAddress = 0;
parseMaps</*Cold=*/false>(HotFuncs, PrevAddress, DE, Offset, Err);
parseMaps</*Cold=*/true>(HotFuncs, PrevAddress, DE, Offset, Err);
outs() << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
return errorToErrorCode(std::move(Err));
}
template <bool Cold>
void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
DataExtractor &DE, uint64_t &Offset,
Error &Err) {
uint64_t &PrevAddress, DataExtractor &DE,
uint64_t &Offset, Error &Err) {
const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << (Cold ? " cold" : "")
<< " functions\n");
size_t HotIndex = 0;
uint64_t PrevAddress = 0;
for (uint32_t I = 0; I < NumFunctions; ++I) {
const uint64_t Address = PrevAddress + DE.getULEB128(&Offset, &Err);
PrevAddress = Address;
@@ -199,18 +201,20 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
<< Twine::utohexstr(Address) << "\n");
uint64_t InputOffset = 0, OutputOffset = 0;
uint64_t InputOffset = 0;
for (uint32_t J = 0; J < NumEntries; ++J) {
const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
const uint64_t OutputAddress = PrevAddress + OutputDelta;
const uint64_t OutputOffset = OutputAddress - Address;
PrevAddress = OutputAddress;
const int64_t InputDelta = DE.getSLEB128(&Offset, &Err);
OutputOffset += OutputDelta;
InputOffset += InputDelta;
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b)\n",
OutputOffset, InputOffset, OutputDelta,
encodeULEB128(OutputDelta, nulls()),
InputDelta,
encodeSLEB128(InputDelta, nulls())));
LLVM_DEBUG(
dbgs() << formatv("{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}\n",
OutputOffset, InputOffset, OutputDelta,
encodeULEB128(OutputDelta, nulls()), InputDelta,
encodeSLEB128(InputDelta, nulls()), OutputAddress));
}
Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
}