diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md index d95f30a299a2..00d472c57891 100644 --- a/bolt/docs/CommandLineArgumentReference.md +++ b/bolt/docs/CommandLineArgumentReference.md @@ -259,6 +259,10 @@ Always use long jumps/nops for Linux kernel static keys +- `--match-profile-with-function-hash` + + Match profile with function hash + - `--max-data-relocations=` Maximum number of data relocations to process diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index f25f59201f1c..554def697fa2 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -22,12 +22,18 @@ namespace opts { extern cl::opt Verbosity; extern cl::OptionCategory BoltOptCategory; extern cl::opt InferStaleProfile; +extern cl::opt Lite; static llvm::cl::opt IgnoreHash("profile-ignore-hash", cl::desc("ignore hash while reading function profile"), cl::Hidden, cl::cat(BoltOptCategory)); +llvm::cl::opt + MatchProfileWithFunctionHash("match-profile-with-function-hash", + cl::desc("Match profile with function hash"), + cl::Hidden, cl::cat(BoltOptCategory)); + llvm::cl::opt ProfileUseDFS("profile-use-dfs", cl::desc("use DFS order for YAML profile"), cl::Hidden, cl::cat(BoltOptCategory)); @@ -329,6 +335,8 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) { } bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) { + if (opts::MatchProfileWithFunctionHash) + return true; for (StringRef Name : BF.getNames()) if (ProfileFunctionNames.contains(Name)) return true; @@ -363,9 +371,24 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { return Profile.Hash == static_cast(BF.getHash()); }; - // We have to do 2 passes since LTO introduces an ambiguity in function - // names. The first pass assigns profiles that match 100% by name and - // by hash. The second pass allows name ambiguity for LTO private functions. + uint64_t MatchedWithExactName = 0; + uint64_t MatchedWithHash = 0; + uint64_t MatchedWithLTOCommonName = 0; + + // Computes hash for binary functions. + if (opts::MatchProfileWithFunctionHash) { + for (auto &[_, BF] : BC.getBinaryFunctions()) { + BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); + } + } else if (!opts::IgnoreHash) { + for (BinaryFunction *BF : ProfileBFs) { + if (!BF) + continue; + BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); + } + } + + // This first pass assigns profiles that match 100% by name and by hash. for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) { if (!BF) continue; @@ -374,15 +397,35 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // the profile. Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE); - // Recompute hash once per function. - if (!opts::IgnoreHash) - Function.computeHash(YamlBP.Header.IsDFSOrder, - YamlBP.Header.HashFunction); - - if (profileMatches(YamlBF, Function)) + if (profileMatches(YamlBF, Function)) { matchProfileToFunction(YamlBF, Function); + ++MatchedWithExactName; + } } + // Iterates through profiled functions to match the first binary function with + // the same exact hash. Serves to match identical, renamed functions. + // Collisions are possible where multiple functions share the same exact hash. + if (opts::MatchProfileWithFunctionHash) { + DenseMap StrictHashToBF; + StrictHashToBF.reserve(BC.getBinaryFunctions().size()); + + for (auto &[_, BF] : BC.getBinaryFunctions()) + StrictHashToBF[BF.getHash()] = &BF; + + for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) { + if (YamlBF.Used) + continue; + auto It = StrictHashToBF.find(YamlBF.Hash); + if (It != StrictHashToBF.end() && !ProfiledFunctions.count(It->second)) { + BinaryFunction *BF = It->second; + matchProfileToFunction(YamlBF, *BF); + ++MatchedWithHash; + } + } + } + + // This second pass allows name ambiguity for LTO private functions. for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) { if (!LTOCommonNameFunctionMap.contains(CommonName)) continue; @@ -396,6 +439,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { for (BinaryFunction *BF : Functions) { if (!ProfiledFunctions.count(BF) && profileMatches(*YamlBF, *BF)) { matchProfileToFunction(*YamlBF, *BF); + ++MatchedWithLTOCommonName; return true; } } @@ -407,8 +451,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // partially. if (!ProfileMatched && LTOProfiles.size() == 1 && Functions.size() == 1 && !LTOProfiles.front()->Used && - !ProfiledFunctions.count(*Functions.begin())) + !ProfiledFunctions.count(*Functions.begin())) { matchProfileToFunction(*LTOProfiles.front(), **Functions.begin()); + ++MatchedWithLTOCommonName; + } } for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) @@ -420,6 +466,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name << '\n'; + if (opts::Verbosity >= 1) { + outs() << "BOLT-INFO: matched " << MatchedWithExactName + << " functions with identical names\n"; + outs() << "BOLT-INFO: matched " << MatchedWithHash + << " functions with hash\n"; + outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName + << " functions with matching LTO common names\n"; + } + // Set for parseFunctionProfile(). NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions"); NormalizeByCalls = usesEvent("branches"); @@ -439,6 +494,12 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { BC.setNumUnusedProfiledObjects(NumUnused); + if (opts::Lite && opts::MatchProfileWithFunctionHash) { + for (BinaryFunction *BF : BC.getAllBinaryFunctions()) + if (!BF->hasProfile()) + BF->setIgnored(); + } + return Error::success(); } diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 1a3a8af21d81..c118f2c3ccd6 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -82,6 +82,7 @@ extern cl::opt Hugify; extern cl::opt Instrument; extern cl::opt JumpTables; extern cl::opt KeepNops; +extern cl::opt Lite; extern cl::list ReorderData; extern cl::opt ReorderFunctions; extern cl::opt TerminalTrap; @@ -140,9 +141,6 @@ KeepTmp("keep-tmp", cl::Hidden, cl::cat(BoltCategory)); -cl::opt Lite("lite", cl::desc("skip processing of cold functions"), - cl::cat(BoltCategory)); - static cl::opt LiteThresholdPct("lite-threshold-pct", cl::desc("threshold (in percent) for selecting functions to process in lite " diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index 41c89bc8aeba..b9bc79f408a6 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -128,6 +128,9 @@ cl::opt cl::desc("instrument code to generate accurate profile data"), cl::cat(BoltOptCategory)); +cl::opt Lite("lite", cl::desc("skip processing of cold functions"), + cl::cat(BoltCategory)); + cl::opt OutputFilename("o", cl::desc(""), diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test new file mode 100644 index 000000000000..41c991b4612e --- /dev/null +++ b/bolt/test/X86/hashing-based-function-matching.test @@ -0,0 +1,63 @@ +## Tests function matching in YAMLProfileReader by function hash. + +# REQUIRES: system-linux +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \ +# RUN: --print-cfg --match-profile-with-function-hash --profile-ignore-hash=0 2>&1 | FileCheck %s + +# CHECK: BOLT-INFO: matched 1 functions with hash + +#--- main.s +.globl main +.type main, @function +main: + .cfi_startproc +.LBB00: + pushq %rbp + movq %rsp, %rbp + subq $16, %rsp + testq %rax, %rax + js .LBB03 +.LBB01: + jne .LBB04 +.LBB02: + nop +.LBB03: + xorl %eax, %eax + addq $16, %rsp + popq %rbp + retq +.LBB04: + xorl %eax, %eax + addq $16, %rsp + popq %rbp + retq +## For relocations against .text + .reloc 0, R_X86_64_NONE + .cfi_endproc + .size main, .-main + +#--- yaml +--- +header: + profile-version: 1 + binary-name: 'hashing-based-function-matching.s.tmp.exe' + binary-build-id: '' + profile-flags: [ lbr ] + profile-origin: branch profile reader + profile-events: '' + dfs-order: false + hash-func: xxh3 +functions: + - name: main2 + fid: 0 + hash: 0x6E7F15254DE2478 + exec: 1 + nblocks: 6 + blocks: + - bid: 1 + insns: 1 + succ: [ { bid: 3, cnt: 1} ] +... diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 7eafc49059dd..a110671bacf3 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -372,6 +372,12 @@ Changes to the LLVM tools Changes to LLDB --------------------------------- +Changes to BOLT +--------------------------------- +* Now supports ``--match-profile-with-function-hash`` to match profiled and + binary functions with exact hash, allowing for the matching of renamed but + identical functions. + Changes to Sanitizers ---------------------