From 3c357a49d61e4c81a1ac016502ee504521bc8dda Mon Sep 17 00:00:00 2001 From: Alexander Yermolovich <43973793+ayermolo@users.noreply.github.com> Date: Mon, 16 Dec 2024 21:49:53 -0800 Subject: [PATCH] [BOLT] Add support for safe-icf (#116275) Identical Code Folding (ICF) folds functions that are identical into one function, and updates symbol addresses to the new address. This reduces the size of a binary, but can lead to problems, for example when function pointers are compared. Such comparisons can appear either explicitly in the code or in IR generated by optimization passes like Indirect Call Promotion (ICP). After ICF, what used to be two different addresses become the same address. This can lead to a different code path being taken. This is where safe ICF comes in. The linker (LLD) does it using the address-significant section generated by clang. If a symbol is in it, or an object doesn't have this section, symbols are not folded. BOLT does not have the information regarding which objects do not have this section, so it can't re-use this mechanism. This implementation scans the code section and conservatively marks function symbols as unsafe. It treats symbols as unsafe if they are used in a non-control-flow instruction. It also scans through the data relocation sections and does the same for relocations that reference a function symbol. The latter handles the case when a function pointer is stored in a local or global variable, etc. If a relocation address points within a vtable, these symbols are skipped. 
--- bolt/docs/CommandLineArgumentReference.md | 5 +- bolt/include/bolt/Core/BinaryFunction.h | 14 ++ .../bolt/Passes/IdenticalCodeFolding.h | 69 ++++++-- bolt/lib/Core/BinaryFunction.cpp | 16 ++ bolt/lib/Passes/IdenticalCodeFolding.cpp | 107 ++++++++++++- bolt/lib/Rewrite/BinaryPassManager.cpp | 10 +- bolt/lib/Rewrite/BoltDiff.cpp | 6 +- bolt/lib/Rewrite/RewriteInstance.cpp | 11 ++ bolt/test/X86/icf-safe-icp.test | 148 ++++++++++++++++++ bolt/test/X86/icf-safe-process-rela-data.test | 64 ++++++++ bolt/test/X86/icf-safe-test1-no-relocs.test | 20 +++ bolt/test/X86/icf-safe-test1.test | 98 ++++++++++++ .../icf-safe-test2GlobalConstPtrNoPic.test | 95 +++++++++++ 13 files changed, 644 insertions(+), 19 deletions(-) create mode 100644 bolt/test/X86/icf-safe-icp.test create mode 100644 bolt/test/X86/icf-safe-process-rela-data.test create mode 100644 bolt/test/X86/icf-safe-test1-no-relocs.test create mode 100644 bolt/test/X86/icf-safe-test1.test create mode 100644 bolt/test/X86/icf-safe-test2GlobalConstPtrNoPic.test diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md index 6d3b797da378..91918d614a90 100644 --- a/bolt/docs/CommandLineArgumentReference.md +++ b/bolt/docs/CommandLineArgumentReference.md @@ -498,9 +498,12 @@ Automatically put hot code on 2MB page(s) (hugify) at runtime. No manual call to hugify is needed in the binary (which is what --hot-text relies on). -- `--icf` +- `--icf=` Fold functions with identical code + - `all`: Enable identical code folding + - `none`: Disable identical code folding (default) + - `safe`: Enable safe identical code folding - `--icp` diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 7560908c250c..e8b2757f7db2 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -428,6 +428,9 @@ private: /// Function order for streaming into the destination binary. 
uint32_t Index{-1U}; + /// Function is referenced by a non-control flow instruction. + bool HasAddressTaken{false}; + /// Get basic block index assuming it belongs to this function. unsigned getIndex(const BinaryBasicBlock *BB) const { assert(BB->getIndex() < BasicBlocks.size()); @@ -822,6 +825,14 @@ public: return nullptr; } + /// Return true if function is referenced in a non-control flow instruction. + /// This flag is set when the code and relocation analyses are being + /// performed, which occurs when safe ICF (Identical Code Folding) is enabled. + bool hasAddressTaken() const { return HasAddressTaken; } + + /// Set whether function is referenced in a non-control flow instruction. + void setHasAddressTaken(bool AddressTaken) { HasAddressTaken = AddressTaken; } + /// Returns the raw binary encoding of this function. ErrorOr> getData() const; @@ -2135,6 +2146,9 @@ public: // adjustments. void handleAArch64IndirectCall(MCInst &Instruction, const uint64_t Offset); + /// Analyze instruction to identify a function reference. + void analyzeInstructionForFuncReference(const MCInst &Inst); + /// Scan function for references to other functions. In relocation mode, /// add relocations for external references. In non-relocation mode, detect /// and mark new entry points. 
diff --git a/bolt/include/bolt/Passes/IdenticalCodeFolding.h b/bolt/include/bolt/Passes/IdenticalCodeFolding.h index b4206fa36074..f59e75c61860 100644 --- a/bolt/include/bolt/Passes/IdenticalCodeFolding.h +++ b/bolt/include/bolt/Passes/IdenticalCodeFolding.h @@ -11,6 +11,7 @@ #include "bolt/Core/BinaryFunction.h" #include "bolt/Passes/BinaryPasses.h" +#include "llvm/ADT/SparseBitVector.h" namespace llvm { namespace bolt { @@ -20,22 +21,72 @@ namespace bolt { /// class IdenticalCodeFolding : public BinaryFunctionPass { protected: - bool shouldOptimize(const BinaryFunction &BF) const override { - if (BF.hasUnknownControlFlow()) - return false; - if (BF.isFolded()) - return false; - if (BF.hasSDTMarker()) - return false; - return BinaryFunctionPass::shouldOptimize(BF); - } + /// Return true if the function is safe to fold. + bool shouldOptimize(const BinaryFunction &BF) const override; public: + enum class ICFLevel { + None, /// No ICF. (Default) + Safe, /// Safe ICF. + All, /// Aggressive ICF. + }; explicit IdenticalCodeFolding(const cl::opt &PrintPass) : BinaryFunctionPass(PrintPass) {} const char *getName() const override { return "identical-code-folding"; } Error runOnFunctions(BinaryContext &BC) override; + +private: + /// Bit vector of memory addresses of vtables. + llvm::SparseBitVector<> VTableBitVector; + + /// Return true if the memory address is in a vtable. + bool isAddressInVTable(uint64_t Address) const { + return VTableBitVector.test(Address / 8); + } + + /// Mark memory address of a vtable as used. + void setAddressUsedInVTable(uint64_t Address) { + VTableBitVector.set(Address / 8); + } + + /// Scan symbol table and mark memory addresses of + /// vtables. + void initVTableReferences(const BinaryContext &BC); + + /// Analyze code section and relocations and mark functions that are not + /// safe to fold. 
+ void markFunctionsUnsafeToFold(BinaryContext &BC); + + /// Process static and dynamic relocations in the data sections to identify + /// function references, and mark them as unsafe to fold. It filters out + /// symbol references that are in vtables. + void analyzeDataRelocations(BinaryContext &BC); + + /// Process functions that have been disassembled and mark functions that are + /// used in non-control flow instructions as unsafe to fold. + void analyzeFunctions(BinaryContext &BC); +}; + +class DeprecatedICFNumericOptionParser + : public cl::parser { +public: + explicit DeprecatedICFNumericOptionParser(cl::Option &O) + : cl::parser(O) {} + + bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, + IdenticalCodeFolding::ICFLevel &Value) { + if (Arg == "0" || Arg == "1") { + Value = (Arg == "0") ? IdenticalCodeFolding::ICFLevel::None + : IdenticalCodeFolding::ICFLevel::All; + errs() << formatv("BOLT-WARNING: specifying numeric value \"{0}\" " + "for option -{1} is deprecated\n", + Arg, ArgName); + return false; + } + return cl::parser::parse(O, ArgName, Arg, + Value); + } }; } // namespace bolt diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index a9ccaea3c438..1c5cd62a095b 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1504,6 +1504,20 @@ MCSymbol *BinaryFunction::registerBranch(uint64_t Src, uint64_t Dst) { return Target; } +void BinaryFunction::analyzeInstructionForFuncReference(const MCInst &Inst) { + for (const MCOperand &Op : MCPlus::primeOperands(Inst)) { + if (!Op.isExpr()) + continue; + const MCExpr &Expr = *Op.getExpr(); + if (Expr.getKind() != MCExpr::SymbolRef) + continue; + const MCSymbol &Symbol = cast(Expr).getSymbol(); + // Set HasAddressTaken for a function regardless of the ICF level. 
+ if (BinaryFunction *BF = BC.getFunctionForSymbol(&Symbol)) + BF->setHasAddressTaken(true); + } +} + bool BinaryFunction::scanExternalRefs() { bool Success = true; bool DisassemblyFailed = false; @@ -1624,6 +1638,8 @@ bool BinaryFunction::scanExternalRefs() { [](const MCOperand &Op) { return Op.isExpr(); })) { // Skip assembly if the instruction may not have any symbolic operands. continue; + } else { + analyzeInstructionForFuncReference(Instruction); } // Emit the instruction using temp emitter and generate relocations. diff --git a/bolt/lib/Passes/IdenticalCodeFolding.cpp b/bolt/lib/Passes/IdenticalCodeFolding.cpp index 38e080c9dd62..8923562776cc 100644 --- a/bolt/lib/Passes/IdenticalCodeFolding.cpp +++ b/bolt/lib/Passes/IdenticalCodeFolding.cpp @@ -15,6 +15,7 @@ #include "bolt/Core/ParallelUtilities.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Timer.h" #include @@ -42,8 +43,41 @@ TimeICF("time-icf", cl::ReallyHidden, cl::ZeroOrMore, cl::cat(BoltOptCategory)); + +cl::opt + ICF("icf", cl::desc("fold functions with identical code"), + cl::init(bolt::IdenticalCodeFolding::ICFLevel::None), + cl::values(clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "all", + "Enable identical code folding"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "1", + "Enable identical code folding"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "", + "Enable identical code folding"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::None, + "none", + "Disable identical code folding (default)"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::None, "0", + "Disable identical code folding (default)"), + clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::Safe, + "safe", "Enable safe identical code folding")), + cl::ZeroOrMore, cl::ValueOptional, cl::cat(BoltOptCategory)); } // namespace opts +bool 
IdenticalCodeFolding::shouldOptimize(const BinaryFunction &BF) const { + if (BF.hasUnknownControlFlow()) + return false; + if (BF.isFolded()) + return false; + if (BF.hasSDTMarker()) + return false; + if (BF.isPseudo()) + return false; + if (opts::ICF == ICFLevel::Safe && BF.hasAddressTaken()) + return false; + return BinaryFunctionPass::shouldOptimize(BF); +} + /// Compare two jump tables in 2 functions. The function relies on consistent /// ordering of basic blocks in both binary functions (e.g. DFS). static bool equalJumpTables(const JumpTable &JumpTableA, @@ -340,6 +374,74 @@ typedef std::unordered_map, namespace llvm { namespace bolt { +void IdenticalCodeFolding::initVTableReferences(const BinaryContext &BC) { + for (const auto &[Address, Data] : BC.getBinaryData()) { + // Filter out all symbols that are not vtables. + if (!Data->getName().starts_with("_ZTV")) + continue; + for (uint64_t I = Address, End = I + Data->getSize(); I < End; I += 8) + setAddressUsedInVTable(I); + } +} + +void IdenticalCodeFolding::analyzeDataRelocations(BinaryContext &BC) { + initVTableReferences(BC); + // For static relocations there should be a symbol for function references. + for (const BinarySection &Sec : BC.sections()) { + if (!Sec.hasSectionRef() || !Sec.isData()) + continue; + for (const auto &Rel : Sec.relocations()) { + const uint64_t RelAddr = Rel.Offset + Sec.getAddress(); + if (isAddressInVTable(RelAddr)) + continue; + if (BinaryFunction *BF = BC.getFunctionForSymbol(Rel.Symbol)) + BF->setHasAddressTaken(true); + } + // For dynamic relocations there are two cases: + // 1: No symbol and only addend. + // 2: There is a symbol, but it does not references a function in a binary. 
+ for (const auto &Rel : Sec.dynamicRelocations()) { + const uint64_t RelAddr = Rel.Offset + Sec.getAddress(); + if (isAddressInVTable(RelAddr)) + continue; + if (BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Rel.Addend)) + BF->setHasAddressTaken(true); + } + } +} + +void IdenticalCodeFolding::analyzeFunctions(BinaryContext &BC) { + ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { + for (const BinaryBasicBlock &BB : BF) + for (const MCInst &Inst : BB) + if (!(BC.MIB->isCall(Inst) || BC.MIB->isBranch(Inst))) + BF.analyzeInstructionForFuncReference(Inst); + }; + ParallelUtilities::PredicateTy SkipFunc = + [&](const BinaryFunction &BF) -> bool { return !BF.hasCFG(); }; + ParallelUtilities::runOnEachFunction( + BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, + SkipFunc, "markUnsafe"); + + LLVM_DEBUG({ + for (const auto &BFIter : BC.getBinaryFunctions()) { + if (!BFIter.second.hasAddressTaken()) + continue; + dbgs() << "BOLT-DEBUG: skipping function with reference taken " + << BFIter.second.getOneName() << '\n'; + } + }); +} + +void IdenticalCodeFolding::markFunctionsUnsafeToFold(BinaryContext &BC) { + NamedRegionTimer MarkFunctionsUnsafeToFoldTimer( + "markFunctionsUnsafeToFold", "markFunctionsUnsafeToFold", "ICF breakdown", + "ICF breakdown", opts::TimeICF); + if (!BC.isX86()) + BC.outs() << "BOLT-WARNING: safe ICF is only supported for x86\n"; + analyzeDataRelocations(BC); + analyzeFunctions(BC); +} Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { const size_t OriginalFunctionCount = BC.getBinaryFunctions().size(); @@ -385,7 +487,7 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { "ICF breakdown", opts::TimeICF); for (auto &BFI : BC.getBinaryFunctions()) { BinaryFunction &BF = BFI.second; - if (!this->shouldOptimize(BF)) + if (!shouldOptimize(BF)) continue; CongruentBuckets[&BF].emplace(&BF); } @@ -475,7 +577,8 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { 
LLVM_DEBUG(SinglePass.stopTimer()); }; - + if (opts::ICF == ICFLevel::Safe) + markFunctionsUnsafeToFold(BC); hashFunctions(); createCongruentBuckets(); diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 6f074d5d1191..2d851c751ae1 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -54,6 +54,9 @@ extern cl::opt PrintDynoStats; extern cl::opt DumpDotAll; extern cl::opt AsmDump; extern cl::opt PLT; +extern cl::opt + ICF; static cl::opt DynoStatsAll("dyno-stats-all", @@ -65,9 +68,6 @@ static cl::opt cl::desc("eliminate unreachable code"), cl::init(true), cl::cat(BoltOptCategory)); -cl::opt ICF("icf", cl::desc("fold functions with identical code"), - cl::cat(BoltOptCategory)); - static cl::opt JTFootprintReductionFlag( "jt-footprint-reduction", cl::desc("make jump tables size smaller at the cost of using more " @@ -403,7 +403,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { opts::StripRepRet); Manager.registerPass(std::make_unique(PrintICF), - opts::ICF); + opts::ICF != IdenticalCodeFolding::ICFLevel::None); Manager.registerPass( std::make_unique(NeverPrint, opts::SpecializeMemcpy1), @@ -428,7 +428,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { Manager.registerPass(std::make_unique(PrintInline)); Manager.registerPass(std::make_unique(PrintICF), - opts::ICF); + opts::ICF != IdenticalCodeFolding::ICFLevel::None); Manager.registerPass(std::make_unique(PrintPLT)); diff --git a/bolt/lib/Rewrite/BoltDiff.cpp b/bolt/lib/Rewrite/BoltDiff.cpp index 74b5ca18abce..35f671050664 100644 --- a/bolt/lib/Rewrite/BoltDiff.cpp +++ b/bolt/lib/Rewrite/BoltDiff.cpp @@ -28,7 +28,9 @@ using namespace bolt; namespace opts { extern cl::OptionCategory BoltDiffCategory; extern cl::opt NeverPrint; -extern cl::opt ICF; +extern cl::opt + ICF; static cl::opt IgnoreLTOSuffix( "ignore-lto-suffix", @@ -697,7 +699,7 @@ void RewriteInstance::compare(RewriteInstance 
&RI2) { } // Pre-pass ICF - if (opts::ICF) { + if (opts::ICF != IdenticalCodeFolding::ICFLevel::None) { IdenticalCodeFolding ICF(opts::NeverPrint); outs() << "BOLT-DIFF: Starting ICF pass for binary 1"; BC->logBOLTErrorsAndQuitOnFatal(ICF.runOnFunctions(*BC)); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 04e073152f08..4329235d4704 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -19,6 +19,7 @@ #include "bolt/Core/Relocation.h" #include "bolt/Passes/BinaryPasses.h" #include "bolt/Passes/CacheMetrics.h" +#include "bolt/Passes/IdenticalCodeFolding.h" #include "bolt/Passes/ReorderFunctions.h" #include "bolt/Profile/BoltAddressTranslation.h" #include "bolt/Profile/DataAggregator.h" @@ -85,6 +86,9 @@ extern cl::opt ReorderFunctions; extern cl::opt TerminalTrap; extern cl::opt TimeBuild; extern cl::opt TimeRewrite; +extern cl::opt + ICF; cl::opt AllowStripped("allow-stripped", cl::desc("allow processing of stripped binaries"), @@ -2055,6 +2059,13 @@ void RewriteInstance::adjustCommandLineOptions() { exit(1); } + if (!BC->HasRelocations && + opts::ICF == IdenticalCodeFolding::ICFLevel::Safe) { + BC->errs() << "BOLT-ERROR: binary built without relocations. Safe ICF is " + "not supported\n"; + exit(1); + } + if (opts::Instrument || (opts::ReorderFunctions != ReorderFunctions::RT_NONE && !opts::HotText.getNumOccurrences())) { diff --git a/bolt/test/X86/icf-safe-icp.test b/bolt/test/X86/icf-safe-icp.test new file mode 100644 index 000000000000..a9227d311edc --- /dev/null +++ b/bolt/test/X86/icf-safe-icp.test @@ -0,0 +1,148 @@ +## Check that BOLT handles correctly folding functions with --icf=safe +## that can be referenced through a non control flow instruction when ICP optimization is enabled. +## This tests also checks that destructors are folded. 
+ +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt --no-threads %t.exe --icf -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=ICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s + +# ICFCHECK: ICF iteration 1 +# ICFCHECK-NEXT: folding Derived3Destructor into Derived2Destructor +# ICFCHECK-NEXT: folding Derived3Func into Derived2Func + +# SAFEICFCHECK: skipping function with reference taken Derived3Func +# SAFEICFCHECK-NEXT: ICF iteration 1 +# SAFEICFCHECK-NEXT: folding Derived3Destructor into Derived2Destructor +# SAFEICFCHECK-NEXT: ===--------- + + +## generate profile +## clang++ -O2 -fprofile-generate=. main.cpp -c -o mainProf.o +## PROF=test.profdata +## clang++ -m64 -fprofile-use=$PROF \ +## -mllvm -disable-icp=true -mllvm -print-after-all \ +## -g0 -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \ +## -fdebug-types-section \ +## main.cpp -c -o mainProfLTO.bc +## PASS='pgo-icall-prom' +## clang++ -m64 -fprofile-use=$PROF \ +## -O3 -Rpass=$PASS \ +## -mllvm -print-before=$PASS \ +## -mllvm -print-after=$PASS \ +## -mllvm -filter-print-funcs=main \ +## -mllvm -debug-only=$PASS \ +## -x ir \ +## mainProfLTO.bc -c -o mainProfFinal.o + +## class Base { +## public: +## virtual int func(int a, int b) const = 0; +## +## virtual ~Base() {}; +## }; +## +## class Derived2 : public Base { +## int c = 5; +## public: +## __attribute__((noinline)) int func(int a, int b)const override { return a * (a - b) + this->c; } +## +## ~Derived2() {} +## }; +## +## class Derived3 : public Base { +## int c = 500; +## public: +## __attribute__((noinline)) int func(int a, int b) const override { return a * (a - b) + this->c; } +## ~Derived3() {} +## }; +## +## __attribute__((noinline)) Base *createType(int a) { +## Base *base = nullptr; +## if (a == 4) +## 
base = new Derived2(); +## else +## base = new Derived3(); +## return base; +## } +## +## extern int returnFive(); +## extern int returnFourOrFive(int val); +## int main(int argc, char **argv) { +## int sum = 0; +## int a = returnFourOrFive(argc); +## int b = returnFive(); +## Base *ptr = createType(a); +## Base *ptr2 = createType(b); +## sum += ptr->func(b, a) + ptr2->func(b, a); +## return 0; +## } +## clang++ -c helper.cpp -o helper.o +## int FooVar = 1; +## int BarVar = 2; +## +## int fooGlobalFuncHelper(int a, int b) { +## return 5; +## } +## Manually modified to remove "extra" assembly. + .globl main + .type main,@function +main: + leaq Derived3Func(%rip), %rcx + callq Derived3Func + .size main, .-main + + .weak Derived2Func + .type Derived2Func,@function +Derived2Func: + imull %esi, %eax + retq + .size Derived2Func, .-Derived2Func + + .weak Derived2Destructor + .type Derived2Destructor,@function +Derived2Destructor: + jmp _ZdlPvm@PLT + .size Derived2Destructor, .-Derived2Destructor + + .weak Derived3Func + .type Derived3Func,@function +Derived3Func: + imull %esi, %eax + retq + .size Derived3Func, .-Derived3Func + + .weak _ZN4BaseD2Ev + .type _ZN4BaseD2Ev,@function +_ZN4BaseD2Ev: + retq + .size _ZN4BaseD2Ev, .-_ZN4BaseD2Ev + + .weak Derived3Destructor + .type Derived3Destructor,@function +Derived3Destructor: + jmp _ZdlPvm@PLT + .size Derived3Destructor, .-Derived3Destructor + + .type _ZTV8Derived2,@object + .section .data.rel.ro._ZTV8Derived2,"awG",@progbits,_ZTV8Derived2,comdat + .weak _ZTV8Derived2 +_ZTV8Derived2: + .quad 0 + .quad _ZTI8Derived2 + .quad Derived2Func + .quad _ZN4BaseD2Ev + .quad Derived2Destructor + .size _ZTV8Derived2, 40 + + .type _ZTV8Derived3,@object + .section .data.rel.ro._ZTV8Derived3,"awG",@progbits,_ZTV8Derived3,comdat + .weak _ZTV8Derived3 +_ZTV8Derived3: + .quad 0 + .quad _ZTI8Derived3 + .quad Derived3Func + .quad _ZN4BaseD2Ev + .quad Derived3Destructor + .size _ZTV8Derived3, 40 diff --git 
a/bolt/test/X86/icf-safe-process-rela-data.test b/bolt/test/X86/icf-safe-process-rela-data.test new file mode 100644 index 000000000000..cf71f5525777 --- /dev/null +++ b/bolt/test/X86/icf-safe-process-rela-data.test @@ -0,0 +1,64 @@ +## Check that BOLT handles correctly folding functions with --icf=safe that are only referenced from a .rela.data section. + +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q -no-pie +# RUN: llvm-bolt --no-threads %t.exe --icf -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=ICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s + +# ICFCHECK: ICF iteration 1 +# ICFCHECK-NEXT: folding barAddFunc into fooAddFunc + +# SAFEICFCHECK: skipping function with reference taken fooAddFunc +# SAFEICFCHECK-NEXT: skipping function with reference taken barAddFunc +# SAFEICFCHECK-NEXT: ICF iteration 1 +# SAFEICFCHECK-NEXT: ===--------- + +## clang++ main.cpp +## Other functions removed for brevity. +## int main(int argc, char **argv) { +## const static int (*const funcGlobalBarAdd)(int, int) = barAddHdlper; +## const int (* const funcGlobalBarMul)(int, int) = fooGlobalFuncHelper; +## helper2(funcGlobalBarAdd, funcGlobalFooAdd, 3, 4) +## } +## Extra assembly removed. 
+ + .globl fooAddFunc + .type fooAddFunc,@function +fooAddFunc: + addl -8(%rbp), %eax + retq + .size fooAddFunc, .-fooAddFunc + + .globl barAddFunc + .type barAddFunc,@function +barAddFunc: + addl -8(%rbp), %eax + retq + .size barAddFunc, .-barAddFunc + + .globl helperFunc + .type helperFunc,@function +helperFunc: + retq + .size helperFunc, .-helperFunc + + .globl main + .type main,@function +main: + movq localStaticVarBarAdd, %rdi + movq localStaticVarFooAdd, %rsi + callq helperFunc + retq + .size main, .-main + + .type localStaticVarBarAdd,@object # @localStaticVarBarAdd + .data +localStaticVarBarAdd: + .quad barAddFunc + .size localStaticVarBarAdd, 8 + + .type localStaticVarFooAdd,@object # @localStaticVarFooAdd +localStaticVarFooAdd: + .quad fooAddFunc + .size localStaticVarFooAdd, 8 diff --git a/bolt/test/X86/icf-safe-test1-no-relocs.test b/bolt/test/X86/icf-safe-test1-no-relocs.test new file mode 100644 index 000000000000..b4e55a6d5504 --- /dev/null +++ b/bolt/test/X86/icf-safe-test1-no-relocs.test @@ -0,0 +1,20 @@ +## Check that BOLT reports an error for a binary with no relocations with the --icf=safe option. + +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe +# RUN: not llvm-bolt --no-threads %t.exe --icf=safe -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s + +# SAFEICFCHECK: BOLT-ERROR: binary built without relocations. 
Safe ICF is not supported + +## int main(int argc, char **argv) { +## return temp; +## } + .globl main + .type main,@function +main: + .cfi_startproc + retq +.Lfunc_end8: + .size main, .-main + .cfi_endproc diff --git a/bolt/test/X86/icf-safe-test1.test b/bolt/test/X86/icf-safe-test1.test new file mode 100644 index 000000000000..8a8e5ccf38e7 --- /dev/null +++ b/bolt/test/X86/icf-safe-test1.test @@ -0,0 +1,98 @@ +## Check that BOLT handles correctly folding functions with --icf=safe that can be referenced by non-control flow instructions. +## It invokes BOLT twice first testing CFG path, and second when functions have to be disassembled. + +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt --no-threads %t.exe --icf -debug-only=bolt-icf \ +# RUN: -o %t.bolt 2>&1 | FileCheck --check-prefix=ICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf \ +# RUN: -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf \ +# RUN: --skip-funcs=helper1Func,main -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECKNOCFG %s + +# ICFCHECK: ICF iteration 1 +# ICFCHECK-NEXT: folding barAddFunc into fooAddFunc +# ICFCHECK-NEXT: folding barSubFunc into fooSubFunc + +# SAFEICFCHECK: skipping function with reference taken barAddFunc +# SAFEICFCHECK-NEXT: ICF iteration 1 +# SAFEICFCHECK-NEXT: folding barSubFunc into fooSubFunc +# SAFEICFCHECK-NEXT: ===--------- + +# SAFEICFCHECKNOCFG: skipping function with reference taken barAddFunc +# SAFEICFCHECKNOCFG-NEXT: ICF iteration 1 +# SAFEICFCHECKNOCFG-NEXT: folding barSubFunc into fooSubFunc +# SAFEICFCHECKNOCFG-NEXT: ===--------- + +## clang++ -c main.cpp -o main.o +## extern int FooVar; +## extern int BarVar; +## [[clang::noinline]] +## int fooSub(int a, int b) { +## return a - b; +## } +## [[clang::noinline]] +## int 
barSub(int a, int b) { +## return a - b; +## } +## [[clang::noinline]] +## int fooAdd(int a, int b) { +## return a + b; +## } +## [[clang::noinline]] +## int barAdd(int a, int b) { +## return a + b; +## } +## int main(int argc, char **argv) { +## int temp = helper1(barAdd, FooVar, BarVar) + +## fooSub(FooVar, BarVar) + +## barSub(FooVar, BarVar) + fooAdd(FooVar, BarVar); +## return temp; +## } + .globl fooSubFunc + .type fooSubFunc,@function +fooSubFunc: + subl -8(%rbp), %eax + retq + .size fooSubFunc, .-fooSubFunc + + .globl barSubFunc + .type barSubFunc,@function +barSubFunc: + subl -8(%rbp), %eax + retq + .size barSubFunc, .-barSubFunc + + .globl fooAddFunc + .type fooAddFunc,@function +fooAddFunc: + addl -8(%rbp), %eax + retq + .size fooAddFunc, .-fooAddFunc + + .globl barAddFunc + .type barAddFunc,@function +barAddFunc: + addl -8(%rbp), %eax + retq + .size barAddFunc, .-barAddFunc + + .globl helper1Func + .type helper1Func,@function +helper1Func: + leaq barAddFunc(%rip), %rax + cmpq %rax, -16(%rbp) + retq + .size helper1Func, .-helper1Func + + .globl main + .type main,@function +main: + leaq barAddFunc(%rip), %rdi + callq helper1Func + callq fooSubFunc + callq barSubFunc + callq fooAddFunc + retq + .size main, .-main diff --git a/bolt/test/X86/icf-safe-test2GlobalConstPtrNoPic.test b/bolt/test/X86/icf-safe-test2GlobalConstPtrNoPic.test new file mode 100644 index 000000000000..ea2d8a5f11e0 --- /dev/null +++ b/bolt/test/X86/icf-safe-test2GlobalConstPtrNoPic.test @@ -0,0 +1,95 @@ +## Check that BOLT handles correctly folding functions with --icf=safe that can be referenced by non-control flow instructions, +## when binary is built with -fno-PIC/-fno-PIE. 
+ +# REQUIRES: system-linux, asserts +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q -no-pie +# RUN: llvm-bolt --no-threads %t.exe --icf -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=ICFCHECK %s +# RUN: llvm-bolt --no-threads %t.exe --icf=safe -debug-only=bolt-icf -o %t.bolt 2>&1 | FileCheck --check-prefix=SAFEICFCHECK %s + +# ICFCHECK: ICF iteration 1 +# ICFCHECK-NEXT: folding barAddFunc into fooAddFunc +# ICFCHECK-NEXT: folding barMulFunc into fooMulFunc + +# SAFEICFCHECK: skipping function with reference taken fooMulFunc +# SAFEICFCHECK-NEXT: skipping function with reference taken barMulFunc +# SAFEICFCHECK-NEXT: skipping function with reference taken barAddFunc +# SAFEICFCHECK-NEXT: ICF iteration 1 +# SAFEICFCHECK-NEXT: ===--------- + +## clang++ main.cpp -c -o -fno-PIC +## Similar code gets generated for external reference function. +## Other functions removed for brevity. +## const static int (*const funcGlobalBarAdd)(int, int) = barAdd; +## const int (*const funcGlobalBarMul)(int, int) = barMul; +## int main(int argc, char **argv) { +## int temp = helper1(funcGlobalBarAdd, FooVar, BarVar) +## return temp; +## } +## Manually modified to remove "extra" assembly. 
+ .globl fooMulFunc + .type fooMulFunc,@function +fooMulFunc: + imull -8(%rbp), %eax + retq + .size fooMulFunc, .-fooMulFunc + + .globl barMulFunc + .type barMulFunc,@function +barMulFunc: + imull -8(%rbp), %eax + retq + .size barMulFunc, .-barMulFunc + + .globl fooAddFunc + .type fooAddFunc,@function +fooAddFunc: + addl -8(%rbp), %eax + retq + .size fooAddFunc, .-fooAddFunc + + .globl barAddFunc + .type barAddFunc,@function +barAddFunc: + addl -8(%rbp), %eax + retq + .size barAddFunc, .-barAddFunc + + .globl helperFunc + .type helperFunc,@function +helperFunc: + movabsq $barAddFunc, %rax + cmpq %rax, -16(%rbp) + retq + .size helperFunc, .-helperFunc + + .globl main + .type main,@function +main: + movl FooVar, %esi + movl BarVar, %edx + movabsq $barAddFunc, %rdi + callq helperFunc + movabsq $fooMulFunc, %rdi + movabsq $barMulFunc, %rsi + retq + .size main, .-main + + .type FooVar,@object + .data + .globl FooVar +FooVar: + .long 1 + .size FooVar, 4 + + .type BarVar,@object + .globl BarVar +BarVar: + .long 2 + .size BarVar, 4 + + .type .L.str,@object + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str: + .asciz "val: %d\n" + .size .L.str, 9