From 344228ebf45f9bd1f7626fdcd3c0fada0f0c8385 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Tue, 2 Jul 2024 09:18:59 -0700 Subject: [PATCH] [BOLT] Drop macro-fusion alignment (#97358) 9d0754ada5dbbc0c009bcc2f7824488419cc5530 dropped MC support required for optimal macro-fusion alignment in BOLT. Remove the support in BOLT as performance measurements with large binaries didn't show a significant improvement. Test Plan: macro-fusion alignment was never upstreamed, so no upstream tests are affected. --- bolt/include/bolt/Core/BinaryBasicBlock.h | 9 ----- bolt/include/bolt/Core/BinaryContext.h | 4 -- bolt/include/bolt/Core/BinaryFunction.h | 4 -- bolt/include/bolt/Core/MCPlusBuilder.h | 7 ---- bolt/lib/Core/BinaryBasicBlock.cpp | 39 ------------------- bolt/lib/Core/BinaryEmitter.cpp | 37 ------------------ bolt/lib/Core/BinaryFunction.cpp | 25 ------------ bolt/lib/Passes/BinaryPasses.cpp | 20 ---------- bolt/lib/Rewrite/RewriteInstance.cpp | 22 ----------- .../Target/AArch64/AArch64MCPlusBuilder.cpp | 4 -- bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 34 ---------------- 11 files changed, 205 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h index a57b70714fe3..9a9d7b8735d7 100644 --- a/bolt/include/bolt/Core/BinaryBasicBlock.h +++ b/bolt/include/bolt/Core/BinaryBasicBlock.h @@ -842,15 +842,6 @@ public: bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB, MCInst *&CondBranch, MCInst *&UncondBranch); - /// Return true if iterator \p I is pointing to the first instruction in - /// a pair that could be macro-fused. - bool isMacroOpFusionPair(const_iterator I) const; - - /// If the basic block has a pair of instructions suitable for macro-fusion, - /// return iterator to the first instruction of the pair. - /// Otherwise return end(). - const_iterator getMacroOpFusionPair() const; - /// Printer required for printing dominator trees. void printAsOperand(raw_ostream &OS, bool PrintType = true) { if (PrintType) diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 4ec3de3da1bf..73932c4ca2fb 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -698,10 +698,6 @@ public: /// Binary-wide aggregated stats. struct BinaryStats { - /// Stats for macro-fusion. - uint64_t MissedMacroFusionPairs{0}; - uint64_t MissedMacroFusionExecCount{0}; - /// Stats for stale profile matching: /// the total number of basic blocks in the profile uint32_t NumStaleBlocks{0}; diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 1048e50cd805..7d0afee4d1b7 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -835,10 +835,6 @@ public: /// them. void calculateLoopInfo(); - /// Calculate missed macro-fusion opportunities and update BinaryContext - /// stats. - void calculateMacroOpFusionStats(); - /// Returns if BinaryDominatorTree has been constructed for this function. bool hasDomTree() const { return BDT != nullptr; } diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 83f4d4c649fd..ab07f07e4984 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -930,13 +930,6 @@ public: /// Return true if the instruction is encoded using EVEX (AVX-512). virtual bool hasEVEXEncoding(const MCInst &Inst) const { return false; } - /// Return true if a pair of instructions represented by \p Insts - /// could be fused into a single uop. - virtual bool isMacroOpFusionPair(ArrayRef Insts) const { - llvm_unreachable("not implemented"); - return false; - } - struct X86MemOperand { unsigned BaseRegNum; int64_t ScaleImm; diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp index a4b9a7f558cd..2a2192b79bb4 100644 --- a/bolt/lib/Core/BinaryBasicBlock.cpp +++ b/bolt/lib/Core/BinaryBasicBlock.cpp @@ -404,45 +404,6 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB, CondBranch, UncondBranch); } -bool BinaryBasicBlock::isMacroOpFusionPair(const_iterator I) const { - auto &MIB = Function->getBinaryContext().MIB; - ArrayRef Insts = Instructions; - return MIB->isMacroOpFusionPair(Insts.slice(I - begin())); -} - -BinaryBasicBlock::const_iterator -BinaryBasicBlock::getMacroOpFusionPair() const { - if (!Function->getBinaryContext().isX86()) - return end(); - - if (getNumNonPseudos() < 2 || succ_size() != 2) - return end(); - - auto RI = getLastNonPseudo(); - assert(RI != rend() && "cannot have an empty block with 2 successors"); - - BinaryContext &BC = Function->getBinaryContext(); - - // Skip instruction if it's an unconditional branch following - // a conditional one. - if (BC.MIB->isUnconditionalBranch(*RI)) - ++RI; - - if (!BC.MIB->isConditionalBranch(*RI)) - return end(); - - // Start checking with instruction preceding the conditional branch. - ++RI; - if (RI == rend()) - return end(); - - auto II = std::prev(RI.base()); // convert to a forward iterator - if (isMacroOpFusionPair(II)) - return II; - - return end(); -} - MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) { BinaryContext &BC = Function->getBinaryContext(); auto Itr = rbegin(); diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index 5793963f9b80..f6dfa249f9a9 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -38,19 +38,6 @@ extern cl::opt PreserveBlocksAlignment; cl::opt AlignBlocks("align-blocks", cl::desc("align basic blocks"), cl::cat(BoltOptCategory)); -cl::opt -AlignMacroOpFusion("align-macro-fusion", - cl::desc("fix instruction alignment for macro-fusion (x86 relocation mode)"), - cl::init(MFT_HOT), - cl::values(clEnumValN(MFT_NONE, "none", - "do not insert alignment no-ops for macro-fusion"), - clEnumValN(MFT_HOT, "hot", - "only insert alignment no-ops on hot execution paths (default)"), - clEnumValN(MFT_ALL, "all", - "always align instructions to allow macro-fusion")), - cl::ZeroOrMore, - cl::cat(BoltRelocCategory)); - static cl::list BreakFunctionNames("break-funcs", cl::CommaSeparated, @@ -453,20 +440,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, Streamer.emitLabel(EntrySymbol); } - // Check if special alignment for macro-fusion is needed. - bool MayNeedMacroFusionAlignment = - (opts::AlignMacroOpFusion == MFT_ALL) || - (opts::AlignMacroOpFusion == MFT_HOT && BB->getKnownExecutionCount()); - BinaryBasicBlock::const_iterator MacroFusionPair; - if (MayNeedMacroFusionAlignment) { - MacroFusionPair = BB->getMacroOpFusionPair(); - if (MacroFusionPair == BB->end()) - MayNeedMacroFusionAlignment = false; - } - SMLoc LastLocSeen; - // Remember if the last instruction emitted was a prefix. - bool LastIsPrefix = false; for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { MCInst &Instr = *I; @@ -479,16 +453,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, continue; } - // Handle macro-fusion alignment. If we emitted a prefix as - // the last instruction, we should've already emitted the associated - // alignment hint, so don't emit it twice. - if (MayNeedMacroFusionAlignment && !LastIsPrefix && - I == MacroFusionPair) { - // This assumes the second instruction in the macro-op pair will get - // assigned to its own MCRelaxableFragment. Since all JCC instructions - // are relaxable, we should be safe. - } - if (!EmitCodeOnly) { // A symbol to be emitted before the instruction to mark its location. MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr); @@ -525,7 +489,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, } Streamer.emitInstruction(Instr, *BC.STI); - LastIsPrefix = BC.MIB->isPrefix(Instr); } } diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index f8530c424b8e..89c05e3ab005 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -2279,8 +2279,6 @@ void BinaryFunction::postProcessCFG() { postProcessBranches(); } - calculateMacroOpFusionStats(); - // The final cleanup of intermediate structures. clearList(IgnoredBranches); @@ -2297,29 +2295,6 @@ void BinaryFunction::postProcessCFG() { "invalid CFG detected after post-processing"); } -void BinaryFunction::calculateMacroOpFusionStats() { - if (!getBinaryContext().isX86()) - return; - for (const BinaryBasicBlock &BB : blocks()) { - auto II = BB.getMacroOpFusionPair(); - if (II == BB.end()) - continue; - - // Check offset of the second instruction. - // FIXME: arch-specific. - const uint32_t Offset = BC.MIB->getOffsetWithDefault(*std::next(II), 0); - if (!Offset || (getAddress() + Offset) % 64) - continue; - - LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x" - << Twine::utohexstr(getAddress() + Offset) - << " in function " << *this << "; executed " - << BB.getKnownExecutionCount() << " times.\n"); - ++BC.Stats.MissedMacroFusionPairs; - BC.Stats.MissedMacroFusionExecCount += BB.getKnownExecutionCount(); - } -} - void BinaryFunction::removeTagsFromProfile() { for (BinaryBasicBlock *BB : BasicBlocks) { if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE) diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index ecc2c08a3032..fa95ad7324ac 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -44,7 +44,6 @@ namespace opts { extern cl::OptionCategory BoltCategory; extern cl::OptionCategory BoltOptCategory; -extern cl::opt AlignMacroOpFusion; extern cl::opt Verbosity; extern cl::opt EnableBAT; extern cl::opt ExecutionCountThreshold; @@ -1637,25 +1636,6 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { } } - // Print information on missed macro-fusion opportunities seen on input. - if (BC.Stats.MissedMacroFusionPairs) { - BC.outs() << format( - "BOLT-INFO: the input contains %zu (dynamic count : %zu)" - " opportunities for macro-fusion optimization", - BC.Stats.MissedMacroFusionPairs, BC.Stats.MissedMacroFusionExecCount); - switch (opts::AlignMacroOpFusion) { - case MFT_NONE: - BC.outs() << ". Use -align-macro-fusion to fix.\n"; - break; - case MFT_HOT: - BC.outs() << ". Will fix instances on a hot path.\n"; - break; - case MFT_ALL: - BC.outs() << " that are going to be fixed\n"; - break; - } - } - // Collect and print information about suboptimal code layout on input. if (opts::ReportBadLayout) { std::vector SuboptimalFuncs; diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 8248c1cd7fad..ded2f577237f 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -75,7 +75,6 @@ extern cl::opt X86AlignBranchWithin32BBoundaries; namespace opts { -extern cl::opt AlignMacroOpFusion; extern cl::list HotTextMoveSections; extern cl::opt Hugify; extern cl::opt Instrument; @@ -1969,12 +1968,6 @@ void RewriteInstance::adjustCommandLineOptions() { if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) RtLibrary->adjustCommandLineOptions(*BC); - if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) { - BC->outs() - << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n"; - opts::AlignMacroOpFusion = MFT_NONE; - } - if (BC->isX86() && BC->MAB->allowAutoPadding()) { if (!BC->HasRelocations) { BC->errs() @@ -1985,13 +1978,6 @@ void RewriteInstance::adjustCommandLineOptions() { BC->outs() << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout " "may take several minutes\n"; - opts::AlignMacroOpFusion = MFT_NONE; - } - - if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) { - BC->outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation " - "mode\n"; - opts::AlignMacroOpFusion = MFT_NONE; } if (opts::SplitEH && !BC->HasRelocations) { @@ -2013,14 +1999,6 @@ void RewriteInstance::adjustCommandLineOptions() { opts::StrictMode = true; } - if (BC->isX86() && BC->HasRelocations && - opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) { - BC->outs() - << "BOLT-INFO: enabling -align-macro-fusion=all since no profile " - "was specified\n"; - opts::AlignMacroOpFusion = MFT_ALL; - } - if (!BC->HasRelocations && opts::ReorderFunctions != ReorderFunctions::RT_NONE) { BC->errs() << "BOLT-ERROR: function reordering only works when " diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index 5220d305b838..06f79dee3378 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -141,10 +141,6 @@ public: *AArch64ExprB.getSubExpr(), Comp); } - bool isMacroOpFusionPair(ArrayRef Insts) const override { - return false; - } - bool shortenInstruction(MCInst &, const MCSubtargetInfo &) const override { return false; } diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index 5bd77958934f..37136f4a5c55 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -661,40 +661,6 @@ public: return (Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX; } - bool isMacroOpFusionPair(ArrayRef Insts) const override { - const auto *I = Insts.begin(); - while (I != Insts.end() && isPrefix(*I)) - ++I; - if (I == Insts.end()) - return false; - - const MCInst &FirstInst = *I; - ++I; - while (I != Insts.end() && isPrefix(*I)) - ++I; - if (I == Insts.end()) - return false; - const MCInst &SecondInst = *I; - - if (!isConditionalBranch(SecondInst)) - return false; - // Cannot fuse if the first instruction uses RIP-relative memory. - if (hasPCRelOperand(FirstInst)) - return false; - - const X86::FirstMacroFusionInstKind CmpKind = - X86::classifyFirstOpcodeInMacroFusion(FirstInst.getOpcode()); - if (CmpKind == X86::FirstMacroFusionInstKind::Invalid) - return false; - - X86::CondCode CC = static_cast(getCondCode(SecondInst)); - X86::SecondMacroFusionInstKind BranchKind = - X86::classifySecondCondCodeInMacroFusion(CC); - if (BranchKind == X86::SecondMacroFusionInstKind::Invalid) - return false; - return X86::isMacroFused(CmpKind, BranchKind); - } - std::optional evaluateX86MemoryOperand(const MCInst &Inst) const override { int MemOpNo = getMemoryOperandNo(Inst);