[BOLT] Drop macro-fusion alignment (#97358)

9d0754ada5 dropped MC support required for
optimal macro-fusion alignment in BOLT. Remove the support in BOLT as
performance measurements with large binaries didn't show a significant
improvement.

Test Plan:
macro-fusion alignment was never upstreamed, so no upstream tests are
affected.
This commit is contained in:
Amir Ayupov
2024-07-02 09:18:59 -07:00
parent 1f7d31e342
commit 344228ebf4
11 changed files with 0 additions and 205 deletions

View File

@@ -842,15 +842,6 @@ public:
bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
MCInst *&CondBranch, MCInst *&UncondBranch);
/// Return true if iterator \p I is pointing to the first instruction in
/// a pair that could be macro-fused.
bool isMacroOpFusionPair(const_iterator I) const;
/// If the basic block has a pair of instructions suitable for macro-fusion,
/// return iterator to the first instruction of the pair.
/// Otherwise return end().
const_iterator getMacroOpFusionPair() const;
/// Printer required for printing dominator trees.
void printAsOperand(raw_ostream &OS, bool PrintType = true) {
if (PrintType)

View File

@@ -698,10 +698,6 @@ public:
/// Binary-wide aggregated stats.
struct BinaryStats {
/// Stats for macro-fusion.
uint64_t MissedMacroFusionPairs{0};
uint64_t MissedMacroFusionExecCount{0};
/// Stats for stale profile matching:
/// the total number of basic blocks in the profile
uint32_t NumStaleBlocks{0};

View File

@@ -835,10 +835,6 @@ public:
/// them.
void calculateLoopInfo();
/// Calculate missed macro-fusion opportunities and update BinaryContext
/// stats.
void calculateMacroOpFusionStats();
/// Returns if BinaryDominatorTree has been constructed for this function.
bool hasDomTree() const { return BDT != nullptr; }

View File

@@ -930,13 +930,6 @@ public:
/// Return true if the instruction is encoded using EVEX (AVX-512).
virtual bool hasEVEXEncoding(const MCInst &Inst) const { return false; }
/// Return true if a pair of instructions represented by \p Insts
/// could be fused into a single uop.
virtual bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const {
llvm_unreachable("not implemented");
return false;
}
struct X86MemOperand {
unsigned BaseRegNum;
int64_t ScaleImm;

View File

@@ -404,45 +404,6 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
CondBranch, UncondBranch);
}
bool BinaryBasicBlock::isMacroOpFusionPair(const_iterator I) const {
auto &MIB = Function->getBinaryContext().MIB;
ArrayRef<MCInst> Insts = Instructions;
return MIB->isMacroOpFusionPair(Insts.slice(I - begin()));
}
BinaryBasicBlock::const_iterator
BinaryBasicBlock::getMacroOpFusionPair() const {
if (!Function->getBinaryContext().isX86())
return end();
if (getNumNonPseudos() < 2 || succ_size() != 2)
return end();
auto RI = getLastNonPseudo();
assert(RI != rend() && "cannot have an empty block with 2 successors");
BinaryContext &BC = Function->getBinaryContext();
// Skip instruction if it's an unconditional branch following
// a conditional one.
if (BC.MIB->isUnconditionalBranch(*RI))
++RI;
if (!BC.MIB->isConditionalBranch(*RI))
return end();
// Start checking with instruction preceding the conditional branch.
++RI;
if (RI == rend())
return end();
auto II = std::prev(RI.base()); // convert to a forward iterator
if (isMacroOpFusionPair(II))
return II;
return end();
}
MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) {
BinaryContext &BC = Function->getBinaryContext();
auto Itr = rbegin();

View File

@@ -38,19 +38,6 @@ extern cl::opt<bool> PreserveBlocksAlignment;
cl::opt<bool> AlignBlocks("align-blocks", cl::desc("align basic blocks"),
cl::cat(BoltOptCategory));
cl::opt<MacroFusionType>
AlignMacroOpFusion("align-macro-fusion",
cl::desc("fix instruction alignment for macro-fusion (x86 relocation mode)"),
cl::init(MFT_HOT),
cl::values(clEnumValN(MFT_NONE, "none",
"do not insert alignment no-ops for macro-fusion"),
clEnumValN(MFT_HOT, "hot",
"only insert alignment no-ops on hot execution paths (default)"),
clEnumValN(MFT_ALL, "all",
"always align instructions to allow macro-fusion")),
cl::ZeroOrMore,
cl::cat(BoltRelocCategory));
static cl::list<std::string>
BreakFunctionNames("break-funcs",
cl::CommaSeparated,
@@ -453,20 +440,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
Streamer.emitLabel(EntrySymbol);
}
// Check if special alignment for macro-fusion is needed.
bool MayNeedMacroFusionAlignment =
(opts::AlignMacroOpFusion == MFT_ALL) ||
(opts::AlignMacroOpFusion == MFT_HOT && BB->getKnownExecutionCount());
BinaryBasicBlock::const_iterator MacroFusionPair;
if (MayNeedMacroFusionAlignment) {
MacroFusionPair = BB->getMacroOpFusionPair();
if (MacroFusionPair == BB->end())
MayNeedMacroFusionAlignment = false;
}
SMLoc LastLocSeen;
// Remember if the last instruction emitted was a prefix.
bool LastIsPrefix = false;
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
MCInst &Instr = *I;
@@ -479,16 +453,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
continue;
}
// Handle macro-fusion alignment. If we emitted a prefix as
// the last instruction, we should've already emitted the associated
// alignment hint, so don't emit it twice.
if (MayNeedMacroFusionAlignment && !LastIsPrefix &&
I == MacroFusionPair) {
// This assumes the second instruction in the macro-op pair will get
// assigned to its own MCRelaxableFragment. Since all JCC instructions
// are relaxable, we should be safe.
}
if (!EmitCodeOnly) {
// A symbol to be emitted before the instruction to mark its location.
MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr);
@@ -525,7 +489,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
}
Streamer.emitInstruction(Instr, *BC.STI);
LastIsPrefix = BC.MIB->isPrefix(Instr);
}
}

View File

@@ -2279,8 +2279,6 @@ void BinaryFunction::postProcessCFG() {
postProcessBranches();
}
calculateMacroOpFusionStats();
// The final cleanup of intermediate structures.
clearList(IgnoredBranches);
@@ -2297,29 +2295,6 @@ void BinaryFunction::postProcessCFG() {
"invalid CFG detected after post-processing");
}
void BinaryFunction::calculateMacroOpFusionStats() {
if (!getBinaryContext().isX86())
return;
for (const BinaryBasicBlock &BB : blocks()) {
auto II = BB.getMacroOpFusionPair();
if (II == BB.end())
continue;
// Check offset of the second instruction.
// FIXME: arch-specific.
const uint32_t Offset = BC.MIB->getOffsetWithDefault(*std::next(II), 0);
if (!Offset || (getAddress() + Offset) % 64)
continue;
LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
<< Twine::utohexstr(getAddress() + Offset)
<< " in function " << *this << "; executed "
<< BB.getKnownExecutionCount() << " times.\n");
++BC.Stats.MissedMacroFusionPairs;
BC.Stats.MissedMacroFusionExecCount += BB.getKnownExecutionCount();
}
}
void BinaryFunction::removeTagsFromProfile() {
for (BinaryBasicBlock *BB : BasicBlocks) {
if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)

View File

@@ -44,7 +44,6 @@ namespace opts {
extern cl::OptionCategory BoltCategory;
extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bolt::MacroFusionType> AlignMacroOpFusion;
extern cl::opt<unsigned> Verbosity;
extern cl::opt<bool> EnableBAT;
extern cl::opt<unsigned> ExecutionCountThreshold;
@@ -1637,25 +1636,6 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
}
}
// Print information on missed macro-fusion opportunities seen on input.
if (BC.Stats.MissedMacroFusionPairs) {
BC.outs() << format(
"BOLT-INFO: the input contains %zu (dynamic count : %zu)"
" opportunities for macro-fusion optimization",
BC.Stats.MissedMacroFusionPairs, BC.Stats.MissedMacroFusionExecCount);
switch (opts::AlignMacroOpFusion) {
case MFT_NONE:
BC.outs() << ". Use -align-macro-fusion to fix.\n";
break;
case MFT_HOT:
BC.outs() << ". Will fix instances on a hot path.\n";
break;
case MFT_ALL:
BC.outs() << " that are going to be fixed\n";
break;
}
}
// Collect and print information about suboptimal code layout on input.
if (opts::ReportBadLayout) {
std::vector<BinaryFunction *> SuboptimalFuncs;

View File

@@ -75,7 +75,6 @@ extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;
namespace opts {
extern cl::opt<MacroFusionType> AlignMacroOpFusion;
extern cl::list<std::string> HotTextMoveSections;
extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
@@ -1969,12 +1968,6 @@ void RewriteInstance::adjustCommandLineOptions() {
if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
RtLibrary->adjustCommandLineOptions(*BC);
if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
BC->outs()
<< "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
opts::AlignMacroOpFusion = MFT_NONE;
}
if (BC->isX86() && BC->MAB->allowAutoPadding()) {
if (!BC->HasRelocations) {
BC->errs()
@@ -1985,13 +1978,6 @@ void RewriteInstance::adjustCommandLineOptions() {
BC->outs()
<< "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
"may take several minutes\n";
opts::AlignMacroOpFusion = MFT_NONE;
}
if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) {
BC->outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
"mode\n";
opts::AlignMacroOpFusion = MFT_NONE;
}
if (opts::SplitEH && !BC->HasRelocations) {
@@ -2013,14 +1999,6 @@ void RewriteInstance::adjustCommandLineOptions() {
opts::StrictMode = true;
}
if (BC->isX86() && BC->HasRelocations &&
opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) {
BC->outs()
<< "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
"was specified\n";
opts::AlignMacroOpFusion = MFT_ALL;
}
if (!BC->HasRelocations &&
opts::ReorderFunctions != ReorderFunctions::RT_NONE) {
BC->errs() << "BOLT-ERROR: function reordering only works when "

View File

@@ -141,10 +141,6 @@ public:
*AArch64ExprB.getSubExpr(), Comp);
}
bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
return false;
}
bool shortenInstruction(MCInst &, const MCSubtargetInfo &) const override {
return false;
}

View File

@@ -661,40 +661,6 @@ public:
return (Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX;
}
bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
const auto *I = Insts.begin();
while (I != Insts.end() && isPrefix(*I))
++I;
if (I == Insts.end())
return false;
const MCInst &FirstInst = *I;
++I;
while (I != Insts.end() && isPrefix(*I))
++I;
if (I == Insts.end())
return false;
const MCInst &SecondInst = *I;
if (!isConditionalBranch(SecondInst))
return false;
// Cannot fuse if the first instruction uses RIP-relative memory.
if (hasPCRelOperand(FirstInst))
return false;
const X86::FirstMacroFusionInstKind CmpKind =
X86::classifyFirstOpcodeInMacroFusion(FirstInst.getOpcode());
if (CmpKind == X86::FirstMacroFusionInstKind::Invalid)
return false;
X86::CondCode CC = static_cast<X86::CondCode>(getCondCode(SecondInst));
X86::SecondMacroFusionInstKind BranchKind =
X86::classifySecondCondCodeInMacroFusion(CC);
if (BranchKind == X86::SecondMacroFusionInstKind::Invalid)
return false;
return X86::isMacroFused(CmpKind, BranchKind);
}
std::optional<X86MemOperand>
evaluateX86MemoryOperand(const MCInst &Inst) const override {
int MemOpNo = getMemoryOperandNo(Inst);