[BOLT] Drop macro-fusion alignment (#97358)
Commit 9d0754ada5 dropped the MC support required for
optimal macro-fusion alignment in BOLT. Remove the corresponding support from
BOLT as well, since performance measurements with large binaries didn't show a
significant improvement.
Test Plan:
macro-fusion alignment was never upstreamed, so no upstream tests are
affected.
This commit is contained in:
@@ -842,15 +842,6 @@ public:
|
||||
bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
|
||||
MCInst *&CondBranch, MCInst *&UncondBranch);
|
||||
|
||||
/// Return true if iterator \p I is pointing to the first instruction in
|
||||
/// a pair that could be macro-fused.
|
||||
bool isMacroOpFusionPair(const_iterator I) const;
|
||||
|
||||
/// If the basic block has a pair of instructions suitable for macro-fusion,
|
||||
/// return iterator to the first instruction of the pair.
|
||||
/// Otherwise return end().
|
||||
const_iterator getMacroOpFusionPair() const;
|
||||
|
||||
/// Printer required for printing dominator trees.
|
||||
void printAsOperand(raw_ostream &OS, bool PrintType = true) {
|
||||
if (PrintType)
|
||||
|
||||
@@ -698,10 +698,6 @@ public:
|
||||
|
||||
/// Binary-wide aggregated stats.
|
||||
struct BinaryStats {
|
||||
/// Stats for macro-fusion.
|
||||
uint64_t MissedMacroFusionPairs{0};
|
||||
uint64_t MissedMacroFusionExecCount{0};
|
||||
|
||||
/// Stats for stale profile matching:
|
||||
/// the total number of basic blocks in the profile
|
||||
uint32_t NumStaleBlocks{0};
|
||||
|
||||
@@ -835,10 +835,6 @@ public:
|
||||
/// them.
|
||||
void calculateLoopInfo();
|
||||
|
||||
/// Calculate missed macro-fusion opportunities and update BinaryContext
|
||||
/// stats.
|
||||
void calculateMacroOpFusionStats();
|
||||
|
||||
/// Returns if BinaryDominatorTree has been constructed for this function.
|
||||
bool hasDomTree() const { return BDT != nullptr; }
|
||||
|
||||
|
||||
@@ -930,13 +930,6 @@ public:
|
||||
/// Return true if the instruction is encoded using EVEX (AVX-512).
|
||||
virtual bool hasEVEXEncoding(const MCInst &Inst) const { return false; }
|
||||
|
||||
/// Return true if a pair of instructions represented by \p Insts
|
||||
/// could be fused into a single uop.
|
||||
virtual bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const {
|
||||
llvm_unreachable("not implemented");
|
||||
return false;
|
||||
}
|
||||
|
||||
struct X86MemOperand {
|
||||
unsigned BaseRegNum;
|
||||
int64_t ScaleImm;
|
||||
|
||||
@@ -404,45 +404,6 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
|
||||
CondBranch, UncondBranch);
|
||||
}
|
||||
|
||||
/// Ask the instruction builder of the owning binary context whether the
/// instruction at \p I begins a macro-fusible pair. The builder receives the
/// tail of this block's instruction list starting at \p I.
bool BinaryBasicBlock::isMacroOpFusionPair(const_iterator I) const {
  // Distance of I from the start of the block, used to slice the list.
  const auto Index = I - begin();
  const ArrayRef<MCInst> Tail = ArrayRef<MCInst>(Instructions).slice(Index);
  return Function->getBinaryContext().MIB->isMacroOpFusionPair(Tail);
}
|
||||
|
||||
/// Locate the macro-fusion candidate at the end of this block.
///
/// Only x86 blocks with exactly two successors and at least two non-pseudo
/// instructions are considered. The candidate is the instruction immediately
/// preceding the block's conditional branch; a trailing unconditional branch
/// is stepped over first.
///
/// \returns an iterator to the first instruction of the fusible pair, or
/// end() when the block has no such pair.
BinaryBasicBlock::const_iterator
BinaryBasicBlock::getMacroOpFusionPair() const {
  BinaryContext &BC = Function->getBinaryContext();
  if (!BC.isX86() || getNumNonPseudos() < 2 || succ_size() != 2)
    return end();

  auto RevIt = getLastNonPseudo();
  assert(RevIt != rend() && "cannot have an empty block with 2 successors");

  // An unconditional branch may follow the conditional one; step over it.
  if (BC.MIB->isUnconditionalBranch(*RevIt))
    ++RevIt;

  if (!BC.MIB->isConditionalBranch(*RevIt))
    return end();

  // Move to the instruction in front of the conditional branch, if any.
  if (++RevIt == rend())
    return end();

  // base() refers to the element after the one the reverse iterator
  // designates, so step back once to get the matching forward iterator.
  const const_iterator Candidate = std::prev(RevIt.base());
  return isMacroOpFusionPair(Candidate) ? Candidate : end();
}
|
||||
|
||||
MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) {
|
||||
BinaryContext &BC = Function->getBinaryContext();
|
||||
auto Itr = rbegin();
|
||||
|
||||
@@ -38,19 +38,6 @@ extern cl::opt<bool> PreserveBlocksAlignment;
|
||||
cl::opt<bool> AlignBlocks("align-blocks", cl::desc("align basic blocks"),
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
// Command-line option "-align-macro-fusion": selects whether alignment
// no-ops are inserted for macro-fusion never ("none"), only on hot execution
// paths ("hot", the initial value), or always ("all").
cl::opt<MacroFusionType> AlignMacroOpFusion(
    "align-macro-fusion",
    cl::desc(
        "fix instruction alignment for macro-fusion (x86 relocation mode)"),
    cl::init(MFT_HOT),
    cl::values(
        clEnumValN(MFT_NONE, "none",
                   "do not insert alignment no-ops for macro-fusion"),
        clEnumValN(
            MFT_HOT, "hot",
            "only insert alignment no-ops on hot execution paths (default)"),
        clEnumValN(MFT_ALL, "all",
                   "always align instructions to allow macro-fusion")),
    cl::ZeroOrMore, cl::cat(BoltRelocCategory));
|
||||
|
||||
static cl::list<std::string>
|
||||
BreakFunctionNames("break-funcs",
|
||||
cl::CommaSeparated,
|
||||
@@ -453,20 +440,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
|
||||
Streamer.emitLabel(EntrySymbol);
|
||||
}
|
||||
|
||||
// Check if special alignment for macro-fusion is needed.
|
||||
bool MayNeedMacroFusionAlignment =
|
||||
(opts::AlignMacroOpFusion == MFT_ALL) ||
|
||||
(opts::AlignMacroOpFusion == MFT_HOT && BB->getKnownExecutionCount());
|
||||
BinaryBasicBlock::const_iterator MacroFusionPair;
|
||||
if (MayNeedMacroFusionAlignment) {
|
||||
MacroFusionPair = BB->getMacroOpFusionPair();
|
||||
if (MacroFusionPair == BB->end())
|
||||
MayNeedMacroFusionAlignment = false;
|
||||
}
|
||||
|
||||
SMLoc LastLocSeen;
|
||||
// Remember if the last instruction emitted was a prefix.
|
||||
bool LastIsPrefix = false;
|
||||
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
|
||||
MCInst &Instr = *I;
|
||||
|
||||
@@ -479,16 +453,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle macro-fusion alignment. If we emitted a prefix as
|
||||
// the last instruction, we should've already emitted the associated
|
||||
// alignment hint, so don't emit it twice.
|
||||
if (MayNeedMacroFusionAlignment && !LastIsPrefix &&
|
||||
I == MacroFusionPair) {
|
||||
// This assumes the second instruction in the macro-op pair will get
|
||||
// assigned to its own MCRelaxableFragment. Since all JCC instructions
|
||||
// are relaxable, we should be safe.
|
||||
}
|
||||
|
||||
if (!EmitCodeOnly) {
|
||||
// A symbol to be emitted before the instruction to mark its location.
|
||||
MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr);
|
||||
@@ -525,7 +489,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
|
||||
}
|
||||
|
||||
Streamer.emitInstruction(Instr, *BC.STI);
|
||||
LastIsPrefix = BC.MIB->isPrefix(Instr);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2279,8 +2279,6 @@ void BinaryFunction::postProcessCFG() {
|
||||
postProcessBranches();
|
||||
}
|
||||
|
||||
calculateMacroOpFusionStats();
|
||||
|
||||
// The final cleanup of intermediate structures.
|
||||
clearList(IgnoredBranches);
|
||||
|
||||
@@ -2297,29 +2295,6 @@ void BinaryFunction::postProcessCFG() {
|
||||
"invalid CFG detected after post-processing");
|
||||
}
|
||||
|
||||
void BinaryFunction::calculateMacroOpFusionStats() {
|
||||
if (!getBinaryContext().isX86())
|
||||
return;
|
||||
for (const BinaryBasicBlock &BB : blocks()) {
|
||||
auto II = BB.getMacroOpFusionPair();
|
||||
if (II == BB.end())
|
||||
continue;
|
||||
|
||||
// Check offset of the second instruction.
|
||||
// FIXME: arch-specific.
|
||||
const uint32_t Offset = BC.MIB->getOffsetWithDefault(*std::next(II), 0);
|
||||
if (!Offset || (getAddress() + Offset) % 64)
|
||||
continue;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
|
||||
<< Twine::utohexstr(getAddress() + Offset)
|
||||
<< " in function " << *this << "; executed "
|
||||
<< BB.getKnownExecutionCount() << " times.\n");
|
||||
++BC.Stats.MissedMacroFusionPairs;
|
||||
BC.Stats.MissedMacroFusionExecCount += BB.getKnownExecutionCount();
|
||||
}
|
||||
}
|
||||
|
||||
void BinaryFunction::removeTagsFromProfile() {
|
||||
for (BinaryBasicBlock *BB : BasicBlocks) {
|
||||
if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
|
||||
|
||||
@@ -44,7 +44,6 @@ namespace opts {
|
||||
extern cl::OptionCategory BoltCategory;
|
||||
extern cl::OptionCategory BoltOptCategory;
|
||||
|
||||
extern cl::opt<bolt::MacroFusionType> AlignMacroOpFusion;
|
||||
extern cl::opt<unsigned> Verbosity;
|
||||
extern cl::opt<bool> EnableBAT;
|
||||
extern cl::opt<unsigned> ExecutionCountThreshold;
|
||||
@@ -1637,25 +1636,6 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
|
||||
}
|
||||
}
|
||||
|
||||
// Print information on missed macro-fusion opportunities seen on input.
|
||||
if (BC.Stats.MissedMacroFusionPairs) {
|
||||
BC.outs() << format(
|
||||
"BOLT-INFO: the input contains %zu (dynamic count : %zu)"
|
||||
" opportunities for macro-fusion optimization",
|
||||
BC.Stats.MissedMacroFusionPairs, BC.Stats.MissedMacroFusionExecCount);
|
||||
switch (opts::AlignMacroOpFusion) {
|
||||
case MFT_NONE:
|
||||
BC.outs() << ". Use -align-macro-fusion to fix.\n";
|
||||
break;
|
||||
case MFT_HOT:
|
||||
BC.outs() << ". Will fix instances on a hot path.\n";
|
||||
break;
|
||||
case MFT_ALL:
|
||||
BC.outs() << " that are going to be fixed\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Collect and print information about suboptimal code layout on input.
|
||||
if (opts::ReportBadLayout) {
|
||||
std::vector<BinaryFunction *> SuboptimalFuncs;
|
||||
|
||||
@@ -75,7 +75,6 @@ extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;
|
||||
|
||||
namespace opts {
|
||||
|
||||
extern cl::opt<MacroFusionType> AlignMacroOpFusion;
|
||||
extern cl::list<std::string> HotTextMoveSections;
|
||||
extern cl::opt<bool> Hugify;
|
||||
extern cl::opt<bool> Instrument;
|
||||
@@ -1969,12 +1968,6 @@ void RewriteInstance::adjustCommandLineOptions() {
|
||||
if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
|
||||
RtLibrary->adjustCommandLineOptions(*BC);
|
||||
|
||||
if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
|
||||
BC->outs()
|
||||
<< "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
|
||||
opts::AlignMacroOpFusion = MFT_NONE;
|
||||
}
|
||||
|
||||
if (BC->isX86() && BC->MAB->allowAutoPadding()) {
|
||||
if (!BC->HasRelocations) {
|
||||
BC->errs()
|
||||
@@ -1985,13 +1978,6 @@ void RewriteInstance::adjustCommandLineOptions() {
|
||||
BC->outs()
|
||||
<< "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
|
||||
"may take several minutes\n";
|
||||
opts::AlignMacroOpFusion = MFT_NONE;
|
||||
}
|
||||
|
||||
if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) {
|
||||
BC->outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
|
||||
"mode\n";
|
||||
opts::AlignMacroOpFusion = MFT_NONE;
|
||||
}
|
||||
|
||||
if (opts::SplitEH && !BC->HasRelocations) {
|
||||
@@ -2013,14 +1999,6 @@ void RewriteInstance::adjustCommandLineOptions() {
|
||||
opts::StrictMode = true;
|
||||
}
|
||||
|
||||
if (BC->isX86() && BC->HasRelocations &&
|
||||
opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) {
|
||||
BC->outs()
|
||||
<< "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
|
||||
"was specified\n";
|
||||
opts::AlignMacroOpFusion = MFT_ALL;
|
||||
}
|
||||
|
||||
if (!BC->HasRelocations &&
|
||||
opts::ReorderFunctions != ReorderFunctions::RT_NONE) {
|
||||
BC->errs() << "BOLT-ERROR: function reordering only works when "
|
||||
|
||||
@@ -141,10 +141,6 @@ public:
|
||||
*AArch64ExprB.getSubExpr(), Comp);
|
||||
}
|
||||
|
||||
/// This override never reports a macro-fusion pair: it returns false for
/// every instruction sequence \p Insts.
bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
  return false;
}
|
||||
|
||||
bool shortenInstruction(MCInst &, const MCSubtargetInfo &) const override {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -661,40 +661,6 @@ public:
|
||||
return (Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX;
|
||||
}
|
||||
|
||||
/// Decide whether the first two non-prefix instructions in \p Insts can be
/// macro-fused.
///
/// Returns false when fewer than two non-prefix instructions are present,
/// when the second one is not a conditional branch, when the first one has a
/// PC-relative operand, or when either instruction is classified as invalid
/// for macro-fusion. Otherwise the result is X86::isMacroFused() for the two
/// classifications.
bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
  const MCInst *const End = Insts.end();

  // Advance past any prefix instructions; yields End if nothing else is left.
  auto SkipPrefixes = [this, End](const MCInst *It) {
    while (It != End && isPrefix(*It))
      ++It;
    return It;
  };

  const MCInst *const First = SkipPrefixes(Insts.begin());
  if (First == End)
    return false;

  const MCInst *const Second = SkipPrefixes(First + 1);
  if (Second == End)
    return false;

  if (!isConditionalBranch(*Second))
    return false;

  // Cannot fuse if the first instruction uses RIP-relative memory.
  if (hasPCRelOperand(*First))
    return false;

  const X86::FirstMacroFusionInstKind FirstKind =
      X86::classifyFirstOpcodeInMacroFusion(First->getOpcode());
  if (FirstKind == X86::FirstMacroFusionInstKind::Invalid)
    return false;

  const auto BranchCC = static_cast<X86::CondCode>(getCondCode(*Second));
  const X86::SecondMacroFusionInstKind SecondKind =
      X86::classifySecondCondCodeInMacroFusion(BranchCC);
  if (SecondKind == X86::SecondMacroFusionInstKind::Invalid)
    return false;

  return X86::isMacroFused(FirstKind, SecondKind);
}
|
||||
|
||||
std::optional<X86MemOperand>
|
||||
evaluateX86MemoryOperand(const MCInst &Inst) const override {
|
||||
int MemOpNo = getMemoryOperandNo(Inst);
|
||||
|
||||
Reference in New Issue
Block a user