[MachineLateInstrsCleanup] Improve compile time for huge functions.

It was discovered that this pass could be slow on huge functions, meaning 20%
compile time instead of the usual ~0.5% (with a test case spending ~19 mins
just in the backend).

The problem related to the necessary clearing of earlier kill flags when a
redundant instruction is removed. With this patch, the handling of kill flags
is now done by maintaining a map instead of scanning backwards in the
function. This remedies the compile time on the huge file fully.

Reviewed By: vpykhtin, arsenm

Differential Revision: https://reviews.llvm.org/D147532

Resolves https://github.com/llvm/llvm-project/issues/61397
This commit is contained in:
Jonas Paulsson
2023-04-02 20:23:34 +02:00
parent 7cba800104
commit cb57b7a770

View File

@@ -42,14 +42,31 @@ class MachineLateInstrsCleanup : public MachineFunctionPass {
const TargetRegisterInfo *TRI = nullptr;
const TargetInstrInfo *TII = nullptr;
// Data structures to map regs to their definitions per MBB.
using Reg2DefMap = std::map<Register, MachineInstr*>;
std::vector<Reg2DefMap> RegDefs;
// Data structures to map regs to their definitions and kills per MBB.
struct Reg2MIMap : public SmallDenseMap<Register, MachineInstr *> {
MachineInstr *get(Register Reg) {
auto I = find(Reg);
return I != end() ? I->second : nullptr;
}
bool hasIdentical(Register Reg, MachineInstr *ArgMI) {
MachineInstr *MI = get(Reg);
return MI && MI->isIdenticalTo(*ArgMI);
}
};
std::vector<Reg2MIMap> RegDefs;
std::vector<Reg2MIMap> RegKills;
// Walk through the instructions in MBB and remove any redundant
// instructions.
bool processBlock(MachineBasicBlock *MBB);
void removeRedundantDef(MachineInstr *MI);
void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
BitVector &VisitedPreds);
public:
static char ID; // Pass identification, replacement for typeid
@@ -88,6 +105,8 @@ bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
RegDefs.clear();
RegDefs.resize(MF.getNumBlockIDs());
RegKills.clear();
RegKills.resize(MF.getNumBlockIDs());
// Visit all MBBs in an order that maximises the reuse from predecessors.
bool Changed = false;
@@ -102,41 +121,36 @@ bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
// in MBB and if needed continue in predecessors until a use/def of Reg is
// encountered. This seems to be faster in practice than tracking kill flags
// in a map.
static void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
BitVector &VisitedPreds,
const TargetRegisterInfo *TRI) {
void MachineLateInstrsCleanup::
clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
BitVector &VisitedPreds) {
VisitedPreds.set(MBB->getNumber());
while (I != MBB->begin()) {
--I;
bool Found = false;
for (auto &MO : I->operands())
if (MO.isReg() && TRI->regsOverlap(MO.getReg(), Reg)) {
if (MO.isDef())
return;
if (MO.readsReg()) {
MO.setIsKill(false);
Found = true; // Keep going for an implicit kill of the super-reg.
}
}
if (Found)
return;
// Kill flag in MBB
if (MachineInstr *KillMI = RegKills[MBB->getNumber()].get(Reg)) {
KillMI->clearRegisterKills(Reg, TRI);
return;
}
// Def in MBB (missing kill flag)
if (MachineInstr *DefMI = RegDefs[MBB->getNumber()].get(Reg))
if (DefMI->getParent() == MBB)
return;
// If an earlier def is not in MBB, continue in predecessors.
if (!MBB->isLiveIn(Reg))
MBB->addLiveIn(Reg);
assert(!MBB->pred_empty() && "Predecessor def not found!");
for (MachineBasicBlock *Pred : MBB->predecessors())
if (!VisitedPreds.test(Pred->getNumber()))
clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds, TRI);
clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds);
}
static void removeRedundantDef(MachineInstr *MI,
const TargetRegisterInfo *TRI) {
void MachineLateInstrsCleanup::removeRedundantDef(MachineInstr *MI) {
Register Reg = MI->getOperand(0).getReg();
BitVector VisitedPreds(MI->getMF()->getNumBlockIDs());
clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds, TRI);
clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds);
MI->eraseFromParent();
++NumRemoved;
}
@@ -172,7 +186,8 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
bool Changed = false;
Reg2DefMap &MBBDefs = RegDefs[MBB->getNumber()];
Reg2MIMap &MBBDefs = RegDefs[MBB->getNumber()];
Reg2MIMap &MBBKills = RegKills[MBB->getNumber()];
// Find reusable definitions in the predecessor(s).
if (!MBB->pred_empty() && !MBB->isEHPad() &&
@@ -182,9 +197,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
if (llvm::all_of(
drop_begin(MBB->predecessors()),
[&, &Reg = Reg, &DefMI = DefMI](const MachineBasicBlock *Pred) {
auto PredDefI = RegDefs[Pred->getNumber()].find(Reg);
return PredDefI != RegDefs[Pred->getNumber()].end() &&
DefMI->isIdenticalTo(*PredDefI->second);
return RegDefs[Pred->getNumber()].hasIdentical(Reg, DefMI);
})) {
MBBDefs[Reg] = DefMI;
LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in "
@@ -201,6 +214,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
// it) are valid.
if (MI.modifiesRegister(FrameReg, TRI)) {
MBBDefs.clear();
MBBKills.clear();
continue;
}
@@ -208,24 +222,23 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg);
// Check for an earlier identical and reusable instruction.
if (IsCandidate) {
auto DefI = MBBDefs.find(DefedReg);
if (DefI != MBBDefs.end() && MI.isIdenticalTo(*DefI->second)) {
LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
<< printMBBReference(*MBB) << ": " << MI;);
removeRedundantDef(&MI, TRI);
Changed = true;
continue;
}
if (IsCandidate && MBBDefs.hasIdentical(DefedReg, &MI)) {
LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
<< printMBBReference(*MBB) << ": " << MI;);
removeRedundantDef(&MI);
Changed = true;
continue;
}
// Clear any entries in map that MI clobbers.
for (auto DefI = MBBDefs.begin(); DefI != MBBDefs.end();) {
Register Reg = DefI->first;
if (MI.modifiesRegister(Reg, TRI))
DefI = MBBDefs.erase(DefI);
else
++DefI;
for (auto DefI : llvm::make_early_inc_range(MBBDefs)) {
Register Reg = DefI.first;
if (MI.modifiesRegister(Reg, TRI)) {
MBBDefs.erase(Reg);
MBBKills.erase(Reg);
} else if (MI.findRegisterUseOperandIdx(Reg, false /*isKill*/, TRI) != -1)
// Keep track of the last use seen so far.
MBBKills[Reg] = &MI;
}
// Record this MI for potential later reuse.
@@ -233,6 +246,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
LLVM_DEBUG(dbgs() << "Found interesting instruction in "
<< printMBBReference(*MBB) << ": " << MI;);
MBBDefs[DefedReg] = &MI;
assert(!MBBKills.count(DefedReg) && "Should already have been removed.");
}
}