[MachineLateInstrsCleanup] Improve compile time for huge functions.
It was discovered that this pass could be slow on huge functions, meaning 20% compile time instead of the usual ~0.5% (with a test case spending ~19 mins just in the backend). The problem related to the necessary clearing of earlier kill flags when a redundant instruction is removed. With this patch, the handling of kill flags is now done by maintaining a map instead of scanning backwards in the function. This remedies the compile time on the huge file fully. Reviewed By: vpykhtin, arsenm Differential Revision: https://reviews.llvm.org/D147532 Resolves https://github.com/llvm/llvm-project/issues/61397
This commit is contained in:
@@ -42,14 +42,31 @@ class MachineLateInstrsCleanup : public MachineFunctionPass {
|
||||
const TargetRegisterInfo *TRI = nullptr;
|
||||
const TargetInstrInfo *TII = nullptr;
|
||||
|
||||
// Data structures to map regs to their definitions per MBB.
|
||||
using Reg2DefMap = std::map<Register, MachineInstr*>;
|
||||
std::vector<Reg2DefMap> RegDefs;
|
||||
// Data structures to map regs to their definitions and kills per MBB.
|
||||
struct Reg2MIMap : public SmallDenseMap<Register, MachineInstr *> {
|
||||
MachineInstr *get(Register Reg) {
|
||||
auto I = find(Reg);
|
||||
return I != end() ? I->second : nullptr;
|
||||
}
|
||||
|
||||
bool hasIdentical(Register Reg, MachineInstr *ArgMI) {
|
||||
MachineInstr *MI = get(Reg);
|
||||
return MI && MI->isIdenticalTo(*ArgMI);
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<Reg2MIMap> RegDefs;
|
||||
std::vector<Reg2MIMap> RegKills;
|
||||
|
||||
// Walk through the instructions in MBB and remove any redundant
|
||||
// instructions.
|
||||
bool processBlock(MachineBasicBlock *MBB);
|
||||
|
||||
void removeRedundantDef(MachineInstr *MI);
|
||||
void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
BitVector &VisitedPreds);
|
||||
|
||||
public:
|
||||
static char ID; // Pass identification, replacement for typeid
|
||||
|
||||
@@ -88,6 +105,8 @@ bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
RegDefs.clear();
|
||||
RegDefs.resize(MF.getNumBlockIDs());
|
||||
RegKills.clear();
|
||||
RegKills.resize(MF.getNumBlockIDs());
|
||||
|
||||
// Visit all MBBs in an order that maximises the reuse from predecessors.
|
||||
bool Changed = false;
|
||||
@@ -102,41 +121,36 @@ bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
|
||||
// in MBB and if needed continue in predecessors until a use/def of Reg is
|
||||
// encountered. This seems to be faster in practice than tracking kill flags
|
||||
// in a map.
|
||||
static void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
BitVector &VisitedPreds,
|
||||
const TargetRegisterInfo *TRI) {
|
||||
void MachineLateInstrsCleanup::
|
||||
clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
BitVector &VisitedPreds) {
|
||||
VisitedPreds.set(MBB->getNumber());
|
||||
while (I != MBB->begin()) {
|
||||
--I;
|
||||
bool Found = false;
|
||||
for (auto &MO : I->operands())
|
||||
if (MO.isReg() && TRI->regsOverlap(MO.getReg(), Reg)) {
|
||||
if (MO.isDef())
|
||||
return;
|
||||
if (MO.readsReg()) {
|
||||
MO.setIsKill(false);
|
||||
Found = true; // Keep going for an implicit kill of the super-reg.
|
||||
}
|
||||
}
|
||||
if (Found)
|
||||
return;
|
||||
|
||||
// Kill flag in MBB
|
||||
if (MachineInstr *KillMI = RegKills[MBB->getNumber()].get(Reg)) {
|
||||
KillMI->clearRegisterKills(Reg, TRI);
|
||||
return;
|
||||
}
|
||||
|
||||
// Def in MBB (missing kill flag)
|
||||
if (MachineInstr *DefMI = RegDefs[MBB->getNumber()].get(Reg))
|
||||
if (DefMI->getParent() == MBB)
|
||||
return;
|
||||
|
||||
// If an earlier def is not in MBB, continue in predecessors.
|
||||
if (!MBB->isLiveIn(Reg))
|
||||
MBB->addLiveIn(Reg);
|
||||
assert(!MBB->pred_empty() && "Predecessor def not found!");
|
||||
for (MachineBasicBlock *Pred : MBB->predecessors())
|
||||
if (!VisitedPreds.test(Pred->getNumber()))
|
||||
clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds, TRI);
|
||||
clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds);
|
||||
}
|
||||
|
||||
static void removeRedundantDef(MachineInstr *MI,
|
||||
const TargetRegisterInfo *TRI) {
|
||||
void MachineLateInstrsCleanup::removeRedundantDef(MachineInstr *MI) {
|
||||
Register Reg = MI->getOperand(0).getReg();
|
||||
BitVector VisitedPreds(MI->getMF()->getNumBlockIDs());
|
||||
clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds, TRI);
|
||||
clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds);
|
||||
MI->eraseFromParent();
|
||||
++NumRemoved;
|
||||
}
|
||||
@@ -172,7 +186,8 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
|
||||
|
||||
bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
|
||||
bool Changed = false;
|
||||
Reg2DefMap &MBBDefs = RegDefs[MBB->getNumber()];
|
||||
Reg2MIMap &MBBDefs = RegDefs[MBB->getNumber()];
|
||||
Reg2MIMap &MBBKills = RegKills[MBB->getNumber()];
|
||||
|
||||
// Find reusable definitions in the predecessor(s).
|
||||
if (!MBB->pred_empty() && !MBB->isEHPad() &&
|
||||
@@ -182,9 +197,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
|
||||
if (llvm::all_of(
|
||||
drop_begin(MBB->predecessors()),
|
||||
[&, &Reg = Reg, &DefMI = DefMI](const MachineBasicBlock *Pred) {
|
||||
auto PredDefI = RegDefs[Pred->getNumber()].find(Reg);
|
||||
return PredDefI != RegDefs[Pred->getNumber()].end() &&
|
||||
DefMI->isIdenticalTo(*PredDefI->second);
|
||||
return RegDefs[Pred->getNumber()].hasIdentical(Reg, DefMI);
|
||||
})) {
|
||||
MBBDefs[Reg] = DefMI;
|
||||
LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in "
|
||||
@@ -201,6 +214,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
|
||||
// it) are valid.
|
||||
if (MI.modifiesRegister(FrameReg, TRI)) {
|
||||
MBBDefs.clear();
|
||||
MBBKills.clear();
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -208,24 +222,23 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
|
||||
bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg);
|
||||
|
||||
// Check for an earlier identical and reusable instruction.
|
||||
if (IsCandidate) {
|
||||
auto DefI = MBBDefs.find(DefedReg);
|
||||
if (DefI != MBBDefs.end() && MI.isIdenticalTo(*DefI->second)) {
|
||||
LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
|
||||
<< printMBBReference(*MBB) << ": " << MI;);
|
||||
removeRedundantDef(&MI, TRI);
|
||||
Changed = true;
|
||||
continue;
|
||||
}
|
||||
if (IsCandidate && MBBDefs.hasIdentical(DefedReg, &MI)) {
|
||||
LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
|
||||
<< printMBBReference(*MBB) << ": " << MI;);
|
||||
removeRedundantDef(&MI);
|
||||
Changed = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Clear any entries in map that MI clobbers.
|
||||
for (auto DefI = MBBDefs.begin(); DefI != MBBDefs.end();) {
|
||||
Register Reg = DefI->first;
|
||||
if (MI.modifiesRegister(Reg, TRI))
|
||||
DefI = MBBDefs.erase(DefI);
|
||||
else
|
||||
++DefI;
|
||||
for (auto DefI : llvm::make_early_inc_range(MBBDefs)) {
|
||||
Register Reg = DefI.first;
|
||||
if (MI.modifiesRegister(Reg, TRI)) {
|
||||
MBBDefs.erase(Reg);
|
||||
MBBKills.erase(Reg);
|
||||
} else if (MI.findRegisterUseOperandIdx(Reg, false /*isKill*/, TRI) != -1)
|
||||
// Keep track of the last use seen so far.
|
||||
MBBKills[Reg] = &MI;
|
||||
}
|
||||
|
||||
// Record this MI for potential later reuse.
|
||||
@@ -233,6 +246,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
|
||||
LLVM_DEBUG(dbgs() << "Found interesting instruction in "
|
||||
<< printMBBReference(*MBB) << ": " << MI;);
|
||||
MBBDefs[DefedReg] = &MI;
|
||||
assert(!MBBKills.count(DefedReg) && "Should already have been removed.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user