[LV] Remove collectTriviallyDeadInstructions, already handled by VP DCE.
Now that removeDeadRecipes can remove most dead recipes across a whole VPlan, there is no need to first collect some dead instructions. Instead, removeDeadRecipes can simply clean them up. Depends on D127580. Reviewed By: Ayal. Differential Revision: https://reviews.llvm.org/D128408
This commit is contained in:
@@ -332,11 +332,6 @@ public:
|
||||
bool requiresTooManyRuntimeChecks() const;
|
||||
|
||||
protected:
|
||||
/// Collect the instructions from the original loop that would be trivially
|
||||
/// dead in the vectorized loop if generated.
|
||||
void collectTriviallyDeadInstructions(
|
||||
SmallPtrSetImpl<Instruction *> &DeadInstructions);
|
||||
|
||||
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
|
||||
/// according to the information gathered by Legal when it checked if it is
|
||||
/// legal to vectorize the loop.
|
||||
|
||||
@@ -7645,51 +7645,6 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// Populate \p DeadInstructions with instructions of OrigLoop that are
// considered trivially dead for the vectorized loop:
//  * operand 0 of each exiting block's terminator, when it is an Instruction
//    with exactly one use, together with any single-use trunc operands of it;
//  * each induction variable's latch update, when every user of the update is
//    either the induction phi itself or already in \p DeadInstructions.
// The exit conditions are collected first because the induction-update check
// below consults the set built so far.
void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
|
||||
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
|
||||
|
||||
// We create new control-flow for the vectorized loop, so an original exit
|
||||
// condition will be dead after vectorization if it is only used by the
|
||||
// terminator.
|
||||
SmallVector<BasicBlock*> ExitingBlocks;
|
||||
OrigLoop->getExitingBlocks(ExitingBlocks);
|
||||
for (auto *BB : ExitingBlocks) {
|
||||
auto *Cmp = dyn_cast<Instruction>(BB->getTerminator()->getOperand(0));
|
||||
if (!Cmp || !Cmp->hasOneUse())
|
||||
continue;
|
||||
|
||||
// TODO: we should introduce a getUniqueExitingBlocks on Loop
// insert(...).second is false when Cmp is already in the set; presumably
// this happens when the same exiting block is listed more than once.
|
||||
if (!DeadInstructions.insert(Cmp).second)
|
||||
continue;
|
||||
|
||||
// An operand of the icmp is often a dead trunc, used by IndUpdate.
|
||||
// TODO: can recurse through operands in general
|
||||
for (Value *Op : Cmp->operands()) {
|
||||
if (isa<TruncInst>(Op) && Op->hasOneUse())
|
||||
DeadInstructions.insert(cast<Instruction>(Op));
|
||||
}
|
||||
}
|
||||
|
||||
// We create new "steps" for induction variable updates to which the original
|
||||
// induction variables map. An original update instruction will be dead if
|
||||
// all its users except the induction variable are dead.
|
||||
auto *Latch = OrigLoop->getLoopLatch();
|
||||
for (auto &Induction : Legal->getInductionVars()) {
|
||||
PHINode *Ind = Induction.first;
|
||||
auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
|
||||
|
||||
// If the tail is to be folded by masking, the primary induction variable,
|
||||
// if it exists, isn't dead: it will be used for masking. Don't kill it.
// NOTE(review): this compares the update instruction (IndUpdate), not the
// phi (Ind), with getPrimaryInduction(). If that accessor returns the
// induction phi, the guard can never fire -- confirm the intent.
|
||||
if (CM.foldTailByMasking() && IndUpdate == Legal->getPrimaryInduction())
|
||||
continue;
|
||||
|
||||
// The update is dead only if each user is the phi itself or was already
// recorded as dead above.
if (llvm::all_of(IndUpdate->users(), [&](User *U) -> bool {
|
||||
return U == Ind || DeadInstructions.count(cast<Instruction>(U));
|
||||
}))
|
||||
DeadInstructions.insert(IndUpdate);
|
||||
}
|
||||
}
|
||||
|
||||
// The unroller's implementation hands \p V back unchanged; no broadcast
// instructions are created here.
Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
@@ -8577,19 +8532,11 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
|
||||
ElementCount MaxVF) {
|
||||
assert(OrigLoop->isInnermost() && "Inner loop expected.");
|
||||
|
||||
// Collect instructions from the original loop that will become trivially dead
|
||||
// in the vectorized loop. We don't need to vectorize these instructions. For
|
||||
// example, original induction update instructions can become dead because we
|
||||
// separately emit induction "steps" when generating code for the new loop.
|
||||
// Similarly, we create a new latch condition when setting up the structure
|
||||
// of the new loop, so the old one can become dead.
|
||||
SmallPtrSet<Instruction *, 4> DeadInstructions;
|
||||
collectTriviallyDeadInstructions(DeadInstructions);
|
||||
|
||||
// Add assume instructions we need to drop to DeadInstructions, to prevent
|
||||
// them from being added to the VPlan.
|
||||
// TODO: We only need to drop assumes in blocks that get flattened. If the
|
||||
// control flow is preserved, we should keep them.
|
||||
SmallPtrSet<Instruction *, 4> DeadInstructions;
|
||||
auto &ConditionalAssumes = Legal->getConditionalAssumes();
|
||||
DeadInstructions.insert(ConditionalAssumes.begin(), ConditionalAssumes.end());
|
||||
|
||||
|
||||
@@ -98,10 +98,9 @@ attributes #0 = { "target-cpu"="knl" }
|
||||
; FORCE-NEXT: store i32 [[TMP0]], i32* @b, align 1
|
||||
; FORCE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @a, i32 0, i32 [[TMP0]]
|
||||
; FORCE-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 1
|
||||
; FORCE-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
|
||||
; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE]]
|
||||
; FORCE: pred.load.continue:
|
||||
; FORCE-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
|
||||
; FORCE-NEXT: [[TMP9:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
|
||||
; FORCE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
|
||||
; FORCE-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4]]
|
||||
; FORCE: pred.load.if1:
|
||||
@@ -109,10 +108,9 @@ attributes #0 = { "target-cpu"="knl" }
|
||||
; FORCE-NEXT: store i32 [[TMP1]], i32* @b, align 1
|
||||
; FORCE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @a, i32 0, i32 [[TMP1]]
|
||||
; FORCE-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 1
|
||||
; FORCE-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP12]], i32 1
|
||||
; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE4]]
|
||||
; FORCE: pred.load.continue2:
|
||||
; FORCE-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF3]] ]
|
||||
; FORCE-NEXT: [[TMP13:%.*]] = phi i32 [ poison, %pred.load.continue ], [ [[TMP12]], %pred.load.if1 ]
|
||||
; FORCE-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2
|
||||
; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
|
||||
; FORCE-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
|
||||
|
||||
@@ -36,8 +36,7 @@ define void @a(i8* readnone %b) {
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
||||
; CHECK: pred.store.if:
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i64 -1
|
||||
; CHECK-NEXT: store i8 95, i8* [[TMP11]], align 1
|
||||
; CHECK-NEXT: store i8 95, i8* [[TMP4]], align 1
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
|
||||
; CHECK: pred.store.continue:
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP9]], i32 1
|
||||
|
||||
Reference in New Issue
Block a user