[VPlan] Remove unneeded State.UF after 8ec406757c (NFC).
State.UF is not needed any longer after 8ec406757c
(https://github.com/llvm/llvm-project/pull/95842). Clean it up,
simplifying ::execute of existing recipes.
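
Every hunk below follows the same pattern: per-part loops and State.UF - 1 indexing in recipe ::execute implementations collapse to a single part-0 access, because the unrollByUF transform from PR #95842 already creates one recipe per unroll part. A standalone sketch of that before/after shape (mock types only; RecipeState and emitPart are illustrative placeholders, not the real VPTransformState API):

// Illustrative sketch only: RecipeState/emitPart are mock stand-ins for
// VPTransformState and a recipe's code-generation step.
#include <cstdio>

struct RecipeState {
  unsigned UF = 4; // the unroll factor the old VPTransformState carried
};

static void emitPart(unsigned Part) { std::printf("emit code for part %u\n", Part); }

// Before: each recipe's execute() iterated over all unroll parts itself.
static void executeOld(const RecipeState &State) {
  for (unsigned Part = 0; Part < State.UF; ++Part)
    emitPart(Part);
}

// After 8ec406757c: the VPlan is unrolled explicitly up front, so execute()
// always emits exactly one part (index 0) and State.UF can be dropped.
static void executeNew(const RecipeState &) { emitPart(0); }

int main() {
  executeOld(RecipeState{});
  executeNew(RecipeState{});
  return 0;
}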
@@ -7440,7 +7440,7 @@ static void createAndCollectMergePhiForReduction(
   const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
   Value *FinalValue =
-      State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane()));
+      State.get(RedResult, VPIteration(0, VPLane::getFirstLane()));
   auto *ResumePhi =
       dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
   if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
@@ -9453,24 +9453,8 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
   }

   if (IsUniform) {
-    // If the recipe is uniform across all parts (instead of just per VF), only
-    // generate a single instance.
-    if ((isa<LoadInst>(UI) || isa<StoreInst>(UI)) &&
-        all_of(operands(),
-               [](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) {
-      State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
-      if (user_begin() != user_end()) {
-        for (unsigned Part = 1; Part < State.UF; ++Part)
-          State.set(this, State.get(this, VPIteration(0, 0)),
-                    VPIteration(Part, 0));
-      }
-      return;
-    }
-
-    // Uniform within VL means we need to generate lane 0 only for each
-    // unrolled copy.
-    for (unsigned Part = 0; Part < State.UF; ++Part)
-      State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, 0), State);
+    // Uniform within VL means we need to generate lane 0.
+    State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
     return;
   }

@@ -9479,17 +9463,15 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
   if (isa<StoreInst>(UI) &&
       vputils::isUniformAfterVectorization(getOperand(1))) {
     auto Lane = VPLane::getLastLaneForVF(State.VF);
-    State.ILV->scalarizeInstruction(UI, this, VPIteration(State.UF - 1, Lane),
-                                    State);
+    State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
     return;
   }

   // Generate scalar instances for all VF lanes of all UF parts.
   assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
   const unsigned EndLane = State.VF.getKnownMinValue();
-  for (unsigned Part = 0; Part < State.UF; ++Part)
-    for (unsigned Lane = 0; Lane < EndLane; ++Lane)
-      State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, Lane), State);
+  for (unsigned Lane = 0; Lane < EndLane; ++Lane)
+    State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
 }

 // Determine how to lower the scalar epilogue, which depends on 1) optimising
@@ -225,7 +225,7 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
 VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
                                    DominatorTree *DT, IRBuilderBase &Builder,
                                    InnerLoopVectorizer *ILV, VPlan *Plan)
-    : VF(VF), UF(UF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
+    : VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
       LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}

 Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
@@ -772,9 +772,6 @@ void VPRegionBlock::execute(VPTransformState *State) {

   // Enter replicating mode.
   State->Instance = VPIteration(0, 0);
-
-  for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
-    State->Instance->Part = Part;
   assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
   for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
        ++Lane) {
@@ -784,7 +781,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
       Block->execute(State);
     }
   }
-  }

   // Exit replicating mode.
@@ -963,16 +959,15 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
   // FIXME: Model VF * UF computation completely in VPlan.
   assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
+  unsigned UF = getUF();
   if (VF.getNumUsers()) {
     Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
     VF.setUnderlyingValue(RuntimeVF);
     VFxUF.setUnderlyingValue(
-        State.UF > 1
-            ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, State.UF))
-            : RuntimeVF);
+        UF > 1 ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, UF))
+               : RuntimeVF);
   } else {
-    VFxUF.setUnderlyingValue(
-        createStepForVF(Builder, TCTy, State.VF, State.UF));
+    VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF));
   }

   // When vectorizing the epilogue loop, the canonical induction start value
@@ -1019,10 +1014,6 @@ static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
 /// Assumes a single pre-header basic-block was created for this. Introduce
 /// additional basic-blocks as needed, and fill them all.
 void VPlan::execute(VPTransformState *State) {
-  // Set UF to 1, as the unrollByUF VPlan transform already explicitly unrolled
-  // the VPlan.
-  // TODO: Remove State::UF and all uses.
-  State->UF = 1;
   // Initialize CFG state.
   State->CFG.PrevVPBB = nullptr;
   State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
@@ -1106,28 +1097,13 @@ void VPlan::execute(VPTransformState *State) {
     }

     auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
-    // For canonical IV, first-order recurrences and in-order reduction phis,
-    // only a single part is generated, which provides the last part from the
-    // previous iteration. For non-ordered reductions all UF parts are
-    // generated.
-    bool SinglePartNeeded =
-        isa<VPCanonicalIVPHIRecipe>(PhiR) ||
-        isa<VPFirstOrderRecurrencePHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
-        (isa<VPReductionPHIRecipe>(PhiR) &&
-         cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
     bool NeedsScalar =
         isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
         (isa<VPReductionPHIRecipe>(PhiR) &&
          cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
-    unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
-
-    for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
-      Value *Phi = State->get(PhiR, Part, NeedsScalar);
-      Value *Val =
-          State->get(PhiR->getBackedgeValue(),
-                     SinglePartNeeded ? State->UF - 1 : Part, NeedsScalar);
-      cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
-    }
+    Value *Phi = State->get(PhiR, 0, NeedsScalar);
+    Value *Val = State->get(PhiR->getBackedgeValue(), 0, NeedsScalar);
+    cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
   }

   State->CFG.DTU.flush();
@@ -256,7 +256,6 @@ struct VPTransformState {

   /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
   ElementCount VF;
-  unsigned UF;

   /// Hold the indices to generate specific scalar instructions. Null indicates
   /// that all instances are to be generated, using either scalar or vector
@@ -309,7 +308,7 @@ struct VPTransformState {
     assert((VF.isScalar() || V->getType()->isVectorTy()) &&
            "scalar values must be stored as (Part, 0)");
     if (!Data.PerPartOutput.count(Def)) {
-      DataState::PerPartValuesTy Entry(UF);
+      DataState::PerPartValuesTy Entry(1);
       Data.PerPartOutput[Def] = Entry;
     }
     Data.PerPartOutput[Def][Part] = V;
@@ -1306,11 +1305,10 @@ private:
   /// needed.
   bool canGenerateScalarForFirstLane() const;

-  /// Utility methods serving execute(): generates a single instance of the
-  /// modeled instruction for a given part. \returns the generated value for \p
-  /// Part. In some cases an existing value is returned rather than a generated
-  /// one.
-  Value *generatePerPart(VPTransformState &State, unsigned Part);
+  /// Utility methods serving execute(): generates a single vector instance of
+  /// the modeled instruction. \returns the generated value. In some cases an
+  /// existing value is returned rather than a generated one.
+  Value *generate(VPTransformState &State);

   /// Utility methods serving execute(): generates a scalar single instance of
   /// the modeled instruction for a given lane. \returns the scalar generated
@@ -1616,7 +1614,7 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {

   Type *ResultTy;

-  Value *generate(VPTransformState &State, unsigned Part);
+  Value *generate(VPTransformState &State);

 public:
   VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)