[VPlan] Remove unneeded State.UF after 8ec406757c (NFC).

State.UF is no longer needed after 8ec406757c
(https://github.com/llvm/llvm-project/pull/95842). Clean it up,
simplifying ::execute of existing recipes.
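Schematically, the cleanup removes the per-part loop that recipe ::execute implementations used to carry: after the explicit unrollByUF VPlan transform (PR #95842), the plan already contains a copy of each recipe per unroll part, so execution only ever materializes part 0. The sketch below is a minimal, self-contained illustration of that before/after shape using placeholder types, not the real VPlan classes.

// Simplified, hypothetical illustration of the pattern removed by this
// commit; the types below are placeholders, not LLVM's.
#include <cstdio>

struct VPTransformStateSketch {
  unsigned VF = 4; // vectorization factor (fixed width for simplicity)
};

// Before: each recipe's ::execute iterated over all unroll parts itself.
void executeBeforeUnrollByUF(VPTransformStateSketch &State, unsigned UF) {
  for (unsigned Part = 0; Part < UF; ++Part)
    for (unsigned Lane = 0; Lane < State.VF; ++Lane)
      std::printf("scalarize part %u lane %u\n", Part, Lane);
}

// After: unrollByUF has already cloned recipes per part in the VPlan, so a
// single recipe only generates part 0 and the per-part loop disappears.
void executeAfterUnrollByUF(VPTransformStateSketch &State) {
  for (unsigned Lane = 0; Lane < State.VF; ++Lane)
    std::printf("scalarize part 0 lane %u\n", Lane);
}

int main() {
  VPTransformStateSketch State;
  executeBeforeUnrollByUF(State, /*UF=*/2); // old: 2 parts x 4 lanes
  executeAfterUnrollByUF(State);            // new: 1 part x 4 lanes
}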
Florian Hahn
2024-09-22 20:42:37 +01:00
parent 76cffc2aa5
commit 06c3a7d2d7
4 changed files with 508 additions and 704 deletions


@@ -7440,7 +7440,7 @@ static void createAndCollectMergePhiForReduction(
   const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
 
   Value *FinalValue =
-      State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane()));
+      State.get(RedResult, VPIteration(0, VPLane::getFirstLane()));
   auto *ResumePhi =
       dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
   if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
@@ -9453,24 +9453,8 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
   }
 
   if (IsUniform) {
-    // If the recipe is uniform across all parts (instead of just per VF), only
-    // generate a single instance.
-    if ((isa<LoadInst>(UI) || isa<StoreInst>(UI)) &&
-        all_of(operands(),
-               [](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) {
-      State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
-      if (user_begin() != user_end()) {
-        for (unsigned Part = 1; Part < State.UF; ++Part)
-          State.set(this, State.get(this, VPIteration(0, 0)),
-                    VPIteration(Part, 0));
-      }
-      return;
-    }
-
-    // Uniform within VL means we need to generate lane 0 only for each
-    // unrolled copy.
-    for (unsigned Part = 0; Part < State.UF; ++Part)
-      State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, 0), State);
+    // Uniform within VL means we need to generate lane 0.
+    State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
     return;
   }
@@ -9479,17 +9463,15 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
   if (isa<StoreInst>(UI) &&
       vputils::isUniformAfterVectorization(getOperand(1))) {
     auto Lane = VPLane::getLastLaneForVF(State.VF);
-    State.ILV->scalarizeInstruction(UI, this, VPIteration(State.UF - 1, Lane),
-                                    State);
+    State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
     return;
   }
 
   // Generate scalar instances for all VF lanes of all UF parts.
   assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
   const unsigned EndLane = State.VF.getKnownMinValue();
-  for (unsigned Part = 0; Part < State.UF; ++Part)
-    for (unsigned Lane = 0; Lane < EndLane; ++Lane)
-      State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, Lane), State);
+  for (unsigned Lane = 0; Lane < EndLane; ++Lane)
+    State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
 }


@@ -225,7 +225,7 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
 VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
                                    DominatorTree *DT, IRBuilderBase &Builder,
                                    InnerLoopVectorizer *ILV, VPlan *Plan)
-    : VF(VF), UF(UF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
+    : VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
       LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}
 
 Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
@@ -772,9 +772,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
   // Enter replicating mode.
   State->Instance = VPIteration(0, 0);
-  for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
-    State->Instance->Part = Part;
   assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
   for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
        ++Lane) {
@@ -784,7 +781,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
       Block->execute(State);
     }
-    }
   }
 
   // Exit replicating mode.
@@ -963,16 +959,15 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
   // FIXME: Model VF * UF computation completely in VPlan.
   assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
+  unsigned UF = getUF();
   if (VF.getNumUsers()) {
     Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
     VF.setUnderlyingValue(RuntimeVF);
     VFxUF.setUnderlyingValue(
-        State.UF > 1
-            ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, State.UF))
-            : RuntimeVF);
+        UF > 1 ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, UF))
+               : RuntimeVF);
   } else {
-    VFxUF.setUnderlyingValue(
-        createStepForVF(Builder, TCTy, State.VF, State.UF));
+    VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF));
   }
 
   // When vectorizing the epilogue loop, the canonical induction start value
@@ -1019,10 +1014,6 @@ static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
 /// Assumes a single pre-header basic-block was created for this. Introduce
 /// additional basic-blocks as needed, and fill them all.
 void VPlan::execute(VPTransformState *State) {
-  // Set UF to 1, as the unrollByUF VPlan transform already explicitly unrolled
-  // the VPlan.
-  // TODO: Remove State::UF and all uses.
-  State->UF = 1;
   // Initialize CFG state.
   State->CFG.PrevVPBB = nullptr;
   State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
@@ -1106,28 +1097,13 @@ void VPlan::execute(VPTransformState *State) {
     }
 
     auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
-    // For canonical IV, first-order recurrences and in-order reduction phis,
-    // only a single part is generated, which provides the last part from the
-    // previous iteration. For non-ordered reductions all UF parts are
-    // generated.
-    bool SinglePartNeeded =
-        isa<VPCanonicalIVPHIRecipe>(PhiR) ||
-        isa<VPFirstOrderRecurrencePHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
-        (isa<VPReductionPHIRecipe>(PhiR) &&
-         cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
     bool NeedsScalar =
         isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
         (isa<VPReductionPHIRecipe>(PhiR) &&
          cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
-    unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
-    for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
-      Value *Phi = State->get(PhiR, Part, NeedsScalar);
-      Value *Val =
-          State->get(PhiR->getBackedgeValue(),
-                     SinglePartNeeded ? State->UF - 1 : Part, NeedsScalar);
-      cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
-    }
+    Value *Phi = State->get(PhiR, 0, NeedsScalar);
+    Value *Val = State->get(PhiR->getBackedgeValue(), 0, NeedsScalar);
+    cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
   }
 
   State->CFG.DTU.flush();


@@ -256,7 +256,6 @@ struct VPTransformState {
   /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
   ElementCount VF;
-  unsigned UF;
 
   /// Hold the indices to generate specific scalar instructions. Null indicates
   /// that all instances are to be generated, using either scalar or vector
@@ -309,7 +308,7 @@ struct VPTransformState {
     assert((VF.isScalar() || V->getType()->isVectorTy()) &&
            "scalar values must be stored as (Part, 0)");
     if (!Data.PerPartOutput.count(Def)) {
-      DataState::PerPartValuesTy Entry(UF);
+      DataState::PerPartValuesTy Entry(1);
       Data.PerPartOutput[Def] = Entry;
     }
     Data.PerPartOutput[Def][Part] = V;
@@ -1306,11 +1305,10 @@ private:
   /// needed.
   bool canGenerateScalarForFirstLane() const;
 
-  /// Utility methods serving execute(): generates a single instance of the
-  /// modeled instruction for a given part. \returns the generated value for \p
-  /// Part. In some cases an existing value is returned rather than a generated
-  /// one.
-  Value *generatePerPart(VPTransformState &State, unsigned Part);
+  /// Utility methods serving execute(): generates a single vector instance of
+  /// the modeled instruction. \returns the generated value. . In some cases an
+  /// existing value is returned rather than a generated one.
+  Value *generate(VPTransformState &State);
 
   /// Utility methods serving execute(): generates a scalar single instance of
   /// the modeled instruction for a given lane. \returns the scalar generated
@@ -1616,7 +1614,7 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
   Type *ResultTy;
 
-  Value *generate(VPTransformState &State, unsigned Part);
+  Value *generate(VPTransformState &State);
 
 public:
   VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)

File diff suppressed because it is too large.