From 19174126cfe9f7e392104bd0bc56ca8ffb674115 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= Date: Thu, 8 May 2025 08:34:53 +0100 Subject: [PATCH] [SLP] Simplify buildTree() legality checks (NFC) (#138833) This NFC aims to simplify the interfaces used in `buildTree()` to make it easier to understand where decisions for legality are made. In particular, there is now a single point of definition for legality decisions. This makes it clear where all those decisions are made. Previously, multiple variables with a large scope were passed by reference. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 116 ++++++++++-------- 1 file changed, 67 insertions(+), 49 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index a6ae26f2f0e1..7fbbb2681b9e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4063,6 +4063,15 @@ private: } #endif + /// Create a new gather TreeEntry + TreeEntry *newGatherTreeEntry(ArrayRef VL, + const InstructionsState &S, + const EdgeInfo &UserTreeIdx, + ArrayRef ReuseShuffleIndices = {}) { + auto Invalid = ScheduleBundle::invalid(); + return newTreeEntry(VL, Invalid, S, UserTreeIdx, ReuseShuffleIndices); + } + /// Create a new VectorizableTree entry. TreeEntry *newTreeEntry(ArrayRef VL, ScheduleBundle &Bundle, const InstructionsState &S, @@ -4251,13 +4260,34 @@ private: bool areAltOperandsProfitable(const InstructionsState &S, ArrayRef VL) const; + /// Contains all the outputs of legality analysis for a list of values to + /// vectorize. + class ScalarsVectorizationLegality { + InstructionsState S; + bool IsLegal; + bool TryToFindDuplicates; + bool TrySplitVectorize; + + public: + ScalarsVectorizationLegality(InstructionsState S, bool IsLegal, + bool TryToFindDuplicates = true, + bool TrySplitVectorize = false) + : S(S), IsLegal(IsLegal), TryToFindDuplicates(TryToFindDuplicates), + TrySplitVectorize(TrySplitVectorize) { + assert((!IsLegal || (S.valid() && TryToFindDuplicates)) && + "Inconsistent state"); + } + const InstructionsState &getInstructionsState() const { return S; }; + bool isLegal() const { return IsLegal; } + bool tryToFindDuplicates() const { return TryToFindDuplicates; } + bool trySplitVectorize() const { return TrySplitVectorize; } + }; + /// Checks if the specified list of the instructions/values can be vectorized /// in general. - bool isLegalToVectorizeScalars(ArrayRef VL, unsigned Depth, - const EdgeInfo &UserTreeIdx, - InstructionsState &S, - bool &TryToFindDuplicates, - bool &TrySplitVectorize) const; + ScalarsVectorizationLegality + getScalarsVectorizationLegality(ArrayRef VL, unsigned Depth, + const EdgeInfo &UserTreeIdx) const; /// Checks if the specified list of the instructions/values can be vectorized /// and fills required data before actual scheduling of the instructions. @@ -9734,16 +9764,12 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef VL, return true; } -bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef VL, unsigned Depth, - const EdgeInfo &UserTreeIdx, - InstructionsState &S, - bool &TryToFindDuplicates, - bool &TrySplitVectorize) const { +BoUpSLP::ScalarsVectorizationLegality +BoUpSLP::getScalarsVectorizationLegality(ArrayRef VL, unsigned Depth, + const EdgeInfo &UserTreeIdx) const { assert((allConstant(VL) || allSameType(VL)) && "Invalid types!"); - S = getSameOpcode(VL, *TLI); - TryToFindDuplicates = true; - TrySplitVectorize = false; + InstructionsState S = getSameOpcode(VL, *TLI); // Don't go into catchswitch blocks, which can happen with PHIs. // Such blocks can only have PHIs and the catchswitch. There is no @@ -9751,8 +9777,8 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef VL, unsigned Depth, if (S && isa(S.getMainOp()->getParent()->getTerminator())) { LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n"); // Do not try to pack to avoid extra instructions here. - TryToFindDuplicates = false; - return false; + return ScalarsVectorizationLegality(S, /*IsLegal=*/false, + /*TryToFindDuplicates=*/false); } // Check if this is a duplicate of another entry. @@ -9762,14 +9788,14 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef VL, unsigned Depth, if (E->isSame(VL)) { LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp() << ".\n"); - return false; + return ScalarsVectorizationLegality(S, /*IsLegal=*/false); } SmallPtrSet Values(llvm::from_range, E->Scalars); if (all_of(VL, [&](Value *V) { return isa(V) || Values.contains(V); })) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n"); - return false; + return ScalarsVectorizationLegality(S, /*IsLegal=*/false); } } } @@ -9786,7 +9812,7 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef VL, unsigned Depth, cast(I)->getOpcode() == S.getOpcode(); })))) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); - return false; + return ScalarsVectorizationLegality(S, /*IsLegal=*/false); } // Don't handle scalable vectors @@ -9794,15 +9820,15 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef VL, unsigned Depth, isa( cast(S.getMainOp())->getVectorOperandType())) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n"); - return false; + return ScalarsVectorizationLegality(S, /*IsLegal=*/false); } // Don't handle vectors. if (!SLPReVec && getValueType(VL.front())->isVectorTy()) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); // Do not try to pack to avoid extra instructions here. - TryToFindDuplicates = false; - return false; + return ScalarsVectorizationLegality(S, /*IsLegal=*/false, + /*TryToFindDuplicates=*/false); } // If all of the operands are identical or constant we have a simple solution. @@ -9892,11 +9918,12 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef VL, unsigned Depth, if (!S) { LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to " "C,S,B,O, small shuffle. \n"); - TrySplitVectorize = true; - return false; + return ScalarsVectorizationLegality(S, /*IsLegal=*/false, + /*TryToFindDuplicates=*/true, + /*TrySplitVectorize=*/true); } LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n"); - return false; + return ScalarsVectorizationLegality(S, /*IsLegal=*/false); } // Don't vectorize ephemeral values. @@ -9906,8 +9933,8 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef VL, unsigned Depth, LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V << ") is ephemeral.\n"); // Do not try to pack to avoid extra instructions here. - TryToFindDuplicates = false; - return false; + return ScalarsVectorizationLegality(S, /*IsLegal=*/false, + /*TryToFindDuplicates=*/false); } } } @@ -9956,7 +9983,7 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef VL, unsigned Depth, if (PreferScalarize) { LLVM_DEBUG(dbgs() << "SLP: The instructions are in tree and alternate " "node is not profitable.\n"); - return false; + return ScalarsVectorizationLegality(S, /*IsLegal=*/false); } } @@ -9965,7 +9992,7 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef VL, unsigned Depth, for (Value *V : VL) { if (UserIgnoreList->contains(V)) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); - return false; + return ScalarsVectorizationLegality(S, /*IsLegal=*/false); } } } @@ -9995,9 +10022,9 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef VL, unsigned Depth, // Do not vectorize EH and non-returning blocks, not profitable in most // cases. LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); - return false; + return ScalarsVectorizationLegality(S, /*IsLegal=*/false); } - return true; + return ScalarsVectorizationLegality(S, /*IsLegal=*/true); } void BoUpSLP::buildTreeRec(ArrayRef VLRef, unsigned Depth, @@ -10008,7 +10035,6 @@ void BoUpSLP::buildTreeRec(ArrayRef VLRef, unsigned Depth, SmallVector ReuseShuffleIndices; SmallVector VL(VLRef.begin(), VLRef.end()); - InstructionsState S = InstructionsState::invalid(); // Tries to build split node. auto TrySplitNode = [&](const InstructionsState &LocalState) { SmallVector Op1, Op2; @@ -10042,22 +10068,20 @@ void BoUpSLP::buildTreeRec(ArrayRef VLRef, unsigned Depth, return true; }; - bool TryToPackDuplicates; - bool TrySplitVectorize; - if (!isLegalToVectorizeScalars(VL, Depth, UserTreeIdx, S, TryToPackDuplicates, - TrySplitVectorize)) { - if (TrySplitVectorize) { + ScalarsVectorizationLegality Legality = + getScalarsVectorizationLegality(VL, Depth, UserTreeIdx); + const InstructionsState &S = Legality.getInstructionsState(); + if (!Legality.isLegal()) { + if (Legality.trySplitVectorize()) { auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL); // Last chance to try to vectorize alternate node. if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp))) return; } - if (TryToPackDuplicates) + if (Legality.tryToFindDuplicates()) tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx); - auto Invalid = ScheduleBundle::invalid(); - newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx, - ReuseShuffleIndices); + newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices); return; } @@ -10068,9 +10092,7 @@ void BoUpSLP::buildTreeRec(ArrayRef VLRef, unsigned Depth, // Check that every instruction appears once in this bundle. if (!tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx, /*TryPad=*/true)) { - auto Invalid = ScheduleBundle::invalid(); - newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx, - ReuseShuffleIndices); + newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices); return; } @@ -10083,9 +10105,7 @@ void BoUpSLP::buildTreeRec(ArrayRef VLRef, unsigned Depth, TreeEntry::EntryState State = getScalarsVectorizationState( S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps); if (State == TreeEntry::NeedToGather) { - auto Invalid = ScheduleBundle::invalid(); - newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx, - ReuseShuffleIndices); + newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices); return; } @@ -10109,9 +10129,7 @@ void BoUpSLP::buildTreeRec(ArrayRef VLRef, unsigned Depth, // Last chance to try to vectorize alternate node. if (S.isAltShuffle() && ReuseShuffleIndices.empty() && TrySplitNode(S)) return; - auto Invalid = ScheduleBundle::invalid(); - newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx, - ReuseShuffleIndices); + newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices); NonScheduledFirst.insert(VL.front()); if (S.getOpcode() == Instruction::Load && BS.ScheduleRegionSize < BS.ScheduleRegionSizeLimit)