Reland "[LV] Print remark when loop cannot be vectorized due to invalid costs."

The original patch was: https://reviews.llvm.org/D105806 There were some issues with undeterministic behaviour of the sorting function, which led to scalable-call.ll passing and/or failing. This patch fixes the issue by numbering all instructions in the array first, and using that number as the order, which should provide a consistent ordering. This reverts commit a607f64118.
2021-07-15 15:32:31 +01:00
parent a6ca88e908
commit 239d01fa88
2 changed files with 144 additions and 11 deletions
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1676,8 +1676,13 @@ private:
  /// Returns the expected execution cost. The unit of the cost does
  /// not matter because we use the 'cost' units to compare different
  /// vector widths. The cost that is returned is *not* normalized by
-  /// the factor width.
-  VectorizationCostTy expectedCost(ElementCount VF);
+  /// the factor width. If \p Invalid is not nullptr, this function
+  /// will add a pair(Instruction*, ElementCount) to \p Invalid for
+  /// each instruction that has an Invalid cost for the given VF.
+  using InstructionVFPair = std::pair<Instruction *, ElementCount>;
+  VectorizationCostTy
+  expectedCost(ElementCount VF,
+               SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);

  /// Returns the execution time cost of an instruction for a given vector
  /// width. Vector width of one means scalar.
@@ -6075,12 +6080,13 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
    ChosenFactor.Cost = InstructionCost::getMax();
  }

+  SmallVector<InstructionVFPair> InvalidCosts;
  for (const auto &i : VFCandidates) {
    // The cost for scalar VF=1 is already calculated, so ignore it.
    if (i.isScalar())
      continue;

-    VectorizationCostTy C = expectedCost(i);
+    VectorizationCostTy C = expectedCost(i, &InvalidCosts);
    VectorizationFactor Candidate(i, C.first);
    LLVM_DEBUG(
        dbgs() << "LV: Vector loop of width " << i << " costs: "
@@ -6103,6 +6109,66 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
      ChosenFactor = Candidate;
  }

+  // Emit a report of VFs with invalid costs in the loop.
+  if (!InvalidCosts.empty()) {
+    // Group the remarks per instruction, keeping the instruction order from
+    // InvalidCosts.
+    std::map<Instruction *, unsigned> Numbering;
+    unsigned I = 0;
+    for (auto &Pair : InvalidCosts)
+      if (!Numbering.count(Pair.first))
+        Numbering[Pair.first] = I++;
+
+    // Sort the list, first on instruction(number) then on VF.
+    llvm::sort(InvalidCosts,
+               [&Numbering](InstructionVFPair &A, InstructionVFPair &B) {
+                 if (Numbering[A.first] != Numbering[B.first])
+                   return Numbering[A.first] < Numbering[B.first];
+                 ElementCountComparator ECC;
+                 return ECC(A.second, B.second);
+               });
+
+    // For a list of ordered instruction-vf pairs:
+    //   [(load, vf1), (load, vf2), (store, vf1)]
+    // Group the instructions together to emit separate remarks for:
+    //   load  (vf1, vf2)
+    //   store (vf1)
+    auto Tail = ArrayRef<InstructionVFPair>(InvalidCosts);
+    auto Subset = ArrayRef<InstructionVFPair>();
+    do {
+      if (Subset.empty())
+        Subset = Tail.take_front(1);
+
+      Instruction *I = Subset.front().first;
+
+      // If the next instruction is different, or if there are no other pairs,
+      // emit a remark for the collated subset. e.g.
+      //   [(load, vf1), (load, vf2))]
+      // to emit:
+      //  remark: invalid costs for 'load' at VF=(vf, vf2)
+      if (Subset == Tail || Tail[Subset.size()].first != I) {
+        std::string OutString;
+        raw_string_ostream OS(OutString);
+        assert(!Subset.empty() && "Unexpected empty range");
+        OS << "Instruction with invalid costs prevented vectorization at VF=(";
+        for (auto &Pair : Subset)
+          OS << (Pair.second == Subset.front().second ? "" : ", ")
+             << Pair.second;
+        OS << "):";
+        if (auto *CI = dyn_cast<CallInst>(I))
+          OS << " call to " << CI->getCalledFunction()->getName();
+        else
+          OS << " " << I->getOpcodeName();
+        OS.flush();
+        reportVectorizationInfo(OutString, "InvalidCost", ORE, TheLoop, I);
+        Tail = Tail.drop_front(Subset.size());
+        Subset = {};
+      } else
+        // Grow the subset by one element
+        Subset = Tail.take_front(Subset.size() + 1);
+    } while (!Tail.empty());
+  }
+
  if (!EnableCondStoresVectorization && NumPredStores) {
    reportVectorizationFailure("There are conditional stores.",
        "store that is conditionally executed prevents vectorization",
@@ -6884,7 +6950,8 @@ int LoopVectorizationCostModel::computePredInstDiscount(
 }

 LoopVectorizationCostModel::VectorizationCostTy
-LoopVectorizationCostModel::expectedCost(ElementCount VF) {
+LoopVectorizationCostModel::expectedCost(
+    ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
  VectorizationCostTy Cost;

  // For each block.
@@ -6904,6 +6971,10 @@ LoopVectorizationCostModel::expectedCost(ElementCount VF) {
      if (ForceTargetInstructionCost.getNumOccurrences() > 0)
        C.first = InstructionCost(ForceTargetInstructionCost);

+      // Keep a list of instructions with invalid costs.
+      if (Invalid && !C.first.isValid())
+        Invalid->emplace_back(&I, VF);
+
      BlockCost.first += C.first;
      BlockCost.second |= C.second;
      LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first