Reland "[LV] Print remark when loop cannot be vectorized due to invalid costs."

The original patch was:
  https://reviews.llvm.org/D105806

There were some issues with non-deterministic behaviour of the sorting
function, which led to scalable-call.ll intermittently passing or failing. This
patch fixes the issue by numbering all instructions in the array first,
and using that number as the order, which should provide a consistent
ordering.

This reverts commit a607f64118.
This commit is contained in:
Sander de Smalen
2021-07-15 15:32:31 +01:00
parent a6ca88e908
commit 239d01fa88
2 changed files with 144 additions and 11 deletions

View File

@@ -1676,8 +1676,13 @@ private:
/// Returns the expected execution cost. The unit of the cost does
/// not matter because we use the 'cost' units to compare different
/// vector widths. The cost that is returned is *not* normalized by
/// the factor width.
VectorizationCostTy expectedCost(ElementCount VF);
/// the factor width. If \p Invalid is not nullptr, this function
/// will add a pair(Instruction*, ElementCount) to \p Invalid for
/// each instruction that has an Invalid cost for the given VF.
using InstructionVFPair = std::pair<Instruction *, ElementCount>;
VectorizationCostTy
expectedCost(ElementCount VF,
SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
/// Returns the execution time cost of an instruction for a given vector
/// width. Vector width of one means scalar.
@@ -6075,12 +6080,13 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
ChosenFactor.Cost = InstructionCost::getMax();
}
SmallVector<InstructionVFPair> InvalidCosts;
for (const auto &i : VFCandidates) {
// The cost for scalar VF=1 is already calculated, so ignore it.
if (i.isScalar())
continue;
VectorizationCostTy C = expectedCost(i);
VectorizationCostTy C = expectedCost(i, &InvalidCosts);
VectorizationFactor Candidate(i, C.first);
LLVM_DEBUG(
dbgs() << "LV: Vector loop of width " << i << " costs: "
@@ -6103,6 +6109,66 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
ChosenFactor = Candidate;
}
// Emit a report of VFs with invalid costs in the loop.
if (!InvalidCosts.empty()) {
// Group the remarks per instruction, keeping the instruction order from
// InvalidCosts.
std::map<Instruction *, unsigned> Numbering;
unsigned I = 0;
for (auto &Pair : InvalidCosts)
if (!Numbering.count(Pair.first))
Numbering[Pair.first] = I++;
// Sort the list, first on instruction(number) then on VF.
llvm::sort(InvalidCosts,
[&Numbering](InstructionVFPair &A, InstructionVFPair &B) {
if (Numbering[A.first] != Numbering[B.first])
return Numbering[A.first] < Numbering[B.first];
ElementCountComparator ECC;
return ECC(A.second, B.second);
});
// For a list of ordered instruction-vf pairs:
// [(load, vf1), (load, vf2), (store, vf1)]
// Group the instructions together to emit separate remarks for:
// load (vf1, vf2)
// store (vf1)
auto Tail = ArrayRef<InstructionVFPair>(InvalidCosts);
auto Subset = ArrayRef<InstructionVFPair>();
do {
if (Subset.empty())
Subset = Tail.take_front(1);
Instruction *I = Subset.front().first;
// If the next instruction is different, or if there are no other pairs,
// emit a remark for the collated subset. e.g.
// [(load, vf1), (load, vf2)]
// to emit:
// remark: invalid costs for 'load' at VF=(vf1, vf2)
if (Subset == Tail || Tail[Subset.size()].first != I) {
std::string OutString;
raw_string_ostream OS(OutString);
assert(!Subset.empty() && "Unexpected empty range");
OS << "Instruction with invalid costs prevented vectorization at VF=(";
for (auto &Pair : Subset)
OS << (Pair.second == Subset.front().second ? "" : ", ")
<< Pair.second;
OS << "):";
if (auto *CI = dyn_cast<CallInst>(I))
OS << " call to " << CI->getCalledFunction()->getName();
else
OS << " " << I->getOpcodeName();
OS.flush();
reportVectorizationInfo(OutString, "InvalidCost", ORE, TheLoop, I);
Tail = Tail.drop_front(Subset.size());
Subset = {};
} else
// Grow the subset by one element
Subset = Tail.take_front(Subset.size() + 1);
} while (!Tail.empty());
}
if (!EnableCondStoresVectorization && NumPredStores) {
reportVectorizationFailure("There are conditional stores.",
"store that is conditionally executed prevents vectorization",
@@ -6884,7 +6950,8 @@ int LoopVectorizationCostModel::computePredInstDiscount(
}
LoopVectorizationCostModel::VectorizationCostTy
LoopVectorizationCostModel::expectedCost(ElementCount VF) {
LoopVectorizationCostModel::expectedCost(
ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
VectorizationCostTy Cost;
// For each block.
@@ -6904,6 +6971,10 @@ LoopVectorizationCostModel::expectedCost(ElementCount VF) {
if (ForceTargetInstructionCost.getNumOccurrences() > 0)
C.first = InstructionCost(ForceTargetInstructionCost);
// Keep a list of instructions with invalid costs.
if (Invalid && !C.first.isValid())
Invalid->emplace_back(&I, VF);
BlockCost.first += C.first;
BlockCost.second |= C.second;
LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first