Revert "[SLP] Extend reordering data of tree entry to support PHI nodes"
This reverts commit 87a20868eb as it has
problems with scalable vectors and use-list orders. Test to follow.
This commit is contained in:
@@ -3795,49 +3795,6 @@ BoUpSLP::findPartiallyOrderedLoads(const BoUpSLP::TreeEntry &TE) {
|
||||
return None;
|
||||
}
|
||||
|
||||
/// Check if two insertelement instructions are from the same buildvector.
|
||||
static bool areTwoInsertFromSameBuildVector(
|
||||
InsertElementInst *VU, InsertElementInst *V,
|
||||
function_ref<Value *(InsertElementInst *)> GetBaseOperand) {
|
||||
// Instructions must be from the same basic blocks.
|
||||
if (VU->getParent() != V->getParent())
|
||||
return false;
|
||||
// Checks if 2 insertelements are from the same buildvector.
|
||||
if (VU->getType() != V->getType())
|
||||
return false;
|
||||
// Multiple used inserts are separate nodes.
|
||||
if (!VU->hasOneUse() && !V->hasOneUse())
|
||||
return false;
|
||||
auto *IE1 = VU;
|
||||
auto *IE2 = V;
|
||||
unsigned Idx1 = *getInsertIndex(IE1);
|
||||
unsigned Idx2 = *getInsertIndex(IE2);
|
||||
// Go through the vector operand of insertelement instructions trying to find
|
||||
// either VU as the original vector for IE2 or V as the original vector for
|
||||
// IE1.
|
||||
do {
|
||||
if (IE2 == VU)
|
||||
return VU->hasOneUse();
|
||||
if (IE1 == V)
|
||||
return V->hasOneUse();
|
||||
if (IE1) {
|
||||
if ((IE1 != VU && !IE1->hasOneUse()) ||
|
||||
getInsertIndex(IE1).value_or(Idx2) == Idx2)
|
||||
IE1 = nullptr;
|
||||
else
|
||||
IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
|
||||
}
|
||||
if (IE2) {
|
||||
if ((IE2 != V && !IE2->hasOneUse()) ||
|
||||
getInsertIndex(IE2).value_or(Idx1) == Idx1)
|
||||
IE2 = nullptr;
|
||||
else
|
||||
IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
|
||||
}
|
||||
} while (IE1 || IE2);
|
||||
return false;
|
||||
}
|
||||
|
||||
Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
|
||||
bool TopToBottom) {
|
||||
// No need to reorder if need to shuffle reuses, still need to shuffle the
|
||||
@@ -3901,58 +3858,6 @@ Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
|
||||
(TopToBottom && isa<StoreInst, InsertElementInst>(TE.getMainOp()))) &&
|
||||
!TE.isAltShuffle())
|
||||
return TE.ReorderIndices;
|
||||
if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
|
||||
auto PHICompare = [](llvm::Value *V1, llvm::Value *V2) {
|
||||
if (V1->user_empty() || V2->user_empty())
|
||||
return false;
|
||||
auto *FirstUserOfPhi1 = cast<Instruction>(*V1->user_begin());
|
||||
auto *FirstUserOfPhi2 = cast<Instruction>(*V2->user_begin());
|
||||
if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1))
|
||||
if (auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2)) {
|
||||
if (!areTwoInsertFromSameBuildVector(
|
||||
IE1, IE2,
|
||||
[](InsertElementInst *II) { return II->getOperand(0); }))
|
||||
return false;
|
||||
Optional<unsigned> Idx1 = getInsertIndex(IE1);
|
||||
Optional<unsigned> Idx2 = getInsertIndex(IE2);
|
||||
if (Idx1 == None || Idx2 == None)
|
||||
return false;
|
||||
return *Idx1 < *Idx2;
|
||||
}
|
||||
if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1))
|
||||
if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
|
||||
if (EE1->getOperand(0) != EE2->getOperand(0))
|
||||
return false;
|
||||
Optional<unsigned> Idx1 = getExtractIndex(EE1);
|
||||
Optional<unsigned> Idx2 = getExtractIndex(EE2);
|
||||
if (Idx1 == None || Idx2 == None)
|
||||
return false;
|
||||
return *Idx1 < *Idx2;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
auto IsIdentityOrder = [](const OrdersType &Order) {
|
||||
for (unsigned Idx : seq<unsigned>(0, Order.size()))
|
||||
if (Idx != Order[Idx])
|
||||
return false;
|
||||
return true;
|
||||
};
|
||||
if (!TE.ReorderIndices.empty())
|
||||
return TE.ReorderIndices;
|
||||
DenseMap<Value *, unsigned> PhiToId;
|
||||
SmallVector<Value *, 4> Phis;
|
||||
OrdersType ResOrder(TE.Scalars.size());
|
||||
for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id) {
|
||||
PhiToId[TE.Scalars[Id]] = Id;
|
||||
Phis.push_back(TE.Scalars[Id]);
|
||||
}
|
||||
llvm::stable_sort(Phis, PHICompare);
|
||||
for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id)
|
||||
ResOrder[Id] = PhiToId[Phis[Id]];
|
||||
if (IsIdentityOrder(ResOrder))
|
||||
return {};
|
||||
return ResOrder;
|
||||
}
|
||||
if (TE.State == TreeEntry::NeedToGather) {
|
||||
// TODO: add analysis of other gather nodes with extractelement
|
||||
// instructions and other values/instructions, not only undefs.
|
||||
@@ -4030,9 +3935,6 @@ void BoUpSLP::reorderTopToBottom() {
|
||||
// their ordering.
|
||||
DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
|
||||
|
||||
// Phi nodes can have preferred ordering based on their result users
|
||||
DenseMap<const TreeEntry *, OrdersType> PhisToOrders;
|
||||
|
||||
// AltShuffles can also have a preferred ordering that leads to fewer
|
||||
// instructions, e.g., the addsub instruction in x86.
|
||||
DenseMap<const TreeEntry *, OrdersType> AltShufflesToOrders;
|
||||
@@ -4047,7 +3949,7 @@ void BoUpSLP::reorderTopToBottom() {
|
||||
// extracts.
|
||||
for_each(VectorizableTree, [this, &TTIRef, &VFToOrderedEntries,
|
||||
&GathersToOrders, &ExternalUserReorderMap,
|
||||
&AltShufflesToOrders, &PhisToOrders](
|
||||
&AltShufflesToOrders](
|
||||
const std::unique_ptr<TreeEntry> &TE) {
|
||||
// Look for external users that will probably be vectorized.
|
||||
SmallVector<OrdersType, 1> ExternalUserReorderIndices =
|
||||
@@ -4104,9 +4006,6 @@ void BoUpSLP::reorderTopToBottom() {
|
||||
VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
|
||||
if (TE->State != TreeEntry::Vectorize || !TE->ReuseShuffleIndices.empty())
|
||||
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
|
||||
if (TE->State == TreeEntry::Vectorize &&
|
||||
TE->getOpcode() == Instruction::PHI)
|
||||
PhisToOrders.try_emplace(TE.get(), *CurrentOrder);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -4132,8 +4031,8 @@ void BoUpSLP::reorderTopToBottom() {
|
||||
if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE))
|
||||
continue;
|
||||
// Count number of orders uses.
|
||||
const auto &Order = [OpTE, &GathersToOrders, &AltShufflesToOrders,
|
||||
&PhisToOrders]() -> const OrdersType & {
|
||||
const auto &Order = [OpTE, &GathersToOrders,
|
||||
&AltShufflesToOrders]() -> const OrdersType & {
|
||||
if (OpTE->State == TreeEntry::NeedToGather ||
|
||||
!OpTE->ReuseShuffleIndices.empty()) {
|
||||
auto It = GathersToOrders.find(OpTE);
|
||||
@@ -4145,12 +4044,6 @@ void BoUpSLP::reorderTopToBottom() {
|
||||
if (It != AltShufflesToOrders.end())
|
||||
return It->second;
|
||||
}
|
||||
if (OpTE->State == TreeEntry::Vectorize &&
|
||||
isa<PHINode>(OpTE->getMainOp())) {
|
||||
auto It = PhisToOrders.find(OpTE);
|
||||
if (It != PhisToOrders.end())
|
||||
return It->second;
|
||||
}
|
||||
return OpTE->ReorderIndices;
|
||||
}();
|
||||
// First consider the order of the external scalar users.
|
||||
@@ -7245,6 +7138,49 @@ InstructionCost BoUpSLP::getSpillCost() const {
|
||||
return Cost;
|
||||
}
|
||||
|
||||
/// Check if two insertelement instructions are from the same buildvector.
|
||||
static bool areTwoInsertFromSameBuildVector(
|
||||
InsertElementInst *VU, InsertElementInst *V,
|
||||
function_ref<Value *(InsertElementInst *)> GetBaseOperand) {
|
||||
// Instructions must be from the same basic blocks.
|
||||
if (VU->getParent() != V->getParent())
|
||||
return false;
|
||||
// Checks if 2 insertelements are from the same buildvector.
|
||||
if (VU->getType() != V->getType())
|
||||
return false;
|
||||
// Multiple used inserts are separate nodes.
|
||||
if (!VU->hasOneUse() && !V->hasOneUse())
|
||||
return false;
|
||||
auto *IE1 = VU;
|
||||
auto *IE2 = V;
|
||||
unsigned Idx1 = *getInsertIndex(IE1);
|
||||
unsigned Idx2 = *getInsertIndex(IE2);
|
||||
// Go through the vector operand of insertelement instructions trying to find
|
||||
// either VU as the original vector for IE2 or V as the original vector for
|
||||
// IE1.
|
||||
do {
|
||||
if (IE2 == VU)
|
||||
return VU->hasOneUse();
|
||||
if (IE1 == V)
|
||||
return V->hasOneUse();
|
||||
if (IE1) {
|
||||
if ((IE1 != VU && !IE1->hasOneUse()) ||
|
||||
getInsertIndex(IE1).value_or(Idx2) == Idx2)
|
||||
IE1 = nullptr;
|
||||
else
|
||||
IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
|
||||
}
|
||||
if (IE2) {
|
||||
if ((IE2 != V && !IE2->hasOneUse()) ||
|
||||
getInsertIndex(IE2).value_or(Idx1) == Idx1)
|
||||
IE2 = nullptr;
|
||||
else
|
||||
IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
|
||||
}
|
||||
} while (IE1 || IE2);
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Checks if the \p IE1 instructions is followed by \p IE2 instruction in the
|
||||
/// buildvector sequence.
|
||||
static bool isFirstInsertElement(const InsertElementInst *IE1,
|
||||
|
||||
@@ -63,8 +63,8 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
|
||||
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1
|
||||
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2
|
||||
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A0]], i32 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A1]], i32 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A1]], i32 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A0]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1
|
||||
; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
|
||||
@@ -73,15 +73,15 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
|
||||
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1
|
||||
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2
|
||||
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B0]], i32 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B1]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B1]], i32 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B0]], i32 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1
|
||||
; CHECK-NEXT: br label [[BB1:%.*]]
|
||||
; CHECK: bb1:
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
; CHECK-NEXT: ret <4 x half> [[TMP12]]
|
||||
|
||||
Reference in New Issue
Block a user