[AArch64][TTI] Reduce cost for splatting whole first vector segment (SVE) (#145701)
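Improve the shuffle cost model for single-source permutes that splat the first 128-bit segment of a vector across the whole vector when SVE is available (`mov zd.q, vn`).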
This commit is contained in:
@@ -6750,6 +6750,21 @@ inline std::optional<unsigned> isDUPQMask(ArrayRef<int> Mask, unsigned Segments,
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/// isDUPFirstSegmentMask - matches a splat of the first 128b segment.
|
||||
inline bool isDUPFirstSegmentMask(ArrayRef<int> Mask, unsigned Segments,
|
||||
unsigned SegmentSize) {
|
||||
// Make sure there's no size changes.
|
||||
if (SegmentSize * Segments != Mask.size())
|
||||
return false;
|
||||
|
||||
// Check that all lanes refer to the equivalent lane in the first segment.
|
||||
// Undef/poison lanes (<0) are also accepted.
|
||||
return all_of(enumerate(Mask), [&](auto P) {
|
||||
const unsigned IndexWithinSegment = P.index() % SegmentSize;
|
||||
return P.value() < 0 || unsigned(P.value()) == IndexWithinSegment;
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
||||
|
||||
@@ -5600,9 +5600,8 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
|
||||
}
|
||||
|
||||
// Segmented shuffle matching.
|
||||
if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
|
||||
ST->isSVEorStreamingSVEAvailable() && Kind == TTI::SK_PermuteSingleSrc &&
|
||||
isa<FixedVectorType>(SrcTy) && !Mask.empty() &&
|
||||
if (Kind == TTI::SK_PermuteSingleSrc && isa<FixedVectorType>(SrcTy) &&
|
||||
!Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
|
||||
SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
|
||||
AArch64::SVEBitsPerBlock)) {
|
||||
|
||||
@@ -5612,7 +5611,14 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
|
||||
unsigned SegmentElts = VTy->getNumElements() / Segments;
|
||||
|
||||
// dupq zd.t, zn.t[idx]
|
||||
if (isDUPQMask(Mask, Segments, SegmentElts))
|
||||
if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
|
||||
ST->isSVEorStreamingSVEAvailable() &&
|
||||
isDUPQMask(Mask, Segments, SegmentElts))
|
||||
return LT.first;
|
||||
|
||||
// mov zd.q, vn
|
||||
if (ST->isSVEorStreamingSVEAvailable() &&
|
||||
isDUPFirstSegmentMask(Mask, Segments, SegmentElts))
|
||||
return LT.first;
|
||||
}
|
||||
|
||||
|
||||
@@ -49,5 +49,53 @@ define void @dup_within_each_segment_512b() #1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Cost-model test for shuffles whose mask repeats the leading lanes of the
; source (0..k-1, 0..k-1, ...) across the whole result. Covers i8, i16, i32
; and i64 elements on 256-bit vectors, one 512-bit <8 x double> case, and a
; float mask containing poison lanes. Every shuffle is expected to be costed
; at 1. The function uses attribute group #0.
define void @dup_whole_segment_256b() #0 {
|
||||
; CHECK-LABEL: 'dup_whole_segment_256b'
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_b = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_h = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_512b_d = shufflevector <8 x double> poison, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_s_with_poison = shufflevector <8 x float> poison, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 poison, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
%dup_seg_b = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
|
||||
i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%dup_seg_h = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
|
||||
i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%dup_seg_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3,
|
||||
i32 0, i32 1, i32 2, i32 3>
|
||||
%dup_seg_d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
||||
%dup_seg_512b_d = shufflevector <8 x double> poison, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1,
|
||||
i32 0, i32 1, i32 0, i32 1>
|
||||
%dup_seg_s_with_poison = shufflevector <8 x float> poison, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 3,
|
||||
i32 poison, i32 1, i32 2, i32 3>
|
||||
ret void
|
||||
}
|
||||
|
||||
; Cost-model test with the same first-segment-splat shuffles as
; @dup_whole_segment_256b, but using attribute group #1 instead of #0.
; Every shuffle is again expected to be costed at 1, including the mask
; that contains poison lanes.
define void @dup_whole_segment_512b() #1 {
|
||||
; CHECK-LABEL: 'dup_whole_segment_512b'
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_b = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_h = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_512b_d = shufflevector <8 x double> poison, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_s_with_poison = shufflevector <8 x float> poison, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 poison, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
%dup_seg_b = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
|
||||
i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%dup_seg_h = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
|
||||
i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%dup_seg_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3,
|
||||
i32 0, i32 1, i32 2, i32 3>
|
||||
%dup_seg_d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
||||
%dup_seg_512b_d = shufflevector <8 x double> poison, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1,
|
||||
i32 0, i32 1, i32 0, i32 1>
|
||||
%dup_seg_s_with_poison = shufflevector <8 x float> poison, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 3,
|
||||
i32 poison, i32 1, i32 2, i32 3>
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { noinline vscale_range(2,2) }
|
||||
attributes #1 = { noinline vscale_range(4,4) }
|
||||
|
||||
Reference in New Issue
Block a user