[RISCV] Add cost model for fixed broadcast shuffle
This patch adds basic broadcast shuffle costs for fixed-length vectors in order to enable SLP vectorization. It also adds `getLMULCost`, which accounts for the reciprocal throughput at different LMULs.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D137276
This commit is contained in:
@@ -31,6 +31,27 @@ static cl::opt<unsigned> SLPMaxVF(
|
||||
"SLP vectorizer. Defaults to 1 which disables SLP."),
|
||||
cl::init(1), cl::Hidden);
|
||||
|
||||
/// Estimate the relative cost of a single vector operation on \p VT from the
/// register-group size (LMUL) it occupies.
///
/// \returns an invalid cost when \p VT is not a vector type; otherwise a
/// value that is always at least 1.
InstructionCost RISCVTTIImpl::getLMULCost(MVT VT) {
  // TODO: The reciprocal throughput is assumed to be 1 at LMUL_1; the real
  // figure is implementation-defined.
  if (!VT.isVector())
    return InstructionCost::getInvalid();

  if (!VT.isScalableVector()) {
    // Fixed-length vector: divide the type's bit width by the value reported
    // by ST->getRealMinVLen() (presumably the subtarget's minimum VLEN --
    // confirm against RISCVSubtarget). The division truncates, so types
    // narrower than that value are clamped up to a cost of 1.
    const unsigned RegCount = VT.getSizeInBits() / ST->getRealMinVLen();
    return std::max<unsigned>(RegCount, 1);
  }

  // Scalable vector: decode the LMUL the type maps to. A fractional LMUL is
  // charged the same as LMUL_1; an integral LMUL is charged its multiplier.
  const auto Decoded =
      RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
  const unsigned LMul = Decoded.first;
  const bool Fractional = Decoded.second;
  return std::max<unsigned>(Fractional ? 1u : LMul, 1);
}
|
||||
|
||||
InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
|
||||
TTI::TargetCostKind CostKind) {
|
||||
assert(Ty->isIntegerTy() &&
|
||||
@@ -255,6 +276,44 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||
}
|
||||
}
|
||||
|
||||
if (isa<FixedVectorType>(Tp) && Kind == TargetTransformInfo::SK_Broadcast) {
|
||||
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
|
||||
bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
|
||||
Instruction::InsertElement);
|
||||
if (LT.second.getScalarSizeInBits() == 1) {
|
||||
if (HasScalar) {
|
||||
// Example sequence:
|
||||
// andi a0, a0, 1
|
||||
// vsetivli zero, 2, e8, mf8, ta, ma (ignored)
|
||||
// vmv.v.x v8, a0
|
||||
// vmsne.vi v0, v8, 0
|
||||
return LT.first * getLMULCost(LT.second) * 3;
|
||||
}
|
||||
// Example sequence:
|
||||
// vsetivli zero, 2, e8, mf8, ta, mu (ignored)
|
||||
// vmv.v.i v8, 0
|
||||
// vmerge.vim v8, v8, 1, v0
|
||||
// vmv.x.s a0, v8
|
||||
// andi a0, a0, 1
|
||||
// vmv.v.x v8, a0
|
||||
// vmsne.vi v0, v8, 0
|
||||
|
||||
return LT.first * getLMULCost(LT.second) * 6;
|
||||
}
|
||||
|
||||
if (HasScalar) {
|
||||
// Example sequence:
|
||||
// vmv.v.x v8, a0
|
||||
return LT.first * getLMULCost(LT.second);
|
||||
}
|
||||
|
||||
// Example sequence:
|
||||
// vrgather.vi v9, v8, 0
|
||||
// TODO: vrgather could be slower than vmv.v.x. It is
|
||||
// implementation-dependent.
|
||||
return LT.first * getLMULCost(LT.second);
|
||||
}
|
||||
|
||||
return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
|
||||
}
|
||||
|
||||
|
||||
@@ -46,6 +46,10 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
|
||||
/// the true cost significantly if getVScaleForTuning is wildly off for the
|
||||
/// actual target hardware.
|
||||
unsigned getEstimatedVLFor(VectorType *Ty);
|
||||
|
||||
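/// Return the relative cost of an operation on \p VT as determined by its
/// LMUL. The larger the LMUL, the higher the cost. Returns an invalid cost
/// if \p VT is not a vector type.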
|
||||
InstructionCost getLMULCost(MVT VT);
|
||||
|
||||
public:
|
||||
explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
|
||||
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
|
||||
|
||||
114
llvm/test/Analysis/CostModel/RISCV/fixed-shuffle-broadcast.ll
Normal file
114
llvm/test/Analysis/CostModel/RISCV/fixed-shuffle-broadcast.ll
Normal file
@@ -0,0 +1,114 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
|
||||
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+experimental-zvfh | FileCheck %s
|
||||
|
||||
define void @broadcast_fixed() #0{
|
||||
; CHECK-LABEL: 'broadcast_fixed'
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = shufflevector <32 x half> undef, <32 x half> undef, <32 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = shufflevector <64 x half> undef, <64 x half> undef, <64 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %10 = shufflevector <32 x float> undef, <32 x float> undef, <32 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %14 = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = shufflevector <128 x i8> undef, <128 x i8> undef, <128 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %36 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %37 = shufflevector <2 x i1> undef, <2 x i1> undef, <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %38 = shufflevector <4 x i1> undef, <4 x i1> undef, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %39 = shufflevector <8 x i1> undef, <8 x i1> undef, <8 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %40 = shufflevector <16 x i1> undef, <16 x i1> undef, <16 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %41 = shufflevector <32 x i1> undef, <32 x i1> undef, <32 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %42 = shufflevector <64 x i1> undef, <64 x i1> undef, <64 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %43 = shufflevector <128 x i1> undef, <128 x i1> undef, <128 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ins1 = insertelement <128 x i1> poison, i1 poison, i32 0
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %44 = shufflevector <128 x i1> %ins1, <128 x i1> poison, <128 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ins2 = insertelement <2 x i8> poison, i8 3, i32 0
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = shufflevector <2 x i8> %ins2, <2 x i8> undef, <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
%zero = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> zeroinitializer
|
||||
%1 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> zeroinitializer
|
||||
%2 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> zeroinitializer
|
||||
%3 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> zeroinitializer
|
||||
%4 = shufflevector <32 x half> undef, <32 x half> undef, <32 x i32> zeroinitializer
|
||||
%5 = shufflevector <64 x half> undef, <64 x half> undef, <64 x i32> zeroinitializer
|
||||
|
||||
%6 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer
|
||||
%7 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%8 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> zeroinitializer
|
||||
%9 = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> zeroinitializer
|
||||
%10 = shufflevector <32 x float> undef, <32 x float> undef, <32 x i32> zeroinitializer
|
||||
|
||||
%11 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer
|
||||
%12 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer
|
||||
%13 = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> zeroinitializer
|
||||
%14 = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> zeroinitializer
|
||||
|
||||
%15 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
|
||||
%16 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer
|
||||
%17 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer
|
||||
%18 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
%19 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> zeroinitializer
|
||||
%20 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> zeroinitializer
|
||||
%21 = shufflevector <128 x i8> undef, <128 x i8> undef, <128 x i32> zeroinitializer
|
||||
|
||||
%22 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
|
||||
%23 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer
|
||||
%24 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%25 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer
|
||||
%26 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> zeroinitializer
|
||||
%27 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> zeroinitializer
|
||||
|
||||
%28 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer
|
||||
%29 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%30 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
%31 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
%32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> zeroinitializer
|
||||
|
||||
%33 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
%34 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer
|
||||
%35 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
%36 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> zeroinitializer
|
||||
|
||||
%37 = shufflevector <2 x i1> undef, <2 x i1> undef, <2 x i32> zeroinitializer
|
||||
%38 = shufflevector <4 x i1> undef, <4 x i1> undef, <4 x i32> zeroinitializer
|
||||
%39 = shufflevector <8 x i1> undef, <8 x i1> undef, <8 x i32> zeroinitializer
|
||||
%40 = shufflevector <16 x i1> undef, <16 x i1> undef, <16 x i32> zeroinitializer
|
||||
%41 = shufflevector <32 x i1> undef, <32 x i1> undef, <32 x i32> zeroinitializer
|
||||
%42 = shufflevector <64 x i1> undef, <64 x i1> undef, <64 x i32> zeroinitializer
|
||||
%43 = shufflevector <128 x i1> undef, <128 x i1> undef, <128 x i32> zeroinitializer
|
||||
|
||||
%ins1 = insertelement <128 x i1> poison, i1 poison, i32 0
|
||||
%44 = shufflevector <128 x i1> %ins1, <128 x i1> poison, <128 x i32> zeroinitializer
|
||||
|
||||
%ins2 = insertelement <2 x i8> poison, i8 3, i32 0
|
||||
%45 = shufflevector <2 x i8> %ins2, <2 x i8> undef, <2 x i32> zeroinitializer
|
||||
ret void
|
||||
}
|
||||
Reference in New Issue
Block a user