Files
clang-p2996/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Kerry McLaughlin 7647822156 [AArch64][SVE] Remove i1 type from isElementTypeLegalForScalableVector
`collectElementTypesForWidening` collects the types of load, store and
reduction Phis in a loop. These types are later checked using
`isElementTypeLegalForScalableVector` to prevent vectorisation of
loops with instruction types that are unsupported.

This patch removes i1 from the list of types supported for scalable
vectors. This fixes an assert ("Cannot yet scalarize uniform stores") in
`setCostBasedWideningDecision` when we have a loop containing a uniform
i1 store and a scalable VF, which we cannot create a scatter for.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D113680
2021-11-12 14:24:38 +00:00

328 lines
12 KiB
C++

//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file a TargetTransformInfo::Concept conforming object specific to the
/// AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include <cstdint>
namespace llvm {
class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;
class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
using TTI = TargetTransformInfo;
friend BaseT;
const AArch64Subtarget *ST;
const AArch64TargetLowering *TLI;
const AArch64Subtarget *getST() const { return ST; }
const AArch64TargetLowering *getTLI() const { return TLI; }
enum MemIntrinsicType {
VECTOR_LDST_TWO_ELEMENTS,
VECTOR_LDST_THREE_ELEMENTS,
VECTOR_LDST_FOUR_ELEMENTS
};
bool isWideningInstruction(Type *Ty, unsigned Opcode,
ArrayRef<const Value *> Args);
public:
explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
/// \name Scalar TTI Implementations
/// @{
using BaseT::getIntImmCost;
InstructionCost getIntImmCost(int64_t Val);
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind);
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind,
Instruction *Inst = nullptr);
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
/// @}
/// \name Vector TTI Implementations
/// @{
bool enableInterleavedAccessVectorization() { return true; }
unsigned getNumberOfRegisters(unsigned ClassID) const {
bool Vector = (ClassID == 1);
if (Vector) {
if (ST->hasNEON())
return 32;
return 0;
}
return 31;
}
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
switch (K) {
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(64);
case TargetTransformInfo::RGK_FixedWidthVector:
if (ST->hasSVE())
return TypeSize::getFixed(
std::max(ST->getMinSVEVectorSizeInBits(), 128u));
return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
case TargetTransformInfo::RGK_ScalableVector:
return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
}
llvm_unreachable("Unsupported register kind");
}
unsigned getMinVectorRegisterBitWidth() const {
return ST->getMinVectorRegisterBitWidth();
}
Optional<unsigned> getVScaleForTuning() const {
return ST->getVScaleForTuning();
}
/// Try to return an estimate cost factor that can be used as a multiplier
/// when scalarizing an operation for a vector with ElementCount \p VF.
/// For scalable vectors this currently takes the most pessimistic view based
/// upon the maximum possible value for vscale.
unsigned getMaxNumElements(ElementCount VF) const {
if (!VF.isScalable())
return VF.getFixedValue();
return VF.getKnownMinValue() * ST->getVScaleForTuning();
}
unsigned getMaxInterleaveFactor(unsigned VF);
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy, unsigned Index);
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index);
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsUnsigned,
TTI::TargetCostKind CostKind);
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
VectorType *ValTy,
TTI::TargetCostKind CostKind);
InstructionCost getSpliceCost(VectorType *Tp, int Index);
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
const SCEV *Ptr);
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
bool useNeonVector(const Type *Ty) const;
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType);
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
bool isElementTypeLegalForScalableVector(Type *Ty) const {
if (Ty->isPointerTy())
return true;
if (Ty->isBFloatTy() && ST->hasBF16())
return true;
if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
return true;
if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
return true;
return false;
}
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
if (!ST->hasSVE())
return false;
// For fixed vectors, avoid scalarization if using SVE for them.
if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
return false; // Fall back to scalarization of masked operations.
return isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
return isLegalMaskedLoadStore(DataType, Alignment);
}
bool isLegalMaskedStore(Type *DataType, Align Alignment) {
return isLegalMaskedLoadStore(DataType, Alignment);
}
bool isLegalMaskedGatherScatter(Type *DataType) const {
if (!ST->hasSVE())
return false;
// For fixed vectors, scalarize if not using SVE for them.
auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
DataTypeFVTy->getNumElements() < 2))
return false;
return isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
return isLegalMaskedGatherScatter(DataType);
}
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
return isLegalMaskedGatherScatter(DataType);
}
bool isLegalNTStore(Type *DataType, Align Alignment) {
// NOTE: The logic below is mostly geared towards LV, which calls it with
// vectors with 2 elements. We might want to improve that, if other
// users show up.
// Nontemporal vector stores can be directly lowered to STNP, if the vector
// can be halved so that each half fits into a register. That's the case if
// the element type fits into a register and the number of elements is a
// power of 2 > 1.
if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
unsigned NumElements =
cast<FixedVectorType>(DataTypeVTy)->getNumElements();
unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
EltSize <= 128 && isPowerOf2_64(EltSize);
}
return BaseT::isLegalNTStore(DataType, Alignment);
}
bool enableOrderedReductions() const { return true; }
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
bool
shouldConsiderAddressTypePromotion(const Instruction &I,
bool &AllowPromotionWithoutCommonHeader);
bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
unsigned getGISelRematGlobalCost() const {
return 2;
}
bool supportsScalableVectors() const { return ST->hasSVE(); }
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const;
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
VectorType *SubTp);
/// @}
};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H