This patch adds
- New arguments to getMinPrefetchStride() to let the target decide on a
per-loop basis if software prefetching should be done even with a stride
within the limit of the hw prefetcher.
- New TTI hook enableWritePrefetching() to let a target do write prefetching
by default (defaults to false).
- In LoopDataPrefetch:
- A search through the whole loop to gather information before emitting any
prefetches. This way the target can get information via new arguments to
getMinPrefetchStride() and emit prefetches more selectively. Collected
information includes: Does the loop have a call, how many memory
accesses, how many of them are strided, how many prefetches will cover
them. This is NFC to before as long as the target does not change its
definition of getMinPrefetchStride().
- If a previous access to the same exact address was 'read', and the
current one is 'write', make it a 'write' prefetch.
- If two accesses that are covered by the same prefetch do not dominate
each other, put the prefetch in a block that dominates both of them.
- If a ConstantMaxTripCount is less than ItersAhead, then skip the loop.
- A SystemZ implementation of getMinPrefetchStride().
Review: Ulrich Weigand, Michael Kruse
Differential Revision: https://reviews.llvm.org/D70228
119 lines
4.9 KiB
C++
119 lines
4.9 KiB
C++
//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
|
|
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
|
|
|
|
#include "SystemZTargetMachine.h"
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
#include "llvm/CodeGen/BasicTTIImpl.h"
|
|
|
|
namespace llvm {
|
|
|
|
class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
|
|
typedef BasicTTIImplBase<SystemZTTIImpl> BaseT;
|
|
typedef TargetTransformInfo TTI;
|
|
friend BaseT;
|
|
|
|
const SystemZSubtarget *ST;
|
|
const SystemZTargetLowering *TLI;
|
|
|
|
const SystemZSubtarget *getST() const { return ST; }
|
|
const SystemZTargetLowering *getTLI() const { return TLI; }
|
|
|
|
unsigned const LIBCALL_COST = 30;
|
|
|
|
public:
|
|
explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F)
|
|
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
|
|
TLI(ST->getTargetLowering()) {}
|
|
|
|
/// \name Scalar TTI Implementations
|
|
/// @{
|
|
|
|
unsigned getInliningThresholdMultiplier() { return 3; }
|
|
|
|
int getIntImmCost(const APInt &Imm, Type *Ty);
|
|
|
|
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
|
|
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
|
|
Type *Ty);
|
|
|
|
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
|
|
|
|
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|
TTI::UnrollingPreferences &UP);
|
|
|
|
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
|
|
TargetTransformInfo::LSRCost &C2);
|
|
/// @}
|
|
|
|
/// \name Vector TTI Implementations
|
|
/// @{
|
|
|
|
unsigned getNumberOfRegisters(unsigned ClassID) const;
|
|
unsigned getRegisterBitWidth(bool Vector) const;
|
|
|
|
unsigned getCacheLineSize() const override { return 256; }
|
|
unsigned getPrefetchDistance() const override { return 4500; }
|
|
unsigned getMinPrefetchStride(unsigned NumMemAccesses,
|
|
unsigned NumStridedMemAccesses,
|
|
unsigned NumPrefetches,
|
|
bool HasCall) const override;
|
|
bool enableWritePrefetching() const override { return true; }
|
|
|
|
bool hasDivRemOp(Type *DataType, bool IsSigned);
|
|
bool prefersVectorizedAddressing() { return false; }
|
|
bool LSRWithInstrQueries() { return true; }
|
|
bool supportsEfficientVectorElementLoadStore() { return true; }
|
|
bool enableInterleavedAccessVectorization() { return true; }
|
|
|
|
int getArithmeticInstrCost(
|
|
unsigned Opcode, Type *Ty,
|
|
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
|
|
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
|
|
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
|
|
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
|
|
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
|
|
const Instruction *CxtI = nullptr);
|
|
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
|
|
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
|
|
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
|
|
unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
|
|
const Instruction *I);
|
|
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|
const Instruction *I = nullptr);
|
|
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
|
const Instruction *I = nullptr);
|
|
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
|
|
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
|
|
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
|
|
unsigned AddressSpace, const Instruction *I = nullptr);
|
|
|
|
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
|
|
unsigned Factor,
|
|
ArrayRef<unsigned> Indices,
|
|
unsigned Alignment,
|
|
unsigned AddressSpace,
|
|
bool UseMaskForCond = false,
|
|
bool UseMaskForGaps = false);
|
|
|
|
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
|
|
ArrayRef<Value *> Args, FastMathFlags FMF,
|
|
unsigned VF = 1, const Instruction *I = nullptr);
|
|
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
|
|
FastMathFlags FMF,
|
|
unsigned ScalarizationCostPassed = UINT_MAX,
|
|
const Instruction *I = nullptr);
|
|
/// @}
|
|
};
|
|
|
|
} // end namespace llvm
|
|
|
|
#endif
|