clang-p2996/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
Jonas Paulsson 36d4421f50 [LoopDataPrefetch + SystemZ] Let target decide on prefetching for each loop.
This patch adds

- New arguments to getMinPrefetchStride() to let the target decide on a
  per-loop basis whether software prefetching should be done even for strides
  within the limit of the hardware prefetcher.

- New TTI hook enableWritePrefetching() to let a target enable write
  prefetching by default (the hook itself returns false by default).

- In LoopDataPrefetch:

  - A scan of the whole loop to gather information before emitting any
    prefetches. This way the target receives that information through the new
    arguments to getMinPrefetchStride() and prefetches can be emitted more
    selectively. The collected information includes: whether the loop contains
    a call, the number of memory accesses, how many of them are strided, and
    how many prefetches will cover them. This is NFC relative to the previous
    behavior as long as the target does not change its definition of
    getMinPrefetchStride().

  - If a previous access to the same exact address was 'read', and the
    current one is 'write', make it a 'write' prefetch.

  - If two accesses that are covered by the same prefetch do not dominate
    each other, put the prefetch in a block that dominates both of them.

  - If the loop's ConstantMaxTripCount is less than ItersAhead, skip the loop.

- A SystemZ implementation of getMinPrefetchStride() (a sketch of this kind of
  per-loop heuristic follows below).
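
Below is a minimal sketch of how a target might use the new per-loop arguments
in a getMinPrefetchStride() override. The shape follows the hook's new
signature as declared in the header; the specific thresholds are illustrative
assumptions, not the values chosen by the actual SystemZ implementation:

  // Sketch only: returning UINT_MAX disables software prefetching for this
  // loop, while returning a small stride requests prefetches even where the
  // hardware prefetcher would normally be trusted. Thresholds are made up.
  unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses,
                                                unsigned NumStridedMemAccesses,
                                                unsigned NumPrefetches,
                                                bool HasCall) const {
    // A loop that would need very many prefetches is unlikely to benefit.
    if (NumPrefetches > 16)
      return UINT_MAX;

    // In a call-free loop dominated by strided accesses, help the hardware
    // prefetcher by prefetching even small strides.
    if (!HasCall && NumStridedMemAccesses > 32 &&
        NumStridedMemAccesses * 2 >= NumMemAccesses)
      return 1;

    // Otherwise only prefetch strides the hardware prefetcher cannot cover.
    return 2048;
  }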

Review: Ulrich Weigand, Michael Kruse

Differential Revision: https://reviews.llvm.org/D70228
2020-04-02 14:57:46 +02:00


//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H

#include "SystemZTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"

namespace llvm {

class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
  typedef BasicTTIImplBase<SystemZTTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const SystemZSubtarget *ST;
  const SystemZTargetLowering *TLI;

  const SystemZSubtarget *getST() const { return ST; }
  const SystemZTargetLowering *getTLI() const { return TLI; }
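
  // Estimated cost of an operation that is expanded into a library call.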
  unsigned const LIBCALL_COST = 30;

public:
  explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  /// \name Scalar TTI Implementations
  /// @{

  unsigned getInliningThresholdMultiplier() { return 3; }

  int getIntImmCost(const APInt &Imm, Type *Ty);

  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                        Type *Ty);
  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2);
  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const;
  unsigned getRegisterBitWidth(bool Vector) const;
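
  // Prefetching parameters consulted by the LoopDataPrefetch pass.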
  unsigned getCacheLineSize() const override { return 256; }
  unsigned getPrefetchDistance() const override { return 4500; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override;
  bool enableWritePrefetching() const override { return true; }

  bool hasDivRemOp(Type *DataType, bool IsSigned);
  bool prefersVectorizedAddressing() { return false; }
  bool LSRWithInstrQueries() { return true; }
  bool supportsEfficientVectorElementLoadStore() { return true; }
  bool enableInterleavedAccessVectorization() { return true; }

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);
  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
  unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
  unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
  unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
                                         const Instruction *I);
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr);
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         const Instruction *I = nullptr);
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
  bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                      unsigned AddressSpace, const Instruction *I = nullptr);
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                 unsigned Factor,
                                 ArrayRef<unsigned> Indices,
                                 unsigned Alignment,
                                 unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false);
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Value *> Args, FastMathFlags FMF,
                            unsigned VF = 1, const Instruction *I = nullptr);
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
                            FastMathFlags FMF,
                            unsigned ScalarizationCostPassed = UINT_MAX,
                            const Instruction *I = nullptr);
  /// @}
};

} // end namespace llvm

#endif