[NFC][AMDGPU] Reduce includes dependencies, part 2

1. Splitted out some parts of R600 target to separate modules/headers.
2. Reduced some include lists in headers.
3. Minor forward declarations, redundant includes and flags in GCNSubtarget
   cleanup.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D109351
This commit is contained in:
Daniil Fukalov
2021-09-07 11:21:04 +03:00
parent 3e260efdfc
commit 47d6274d4c
14 changed files with 624 additions and 532 deletions

View File

@@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPU.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
@@ -37,6 +38,10 @@ ImmutablePass *llvm::createAMDGPUExternalAAWrapperPass() {
return new AMDGPUExternalAAWrapper();
}
AMDGPUAAWrapperPass::AMDGPUAAWrapperPass() : ImmutablePass(ID) {
initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
}
void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}

View File

@@ -12,7 +12,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
#include "AMDGPU.h"
#include "llvm/Analysis/AliasAnalysis.h"
namespace llvm {
@@ -66,9 +65,7 @@ class AMDGPUAAWrapperPass : public ImmutablePass {
public:
static char ID;
AMDGPUAAWrapperPass() : ImmutablePass(ID) {
initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
}
AMDGPUAAWrapperPass();
AMDGPUAAResult &getResult() { return *Result; }
const AMDGPUAAResult &getResult() const { return *Result; }

View File

@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600.h"
#include "R600Subtarget.h"
#include "R600RegisterInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -35,287 +35,12 @@
using namespace llvm;
namespace llvm {
class R600InstrInfo;
} // end namespace llvm
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
namespace {
static bool isNullConstantOrUndef(SDValue V) {
if (V.isUndef())
return true;
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
return Const != nullptr && Const->isZero();
}
static bool getConstantValue(SDValue N, uint32_t &Out) {
// This is only used for packed vectors, where using 0 for undef should
// always be good.
if (N.isUndef()) {
Out = 0;
return true;
}
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
Out = C->getAPIntValue().getSExtValue();
return true;
}
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
return true;
}
return false;
}
// TODO: Handle undef as zero
static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
bool Negate = false) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
uint32_t LHSVal, RHSVal;
if (getConstantValue(N->getOperand(0), LHSVal) &&
getConstantValue(N->getOperand(1), RHSVal)) {
SDLoc SL(N);
uint32_t K = Negate ?
(-LHSVal & 0xffff) | (-RHSVal << 16) :
(LHSVal & 0xffff) | (RHSVal << 16);
return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
DAG.getTargetConstant(K, SL, MVT::i32));
}
return nullptr;
}
static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
return packConstantV2I16(N, DAG, true);
}
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const GCNSubtarget *Subtarget;
// Default FP mode for the current function.
AMDGPU::SIModeRegisterDefaults Mode;
bool EnableLateStructurizeCFG;
// Instructions that will be lowered with a final instruction that zeros the
// high result bits.
bool fp16SrcZerosHighBits(unsigned Opc) const;
public:
explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
: SelectionDAGISel(*TM, OptLevel) {
EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}
~AMDGPUDAGToDAGISel() override = default;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AMDGPUArgumentUsageInfo>();
AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
#endif
SelectionDAGISel::getAnalysisUsage(AU);
}
bool matchLoadD16FromBuildVector(SDNode *N) const;
bool runOnMachineFunction(MachineFunction &MF) override;
void PreprocessISelDAG() override;
void Select(SDNode *N) override;
StringRef getPassName() const override;
void PostprocessISelDAG() override;
protected:
void SelectBuildVector(SDNode *N, unsigned RegClassID);
private:
std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
bool isNoNanSrc(SDValue N) const;
bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
bool isNegInlineImmediate(const SDNode *N) const {
return isInlineImmediate(N, true);
}
bool isInlineImmediate16(int64_t Imm) const {
return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
}
bool isInlineImmediate32(int64_t Imm) const {
return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
}
bool isInlineImmediate64(int64_t Imm) const {
return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
}
bool isInlineImmediate(const APFloat &Imm) const {
return Subtarget->getInstrInfo()->isInlineConstant(Imm);
}
bool isVGPRImm(const SDNode *N) const;
bool isUniformLoad(const SDNode *N) const;
bool isUniformBr(const SDNode *N) const;
bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
SDValue &RHS) const;
MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
SDNode *glueCopyToM0LDSInit(SDNode *N) const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
unsigned Size) const;
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1) const;
bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1) const;
bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1, unsigned Size) const;
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset) const;
bool SelectMUBUFScratchOffen(SDNode *Parent,
SDValue Addr, SDValue &RSrc, SDValue &VAddr,
SDValue &SOffset, SDValue &ImmOffset) const;
bool SelectMUBUFScratchOffset(SDNode *Parent,
SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, uint64_t FlatVariant) const;
bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset) const;
bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset) const;
bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset) const;
bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &VOffset, SDValue &Offset) const;
bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &Offset) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
SDValue Expand32BitAddress(SDValue Addr) const;
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
bool &Imm) const;
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
bool AllowAbs = true) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3OMods(SDValue In, SDValue &Src,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
SDValue getHi16Elt(SDValue In) const;
SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
void SelectADD_SUB_I64(SDNode *N);
void SelectAddcSubb(SDNode *N);
void SelectUADDO_USUBO(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
void SelectMAD_64_32(SDNode *N);
void SelectFMA_W_CHAIN(SDNode *N);
void SelectFMUL_W_CHAIN(SDNode *N);
SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
uint32_t Offset, uint32_t Width);
void SelectS_BFEFromShifts(SDNode *N);
void SelectS_BFE(SDNode *N);
bool isCBranchSCC(const SDNode *N) const;
void SelectBRCOND(SDNode *N);
void SelectFMAD_FMA(SDNode *N);
void SelectATOMIC_CMP_SWAP(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectInterpP1F16(SDNode *N);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
void SelectINTRINSIC_WO_CHAIN(SDNode *N);
void SelectINTRINSIC_VOID(SDNode *N);
protected:
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
const R600Subtarget *Subtarget;
bool isConstantLoad(const MemSDNode *N, int cbID) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
SDValue& Offset);
public:
explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
AMDGPUDAGToDAGISel(TM, OptLevel) {}
void Select(SDNode *N) override;
bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
SDValue &Offset) override;
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
SDValue &Offset) override;
bool runOnMachineFunction(MachineFunction &MF) override;
void PreprocessISelDAG() override {}
protected:
// Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};
static SDValue stripBitcast(SDValue Val) {
return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}
@@ -389,11 +114,11 @@ FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
return new AMDGPUDAGToDAGISel(TM, OptLevel);
}
/// This pass converts a legalized DAG into a R600-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
CodeGenOpt::Level OptLevel) {
return new R600DAGToDAGISel(TM, OptLevel);
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(
TargetMachine *TM /*= nullptr*/,
CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
: SelectionDAGISel(*TM, OptLevel) {
EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
@@ -471,6 +196,16 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
}
}
void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AMDGPUArgumentUsageInfo>();
AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
#endif
SelectionDAGISel::getAnalysisUsage(AU);
}
bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
assert(Subtarget->d16PreservesUnusedBits());
MVT VT = N->getValueType(0).getSimpleVT();
@@ -3121,128 +2856,3 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
CurDAG->RemoveDeadNodes();
} while (IsModified);
}
bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<R600Subtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
if (!N->readMem())
return false;
if (CbId == -1)
return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
}
bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
SDValue& IntPtr) {
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
true);
return true;
}
return false;
}
bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
SDValue& BaseReg, SDValue &Offset) {
if (!isa<ConstantSDNode>(Addr)) {
BaseReg = Addr;
Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
return true;
}
return false;
}
void R600DAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
N->setNodeId(-1);
return; // Already selected.
}
switch (Opc) {
default: break;
case AMDGPUISD::BUILD_VERTICAL_VECTOR:
case ISD::SCALAR_TO_VECTOR:
case ISD::BUILD_VECTOR: {
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
unsigned RegClassID;
// BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
// that adds a 128 bits reg copy when going through TwoAddressInstructions
// pass. We want to avoid 128 bits copies as much as possible because they
// can't be bundled by our scheduler.
switch(NumVectorElts) {
case 2: RegClassID = R600::R600_Reg64RegClassID; break;
case 4:
if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
RegClassID = R600::R600_Reg128VerticalRegClassID;
else
RegClassID = R600::R600_Reg128RegClassID;
break;
default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
}
SelectBuildVector(N, RegClassID);
return;
}
}
SelectCode(N);
}
bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
SDValue &Offset) {
ConstantSDNode *C;
SDLoc DL(Addr);
if ((C = dyn_cast<ConstantSDNode>(Addr))) {
Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
Base = Addr.getOperand(0);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else {
Base = Addr;
Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
}
return true;
}
bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
SDValue &Offset) {
ConstantSDNode *IMMOffset;
if (Addr.getOpcode() == ISD::ADD
&& (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
&& isInt<16>(IMMOffset->getZExtValue())) {
Base = Addr.getOperand(0);
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
MVT::i32);
return true;
// If the pointer address is constant, we can move it to the offset field.
} else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
&& isInt<16>(IMMOffset->getZExtValue())) {
Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
SDLoc(CurDAG->getEntryNode()),
R600::ZERO, MVT::i32);
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
MVT::i32);
return true;
}
// Default case, no offset
Base = Addr;
Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
return true;
}

View File

@@ -0,0 +1,257 @@
//===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
namespace {
static inline bool isNullConstantOrUndef(SDValue V) {
if (V.isUndef())
return true;
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
return Const != nullptr && Const->isZero();
}
static inline bool getConstantValue(SDValue N, uint32_t &Out) {
// This is only used for packed vectors, where using 0 for undef should
// always be good.
if (N.isUndef()) {
Out = 0;
return true;
}
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
Out = C->getAPIntValue().getSExtValue();
return true;
}
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
return true;
}
return false;
}
// TODO: Handle undef as zero
static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
bool Negate = false) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
uint32_t LHSVal, RHSVal;
if (getConstantValue(N->getOperand(0), LHSVal) &&
getConstantValue(N->getOperand(1), RHSVal)) {
SDLoc SL(N);
uint32_t K = Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16)
: (LHSVal & 0xffff) | (RHSVal << 16);
return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
DAG.getTargetConstant(K, SL, MVT::i32));
}
return nullptr;
}
static inline SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
return packConstantV2I16(N, DAG, true);
}
} // namespace
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const GCNSubtarget *Subtarget;
// Default FP mode for the current function.
AMDGPU::SIModeRegisterDefaults Mode;
bool EnableLateStructurizeCFG;
// Instructions that will be lowered with a final instruction that zeros the
// high result bits.
bool fp16SrcZerosHighBits(unsigned Opc) const;
public:
explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
~AMDGPUDAGToDAGISel() override = default;
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool matchLoadD16FromBuildVector(SDNode *N) const;
bool runOnMachineFunction(MachineFunction &MF) override;
void PreprocessISelDAG() override;
void Select(SDNode *N) override;
StringRef getPassName() const override;
void PostprocessISelDAG() override;
protected:
void SelectBuildVector(SDNode *N, unsigned RegClassID);
private:
std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
bool isNoNanSrc(SDValue N) const;
bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
bool isNegInlineImmediate(const SDNode *N) const {
return isInlineImmediate(N, true);
}
bool isInlineImmediate16(int64_t Imm) const {
return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
}
bool isInlineImmediate32(int64_t Imm) const {
return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
}
bool isInlineImmediate64(int64_t Imm) const {
return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
}
bool isInlineImmediate(const APFloat &Imm) const {
return Subtarget->getInstrInfo()->isInlineConstant(Imm);
}
bool isVGPRImm(const SDNode *N) const;
bool isUniformLoad(const SDNode *N) const;
bool isUniformBr(const SDNode *N) const;
bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
SDValue &RHS) const;
MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
SDNode *glueCopyToM0LDSInit(SDNode *N) const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
unsigned Size) const;
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1) const;
bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1) const;
bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1, unsigned Size) const;
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset) const;
bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc,
SDValue &VAddr, SDValue &SOffset,
SDValue &ImmOffset) const;
bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, uint64_t FlatVariant) const;
bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset) const;
bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset) const;
bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset) const;
bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &VOffset, SDValue &Offset) const;
bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &Offset) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
SDValue Expand32BitAddress(SDValue Addr) const;
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
bool &Imm) const;
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
bool AllowAbs = true) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
SDValue &Omod) const;
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
unsigned &Mods) const;
bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
SDValue getHi16Elt(SDValue In) const;
SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
void SelectADD_SUB_I64(SDNode *N);
void SelectAddcSubb(SDNode *N);
void SelectUADDO_USUBO(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
void SelectMAD_64_32(SDNode *N);
void SelectFMA_W_CHAIN(SDNode *N);
void SelectFMUL_W_CHAIN(SDNode *N);
SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
uint32_t Offset, uint32_t Width);
void SelectS_BFEFromShifts(SDNode *N);
void SelectS_BFE(SDNode *N);
bool isCBranchSCC(const SDNode *N) const;
void SelectBRCOND(SDNode *N);
void SelectFMAD_FMA(SDNode *N);
void SelectATOMIC_CMP_SWAP(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectInterpP1F16(SDNode *N);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
void SelectINTRINSIC_WO_CHAIN(SDNode *N);
void SelectINTRINSIC_VOID(SDNode *N);
protected:
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H

View File

@@ -12,12 +12,11 @@
//===----------------------------------------------------------------------===//
//
#include "AMDGPUMCInstLower.h"
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600AsmPrinter.h"
#include "R600Subtarget.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
@@ -35,36 +34,6 @@
using namespace llvm;
namespace {
class AMDGPUMCInstLower {
MCContext &Ctx;
const TargetSubtargetInfo &ST;
const AsmPrinter &AP;
public:
AMDGPUMCInstLower(MCContext &ctx, const TargetSubtargetInfo &ST,
const AsmPrinter &AP);
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
/// Lower a MachineInstr to an MCInst
void lower(const MachineInstr *MI, MCInst &OutMI) const;
};
class R600MCInstLower : public AMDGPUMCInstLower {
public:
R600MCInstLower(MCContext &ctx, const R600Subtarget &ST,
const AsmPrinter &AP);
/// Lower a MachineInstr to an MCInst
void lower(const MachineInstr *MI, MCInst &OutMI) const;
};
} // End anonymous namespace
#include "AMDGPUGenMCPseudoLowering.inc"
AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx,
@@ -195,30 +164,6 @@ bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
return MCInstLowering.lowerOperand(MO, MCOp);
}
static const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
const Constant *CV,
MCContext &OutContext) {
// TargetMachine does not support llvm-style cast. Use C++-style cast.
// This is safe since TM is always of type AMDGPUTargetMachine or its
// derived class.
auto &AT = static_cast<const AMDGPUTargetMachine&>(TM);
auto *CE = dyn_cast<ConstantExpr>(CV);
// Lower null pointers in private and local address space.
// Clang generates addrspacecast for null pointers in private and local
// address space, which needs to be lowered.
if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
auto Op = CE->getOperand(0);
auto SrcAddr = Op->getType()->getPointerAddressSpace();
if (Op->isNullValue() && AT.getNullPointerValue(SrcAddr) == 0) {
auto DstAddr = CE->getType()->getPointerAddressSpace();
return MCConstantExpr::create(AT.getNullPointerValue(DstAddr),
OutContext);
}
}
return nullptr;
}
const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
return E;
@@ -332,47 +277,3 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
}
}
}
R600MCInstLower::R600MCInstLower(MCContext &Ctx, const R600Subtarget &ST,
const AsmPrinter &AP) :
AMDGPUMCInstLower(Ctx, ST, AP) { }
void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
for (const MachineOperand &MO : MI->explicit_operands()) {
MCOperand MCOp;
lowerOperand(MO, MCOp);
OutMI.addOperand(MCOp);
}
}
void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
R600MCInstLower MCInstLowering(OutContext, STI, *this);
StringRef Err;
if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
C.emitError("Illegal instruction detected: " + Err);
MI->print(errs());
}
if (MI->isBundle()) {
const MachineBasicBlock *MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
while (I != MBB->instr_end() && I->isInsideBundle()) {
emitInstruction(&*I);
++I;
}
} else {
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
}
}
const MCExpr *R600AsmPrinter::lowerConstant(const Constant *CV) {
if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
return E;
return AsmPrinter::lowerConstant(CV);
}

View File

@@ -0,0 +1,69 @@
//===- AMDGPUMCInstLower.h - Lower AMDGPU MachineInstr to an MCInst -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Header of lower AMDGPU MachineInstrs to their corresponding MCInst.
//
//===----------------------------------------------------------------------===//
//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
#include "AMDGPUTargetMachine.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/Casting.h"
namespace llvm {
class AsmPrinter;
class MCContext;
} // namespace llvm
using namespace llvm;
class AMDGPUMCInstLower {
MCContext &Ctx;
const TargetSubtargetInfo &ST;
const AsmPrinter &AP;
public:
AMDGPUMCInstLower(MCContext &ctx, const TargetSubtargetInfo &ST,
const AsmPrinter &AP);
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
/// Lower a MachineInstr to an MCInst
void lower(const MachineInstr *MI, MCInst &OutMI) const;
};
namespace {
static inline const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
const Constant *CV,
MCContext &OutContext) {
// TargetMachine does not support llvm-style cast. Use C++-style cast.
// This is safe since TM is always of type AMDGPUTargetMachine or its
// derived class.
auto &AT = static_cast<const AMDGPUTargetMachine &>(TM);
auto *CE = dyn_cast<ConstantExpr>(CV);
// Lower null pointers in private and local address space.
// Clang generates addrspacecast for null pointers in private and local
// address space, which needs to be lowered.
if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
auto Op = CE->getOperand(0);
auto SrcAddr = Op->getType()->getPointerAddressSpace();
if (Op->isNullValue() && AT.getNullPointerValue(SrcAddr) == 0) {
auto DstAddr = CE->getType()->getPointerAddressSpace();
return MCConstantExpr::create(AT.getNullPointerValue(DstAddr),
OutContext);
}
}
return nullptr;
}
} // namespace
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H

View File

@@ -6,7 +6,6 @@
//
//==-----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600.h"
#include "R600RegisterInfo.h"

View File

@@ -44,24 +44,24 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUAliasAnalysis.cpp
AMDGPUAlwaysInlinePass.cpp
AMDGPUAnnotateKernelFeatures.cpp
AMDGPUAttributor.cpp
AMDGPUAnnotateUniformValues.cpp
AMDGPUArgumentUsageInfo.cpp
AMDGPUAsmPrinter.cpp
AMDGPUAtomicOptimizer.cpp
AMDGPUAttributor.cpp
AMDGPUCallLowering.cpp
AMDGPUCodeGenPrepare.cpp
AMDGPUCtorDtorLowering.cpp
AMDGPUExportClustering.cpp
AMDGPUFixFunctionBitcasts.cpp
AMDGPUCtorDtorLowering.cpp
AMDGPUFrameLowering.cpp
AMDGPUGlobalISelUtils.cpp
AMDGPUHSAMetadataStreamer.cpp
AMDGPUInstCombineIntrinsic.cpp
AMDGPUInstrInfo.cpp
AMDGPUInstructionSelector.cpp
AMDGPUISelDAGToDAG.cpp
AMDGPUISelLowering.cpp
AMDGPUGlobalISelUtils.cpp
AMDGPULateCodeGenPrepare.cpp
AMDGPULegalizerInfo.cpp
AMDGPULibCalls.cpp
@@ -77,13 +77,16 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUMCInstLower.cpp
AMDGPUMIRFormatter.cpp
AMDGPUOpenCLEnqueuedBlockLowering.cpp
AMDGPUPerfHintAnalysis.cpp
AMDGPUPostLegalizerCombiner.cpp
AMDGPUPreLegalizerCombiner.cpp
AMDGPUPrintfRuntimeBinding.cpp
AMDGPUPromoteAlloca.cpp
AMDGPUPropagateAttributes.cpp
AMDGPURegBankCombiner.cpp
AMDGPURegisterBankInfo.cpp
AMDGPUReplaceLDSUseWithPointer.cpp
AMDGPUResourceUsageAnalysis.cpp
AMDGPURewriteOutArguments.cpp
AMDGPUSubtarget.cpp
AMDGPUTargetMachine.cpp
@@ -91,13 +94,14 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUTargetTransformInfo.cpp
AMDGPUUnifyDivergentExitNodes.cpp
AMDGPUUnifyMetadata.cpp
AMDGPUPerfHintAnalysis.cpp
AMDILCFGStructurizer.cpp
AMDGPUPrintfRuntimeBinding.cpp
AMDGPUResourceUsageAnalysis.cpp
GCNDPPCombine.cpp
GCNHazardRecognizer.cpp
GCNILPSched.cpp
GCNIterativeScheduler.cpp
GCNMinRegStrategy.cpp
GCNNSAReassign.cpp
GCNPreRAOptimizations.cpp
GCNRegPressure.cpp
GCNSchedStrategy.cpp
R600AsmPrinter.cpp
@@ -107,9 +111,11 @@ add_llvm_target(AMDGPUCodeGen
R600ExpandSpecialInstrs.cpp
R600FrameLowering.cpp
R600InstrInfo.cpp
R600ISelDAGToDAG.cpp
R600ISelLowering.cpp
R600MachineFunctionInfo.cpp
R600MachineScheduler.cpp
R600MCInstLower.cpp
R600OpenCLImageTypeLoweringPass.cpp
R600OptimizeVectorRegisters.cpp
R600Packetizer.cpp
@@ -120,15 +126,14 @@ add_llvm_target(AMDGPUCodeGen
SIAnnotateControlFlow.cpp
SIFixSGPRCopies.cpp
SIFixVGPRCopies.cpp
SIPreAllocateWWMRegs.cpp
SIFoldOperands.cpp
SIFormMemoryClauses.cpp
SIFrameLowering.cpp
SIInsertHardClauses.cpp
SILateBranchLowering.cpp
SIInsertWaitcnts.cpp
SIInstrInfo.cpp
SIISelLowering.cpp
SILateBranchLowering.cpp
SILoadStoreOptimizer.cpp
SILowerControlFlow.cpp
SILowerI1Copies.cpp
@@ -136,21 +141,18 @@ add_llvm_target(AMDGPUCodeGen
SIMachineFunctionInfo.cpp
SIMachineScheduler.cpp
SIMemoryLegalizer.cpp
SIModeRegister.cpp
SIOptimizeExecMasking.cpp
SIOptimizeExecMaskingPreRA.cpp
SIOptimizeVGPRLiveRange.cpp
SIPeepholeSDWA.cpp
SIPostRABundler.cpp
SIPreAllocateWWMRegs.cpp
SIPreEmitPeephole.cpp
SIProgramInfo.cpp
SIRegisterInfo.cpp
SIShrinkInstructions.cpp
SIWholeQuadMode.cpp
GCNILPSched.cpp
GCNNSAReassign.cpp
GCNDPPCombine.cpp
GCNPreRAOptimizations.cpp
SIModeRegister.cpp
LINK_COMPONENTS
Analysis

View File

@@ -97,7 +97,6 @@ protected:
bool FP64;
bool FMA;
bool MIMG_R128;
bool IsGCN;
bool CIInsts;
bool GFX8Insts;
bool GFX9Insts;
@@ -165,13 +164,8 @@ protected:
bool HasArchitectedFlatScratch;
bool AddNoCarryInsts;
bool HasUnpackedD16VMem;
bool R600ALUInst;
bool CaymanISA;
bool CFALUBug;
bool LDSMisalignedBug;
bool HasMFMAInlineLiteralBug;
bool HasVertexCache;
short TexVTXClauseSize;
bool UnalignedBufferAccess;
bool UnalignedDSAccess;
bool HasPackedTID;

View File

@@ -0,0 +1,184 @@
//===-- R600ISelDAGToDAG.cpp - A dag to dag inst selector for R600 --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the R600 subtarget.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUISelDAGToDAG.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600.h"
#include "R600Subtarget.h"
#include "llvm/Analysis/ValueTracking.h"
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
const R600Subtarget *Subtarget;
bool isConstantLoad(const MemSDNode *N, int cbID) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
SDValue &Offset);
public:
explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel)
: AMDGPUDAGToDAGISel(TM, OptLevel) {}
void Select(SDNode *N) override;
bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
SDValue &Offset) override;
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
SDValue &Offset) override;
bool runOnMachineFunction(MachineFunction &MF) override;
void PreprocessISelDAG() override {}
protected:
// Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};
bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<R600Subtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
if (!N->readMem())
return false;
if (CbId == -1)
return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
}
bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
SDValue &IntPtr) {
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
IntPtr =
CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), true);
return true;
}
return false;
}
bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
SDValue &BaseReg,
SDValue &Offset) {
if (!isa<ConstantSDNode>(Addr)) {
BaseReg = Addr;
Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
return true;
}
return false;
}
void R600DAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
N->setNodeId(-1);
return; // Already selected.
}
switch (Opc) {
default:
break;
case AMDGPUISD::BUILD_VERTICAL_VECTOR:
case ISD::SCALAR_TO_VECTOR:
case ISD::BUILD_VECTOR: {
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
unsigned RegClassID;
// BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
// that adds a 128 bits reg copy when going through TwoAddressInstructions
// pass. We want to avoid 128 bits copies as much as possible because they
// can't be bundled by our scheduler.
switch (NumVectorElts) {
case 2:
RegClassID = R600::R600_Reg64RegClassID;
break;
case 4:
if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
RegClassID = R600::R600_Reg128VerticalRegClassID;
else
RegClassID = R600::R600_Reg128RegClassID;
break;
default:
llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
}
SelectBuildVector(N, RegClassID);
return;
}
}
SelectCode(N);
}
bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
SDValue &Offset) {
ConstantSDNode *C;
SDLoc DL(Addr);
if ((C = dyn_cast<ConstantSDNode>(Addr))) {
Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
Base = Addr.getOperand(0);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else {
Base = Addr;
Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
}
return true;
}
bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
SDValue &Offset) {
ConstantSDNode *IMMOffset;
if (Addr.getOpcode() == ISD::ADD &&
(IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) &&
isInt<16>(IMMOffset->getZExtValue())) {
Base = Addr.getOperand(0);
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
MVT::i32);
return true;
// If the pointer address is constant, we can move it to the offset field.
} else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) &&
isInt<16>(IMMOffset->getZExtValue())) {
Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
SDLoc(CurDAG->getEntryNode()), R600::ZERO,
MVT::i32);
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
MVT::i32);
return true;
}
// Default case, no offset
Base = Addr;
Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
return true;
}
/// This pass converts a legalized DAG into a R600-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
CodeGenOpt::Level OptLevel) {
return new R600DAGToDAGISel(TM, OptLevel);
}

View File

@@ -0,0 +1,73 @@
//===- R600MCInstLower.cpp - Lower R600 MachineInstr to an MCInst ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Code to lower R600 MachineInstrs to their corresponding MCInst.
//
//===----------------------------------------------------------------------===//
//
#include "AMDGPUMCInstLower.h"
#include "R600AsmPrinter.h"
#include "R600Subtarget.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
class R600MCInstLower : public AMDGPUMCInstLower {
public:
R600MCInstLower(MCContext &ctx, const R600Subtarget &ST,
const AsmPrinter &AP);
/// Lower a MachineInstr to an MCInst
void lower(const MachineInstr *MI, MCInst &OutMI) const;
};
R600MCInstLower::R600MCInstLower(MCContext &Ctx, const R600Subtarget &ST,
const AsmPrinter &AP)
: AMDGPUMCInstLower(Ctx, ST, AP) {}
void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
for (const MachineOperand &MO : MI->explicit_operands()) {
MCOperand MCOp;
lowerOperand(MO, MCOp);
OutMI.addOperand(MCOp);
}
}
void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
R600MCInstLower MCInstLowering(OutContext, STI, *this);
StringRef Err;
if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
C.emitError("Illegal instruction detected: " + Err);
MI->print(errs());
}
if (MI->isBundle()) {
const MachineBasicBlock *MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
while (I != MBB->instr_end() && I->isInsideBundle()) {
emitInstruction(&*I);
++I;
}
} else {
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
}
}
const MCExpr *R600AsmPrinter::lowerConstant(const Constant *CV) {
if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
return E;
return AsmPrinter::lowerConstant(CV);
}

View File

@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "R600TargetTransformInfo.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "R600Subtarget.h"

View File

@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPULDSUtils.h"
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"

View File

@@ -13,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
#include "AMDGPU.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Constants.h"