Files
clang-p2996/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
Tom Stellard 44b30b4537 AMDGPU: Remove #include "MCTargetDesc/AMDGPUMCTargetDesc.h" from common headers
Summary:
MCTargetDesc/AMDGPUMCTargetDesc.h contains enums for all the instuction
and register defintions, which are huge so we only want to include
them where needed.

This will also make it easier if we want to split the R600 and GCN
definitions into separate tablegenerated files.

I was unable to remove AMDGPUMCTargetDesc.h from SIMachineFunctionInfo.h
because it uses some enums from the header to initialize default values
for the SIMachineFunction class, so I ended up having to remove includes of
SIMachineFunctionInfo.h from headers too.

Reviewers: arsenm, nhaehnle

Reviewed By: nhaehnle

Subscribers: MatzeB, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D46272

llvm-svn: 332930
2018-05-22 02:03:23 +00:00

229 lines
8.3 KiB
C++

//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//
#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
: CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
}
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val, unsigned VReg) const {
// FIXME: Add support for non-void returns.
if (Val)
return false;
MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
return true;
}
unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
Type *ParamTy,
unsigned Offset) const {
MachineFunction &MF = MIRBuilder.getMF();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
LLT PtrType = getLLTForType(*PtrTy, DL);
unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
unsigned KernArgSegmentPtr =
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
MIRBuilder.buildConstant(OffsetReg, Offset);
MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
return DstReg;
}
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
Type *ParamTy, unsigned Offset,
unsigned DstReg) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
unsigned Align = DL.getABITypeAlignment(ParamTy);
unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
MachineMemOperand::MONonTemporal |
MachineMemOperand::MOInvariant,
TypeSize, Align);
MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}
bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<unsigned> VRegs) const {
// AMDGPU_GS and AMDGP_HS are not supported yet.
if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
F.getCallingConv() == CallingConv::AMDGPU_HS)
return false;
MachineFunction &MF = MIRBuilder.getMF();
const SISubtarget *Subtarget = static_cast<const SISubtarget *>(&MF.getSubtarget());
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
const DataLayout &DL = F.getParent()->getDataLayout();
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
if (Info->hasPrivateSegmentBuffer()) {
unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
CCInfo.AllocateReg(PrivateSegmentBufferReg);
}
if (Info->hasDispatchPtr()) {
unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
// FIXME: Need to add reg as live-in
CCInfo.AllocateReg(DispatchPtrReg);
}
if (Info->hasQueuePtr()) {
unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
// FIXME: Need to add reg as live-in
CCInfo.AllocateReg(QueuePtrReg);
}
if (Info->hasKernargSegmentPtr()) {
unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
unsigned VReg = MRI.createGenericVirtualRegister(P2);
MRI.addLiveIn(InputPtrReg, VReg);
MIRBuilder.getMBB().addLiveIn(InputPtrReg);
MIRBuilder.buildCopy(VReg, InputPtrReg);
CCInfo.AllocateReg(InputPtrReg);
}
if (Info->hasDispatchID()) {
unsigned DispatchIDReg = Info->addDispatchID(*TRI);
// FIXME: Need to add reg as live-in
CCInfo.AllocateReg(DispatchIDReg);
}
if (Info->hasFlatScratchInit()) {
unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
// FIXME: Need to add reg as live-in
CCInfo.AllocateReg(FlatScratchInitReg);
}
unsigned NumArgs = F.arg_size();
Function::const_arg_iterator CurOrigArg = F.arg_begin();
const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
unsigned PSInputNum = 0;
BitVector Skipped(NumArgs);
for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
// We can only hanlde simple value types at the moment.
ISD::ArgFlagsTy Flags;
ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
setArgFlags(OrigArg, i + 1, DL, F);
Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
!OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
PSInputNum <= 15) {
if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
Skipped.set(i);
++PSInputNum;
continue;
}
Info->markPSInputAllocated(PSInputNum);
if (!CurOrigArg->use_empty())
Info->markPSInputEnabled(PSInputNum);
++PSInputNum;
}
CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
/*IsVarArg=*/false);
if (ValEVT.isVector()) {
EVT ElemVT = ValEVT.getVectorElementType();
if (!ValEVT.isSimple())
return false;
MVT ValVT = ElemVT.getSimpleVT();
bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
OrigArg.Flags, CCInfo);
if (!Res)
return false;
} else {
MVT ValVT = ValEVT.getSimpleVT();
if (!ValEVT.isSimple())
return false;
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
// Fail if we don't know how to handle this type.
if (Res)
return false;
}
}
Function::const_arg_iterator Arg = F.arg_begin();
if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
F.getCallingConv() == CallingConv::AMDGPU_PS) {
for (unsigned i = 0, OrigArgIdx = 0;
OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
if (Skipped.test(OrigArgIdx))
continue;
CCValAssign &VA = ArgLocs[i++];
MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
}
return true;
}
for (unsigned i = 0; i != ArgLocs.size(); ++i, ++Arg) {
// FIXME: We should be getting DebugInfo from the arguments some how.
CCValAssign &VA = ArgLocs[i];
lowerParameter(MIRBuilder, Arg->getType(),
VA.getLocMemOffset() +
Subtarget->getExplicitKernelArgOffset(MF), VRegs[i]);
}
return true;
}