Introduce pseudo-registers for the registers needed for stack access; these are replaced during finalizeLowering. Note that these pseudo-registers are currently used only for the location of the used register, and not for determining the corresponding input argument register. This is better because it avoids the need to predict from the IR whether a call will be emitted, and it also detects stack objects introduced by legalization. Test changes come from the HasStackObjects check being more accurate, since stack objects introduced during legalization are now known. llvm-svn: 308325
290 lines
9.5 KiB
C++
290 lines
9.5 KiB
C++
//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "SIMachineFunctionInfo.h"
|
|
#include "AMDGPUSubtarget.h"
|
|
#include "SIInstrInfo.h"
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/IR/LLVMContext.h"
|
|
|
|
#define MAX_LANES 64
|
|
|
|
using namespace llvm;
|
|
|
|
/// Set up the per-function SI state for \p MF: default every register field
/// and feature flag, then enable the inputs selected by the calling
/// convention, the function attributes, the subtarget and the current frame
/// info.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG),
    ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG),
    FrameOffsetReg(AMDGPU::FP_REG),
    StackPtrOffsetReg(AMDGPU::SP_REG),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    WorkItemIDXVGPR(AMDGPU::NoRegister),
    WorkItemIDYVGPR(AMDGPU::NoRegister),
    WorkItemIDZVGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    PSInputEnable(0),
    ReturnsVoid(true),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false) {
  const SISubtarget &Subtarget = MF.getSubtarget<SISubtarget>();
  const Function *Fn = MF.getFunction();

  // Replace the (0, 0) placeholders with the subtarget's values for this
  // function.
  FlatWorkGroupSizes = Subtarget.getFlatWorkGroupSizes(*Fn);
  WavesPerEU = Subtarget.getWavesPerEU(*Fn);

  const bool IsEntry = isEntryFunction();

  if (!IsEntry) {
    // Non-entry functions currently have no special inputs other than the
    // registers required for scratch access, so use fixed registers in place
    // of the defaults set above.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    // FIXME: Not really a system SGPR.
    PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
  }

  // Inputs implied by the calling convention alone.
  const CallingConv::ID CC = Fn->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    KernargSegmentPtr = !Fn->arg_empty();
    WorkGroupIDX = true;
    WorkItemIDX = true;
    break;
  case CallingConv::AMDGPU_PS:
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*Fn);
    break;
  default:
    break;
  }

  if (!Subtarget.debuggerEmitPrologue()) {
    // Enable only the work-group / work-item IDs requested through function
    // attributes; flags already set above stay set.
    if (Fn->hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (Fn->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (Fn->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (Fn->hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (Fn->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (Fn->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  } else {
    // Debugger prologue emission is on: enable every work-group and
    // work-item ID.
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const bool MaySpill = Subtarget.isVGPRSpillingEnabled(*Fn);
  const bool HasStackObjects = MFI.hasStackObjects();
  // True when the function already has stack objects or VGPR spilling is
  // enabled for it.
  const bool NeedsScratch = HasStackObjects || MaySpill;

  if (IsEntry) {
    // Only X, XY and XYZ are supported work-item ID combinations, so asking
    // for Z forces Y on as well.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (NeedsScratch) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      const bool IsHSOrGS =
          CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS;
      if (Subtarget.getGeneration() >= AMDGPUSubtarget::GFX9 && IsHSOrGS)
        PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
    }
  }

  if (Subtarget.isAmdCodeObjectV2(MF)) {
    if (NeedsScratch)
      PrivateSegmentBuffer = true;

    if (Fn->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (Fn->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (Fn->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (Subtarget.isMesaGfxShader(MF)) {
    if (NeedsScratch)
      ImplicitBufferPtr = true;
  }

  if (Fn->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && Subtarget.hasFlatAddressSpace() &&
      Subtarget.isAmdHsaOS() && IsEntry)
    FlatScratchInit = true;
}
|
|
|
|
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
|
|
const SIRegisterInfo &TRI) {
|
|
PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
|
|
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
|
|
NumUserSGPRs += 4;
|
|
return PrivateSegmentBufferUserSGPR;
|
|
}
|
|
|
|
/// Record the SReg_64 super-register whose sub0 is the next free user SGPR
/// as the dispatch pointer, bump the user SGPR count by two, and return the
/// chosen register.
unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  const unsigned FirstSGPR = getNextUserSGPR();
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);

  // The pointer takes two consecutive user SGPRs.
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}
|
|
|
|
/// Record the SReg_64 super-register whose sub0 is the next free user SGPR
/// as the queue pointer, bump the user SGPR count by two, and return the
/// chosen register.
unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  const unsigned FirstSGPR = getNextUserSGPR();
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);

  // The pointer takes two consecutive user SGPRs.
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}
|
|
|
|
/// Record the SReg_64 super-register whose sub0 is the next free user SGPR
/// as the kernarg segment pointer, bump the user SGPR count by two, and
/// return the chosen register.
unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  const unsigned FirstSGPR = getNextUserSGPR();
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);

  // The pointer takes two consecutive user SGPRs.
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}
|
|
|
|
/// Record the SReg_64 super-register whose sub0 is the next free user SGPR
/// as the dispatch ID, bump the user SGPR count by two, and return the
/// chosen register.
unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  const unsigned FirstSGPR = getNextUserSGPR();
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);

  // The ID takes two consecutive user SGPRs.
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}
|
|
|
|
/// Record the SReg_64 super-register whose sub0 is the next free user SGPR
/// as the flat scratch init value, bump the user SGPR count by two, and
/// return the chosen register.
unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  const unsigned FirstSGPR = getNextUserSGPR();
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);

  // The value takes two consecutive user SGPRs.
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}
|
|
|
|
/// Record the SReg_64 super-register whose sub0 is the next free user SGPR
/// as the implicit buffer pointer, bump the user SGPR count by two, and
/// return the chosen register.
unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  const unsigned FirstSGPR = getNextUserSGPR();
  ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg(
    FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);

  // The pointer takes two consecutive user SGPRs.
  NumUserSGPRs += 2;
  return ImplicitBufferPtrUserSGPR;
}
|
|
|
|
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
|
|
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
|
|
int FI) {
|
|
std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
|
|
|
|
// This has already been allocated.
|
|
if (!SpillLanes.empty())
|
|
return true;
|
|
|
|
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
|
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
|
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
|
unsigned WaveSize = ST.getWavefrontSize();
|
|
|
|
unsigned Size = FrameInfo.getObjectSize(FI);
|
|
assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
|
|
assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
|
|
|
|
int NumLanes = Size / 4;
|
|
|
|
// Make sure to handle the case where a wide SGPR spill may span between two
|
|
// VGPRs.
|
|
for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
|
|
unsigned LaneVGPR;
|
|
unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
|
|
|
|
if (VGPRIndex == 0) {
|
|
LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
|
|
if (LaneVGPR == AMDGPU::NoRegister) {
|
|
// We have no VGPRs left for spilling SGPRs. Reset because we won't
|
|
// partially spill the SGPR to VGPRs.
|
|
SGPRToVGPRSpills.erase(FI);
|
|
NumVGPRSpillLanes -= I;
|
|
return false;
|
|
}
|
|
|
|
SpillVGPRs.push_back(LaneVGPR);
|
|
|
|
// Add this register as live-in to all blocks to avoid machine verifer
|
|
// complaining about use of an undefined physical register.
|
|
for (MachineBasicBlock &BB : MF)
|
|
BB.addLiveIn(LaneVGPR);
|
|
} else {
|
|
LaneVGPR = SpillVGPRs.back();
|
|
}
|
|
|
|
SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/// Remove from \p MFI the stack object of every frame index that has an
/// entry in SGPRToVGPRSpills.
void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &Entry : SGPRToVGPRSpills) {
    // The map key is the frame index.
    MFI.RemoveStackObject(Entry.first);
  }
}
|