Summary: The comment explains it: emitError does not necessarily exit the compilation process, and then using NoRegister leads to assertions later on. This generates incorrect code, of course, but the user should know to not use the result when an error has been emitted. It would be nice to have a test-case for this inside the LLVM repository, but llc exits on error. shader-db tests trigger the underlying issue at least on Tonga. Reviewers: arsenm, tstellarAMD, mareko Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D15826 llvm-svn: 256757
192 lines
6.2 KiB
C++
192 lines
6.2 KiB
C++
//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
/// \file
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
#include "SIMachineFunctionInfo.h"
|
|
#include "AMDGPUSubtarget.h"
|
|
#include "SIInstrInfo.h"
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/IR/LLVMContext.h"
|
|
|
|
#define MAX_LANES 64
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
// Pin the vtable to this file.
|
|
void SIMachineFunctionInfo::anchor() {}
|
|
|
|
/// Build the SI-specific function info for \p MF.
///
/// Every register member starts out as AMDGPU::NoRegister and every counter
/// at zero. The boolean input flags start out false, except WorkGroupIDX and
/// WorkItemIDX, which are enabled from the start. The body then turns on
/// further inputs based on the shader type, the function's attributes, the
/// frame info and the subtarget.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    // Registers: nothing has been assigned yet.
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    // Sizes and counters.
    LDSWaveSpillSize(0),
    PSInputAddr(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    // Spill state.
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    // Input flags.
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    DispatchID(false),
    KernargSegmentPtr(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(true),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(true),
    WorkItemIDY(false),
    WorkItemIDZ(false) {
  const AMDGPUSubtarget &Subtarget = MF.getSubtarget<AMDGPUSubtarget>();
  const Function *Fn = MF.getFunction();
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // Compute shaders request the kernarg segment pointer.
  KernargSegmentPtr = (getShaderType() == ShaderType::COMPUTE);

  // The Y/Z work-group and work-item IDs are opt-in through function
  // attributes.
  WorkGroupIDY = Fn->hasFnAttribute("amdgpu-work-group-id-y");
  WorkGroupIDZ = Fn->hasFnAttribute("amdgpu-work-group-id-z");
  WorkItemIDY = Fn->hasFnAttribute("amdgpu-work-item-id-y");
  WorkItemIDZ = Fn->hasFnAttribute("amdgpu-work-item-id-z");

  // Both stack objects and possible VGPR spilling require the private
  // segment inputs below.
  const bool MaySpill = Subtarget.isVGPRSpillingEnabled(this);
  const bool HasStackObjects = MFI->hasStackObjects();
  const bool NeedsScratch = HasStackObjects || MaySpill;

  if (NeedsScratch)
    PrivateSegmentWaveByteOffset = true;

  if (Subtarget.isAmdHsaOS()) {
    if (NeedsScratch)
      PrivateSegmentBuffer = true;

    if (Fn->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;
  }

  // Only the X, XY and XYZ work-item ID combinations are supported, so
  // requesting Z forces Y on as well.
  if (WorkItemIDZ)
    WorkItemIDY = true;
}
|
|
|
|
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
|
|
const SIRegisterInfo &TRI) {
|
|
PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
|
|
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
|
|
NumUserSGPRs += 4;
|
|
return PrivateSegmentBufferUserSGPR;
|
|
}
|
|
|
|
/// Assign the dispatch pointer to the SReg_64 super-register whose sub0 is
/// the next user SGPR, and account for the two user SGPRs it uses.
///
/// \returns the register recorded in DispatchPtrUserSGPR.
unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  const unsigned FirstSGPR = getNextUserSGPR();
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
      FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);

  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}
|
|
|
|
/// Assign the queue pointer to the SReg_64 super-register whose sub0 is the
/// next user SGPR, and account for the two user SGPRs it uses.
///
/// \returns the register recorded in QueuePtrUserSGPR.
unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  const unsigned FirstSGPR = getNextUserSGPR();
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
      FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);

  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}
|
|
|
|
/// Assign the kernarg segment pointer to the SReg_64 super-register whose
/// sub0 is the next user SGPR, and account for the two user SGPRs it uses.
///
/// \returns the register recorded in KernargSegmentPtrUserSGPR.
unsigned
SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  const unsigned FirstSGPR = getNextUserSGPR();
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
      FirstSGPR, AMDGPU::sub0, &AMDGPU::SReg_64RegClass);

  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}
|
|
|
|
/// Map one dword of an SGPR spill slot to a (VGPR, lane) pair.
///
/// The byte offset of frame object \p FrameIndex, advanced by \p SubIdx
/// dwords, is split into an index selecting which spill VGPR to use and a
/// lane within that VGPR; each VGPR provides MAX_LANES lanes. The first time
/// a given VGPR index is needed, an unused VGPR_32 register is looked up,
/// remembered in LaneVGPRs, and added as a live-in to every basic block of
/// \p MF.
///
/// If no unused VGPR can be found, an error is emitted on the function's
/// LLVMContext and VGPR0 is used as a placeholder. The generated code is
/// then incorrect, so the result must not be relied upon once an error has
/// been reported.
///
/// \returns the VGPR and lane assigned to the requested dword.
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
    MachineFunction *MF, unsigned FrameIndex, unsigned SubIdx) {
  // Each lane stores one 4-byte value, so a single VGPR covers
  // MAX_LANES * 4 bytes of spill-slot offset.
  const unsigned BytesPerLane = 4;
  const unsigned BytesPerVGPR = MAX_LANES * BytesPerLane;

  const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
  MachineRegisterInfo &MRI = MF->getRegInfo();

  int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
  Offset += SubIdx * BytesPerLane;

  unsigned LaneVGPRIdx = Offset / BytesPerVGPR;
  unsigned Lane = (Offset / BytesPerLane) % MAX_LANES;

  SpilledReg Spill;

  if (!LaneVGPRs.count(LaneVGPRIdx)) {
    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);

    if (LaneVGPR == AMDGPU::NoRegister) {
      LLVMContext &Ctx = MF->getFunction()->getContext();
      Ctx.emitError("Ran out of VGPRs for spilling SGPR");

      // When compiling from inside Mesa, the compilation continues.
      // Select an arbitrary register to avoid triggering assertions
      // during subsequent passes.
      LaneVGPR = AMDGPU::VGPR0;
    }

    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;

    // Add this register as live-in to all blocks to avoid the machine
    // verifier complaining about use of an undefined physical register.
    for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
         BI != BE; ++BI) {
      BI->addLiveIn(LaneVGPR);
    }
  }

  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
  Spill.Lane = Lane;
  return Spill;
}
|
|
|
|
unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
|
|
const MachineFunction &MF) const {
|
|
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
|
|
// FIXME: We should get this information from kernel attributes if it
|
|
// is available.
|
|
return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
|
|
}
|