Files
clang-p2996/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Tom Stellard 79a1fd718c AMDGPU: allow specifying a workgroup size that needs to fit in a compute unit
Summary:
For GL_ARB_compute_shader we need to support workgroup sizes of at least 1024. However, if we want to allow large workgroup sizes, we may need to use less registers, as we have to run more waves per SIMD.

This patch adds an attribute to specify the maximum work group size the compiled program needs to support. It defaults, to 256, as that has no wave restrictions.

Reducing the number of registers available is done similarly to how the registers were reserved for chips with the sgpr init bug.

Reviewers: mareko, arsenm, tstellarAMD, nhaehnle

Subscribers: FireBurn, kerberizer, llvm-commits, arsenm

Differential Revision: http://reviews.llvm.org/D18340

Patch By: Bas Nieuwenhuizen

llvm-svn: 266337
2016-04-14 16:27:07 +00:00

66 lines
1.8 KiB
C++

//===-- AMDGPUBaseInfo.h - Top level definitions for AMDGPU -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#include "AMDKernelCodeT.h"
#include "llvm/IR/CallingConv.h"
namespace llvm {
class FeatureBitset;
class Function;
class GlobalValue;
class MCContext;
class MCSection;
class MCSubtargetInfo;
namespace AMDGPU {
struct IsaVersion {
unsigned Major;
unsigned Minor;
unsigned Stepping;
};
IsaVersion getIsaVersion(const FeatureBitset &Features);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features);
MCSection *getHSATextSection(MCContext &Ctx);
MCSection *getHSADataGlobalAgentSection(MCContext &Ctx);
MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);
unsigned getMaximumWorkGroupSize(const Function &F);
unsigned getInitialPSInputAddr(const Function &F);
bool isShader(CallingConv::ID cc);
bool isCompute(CallingConv::ID cc);
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
} // end namespace AMDGPU
} // end namespace llvm
#endif