The AMDGPU kernel ABI is not directly representable in SPIR-V, since it relies on passing aggregates `byref`, whereas SPIR-V only encodes `byval` (which the AMDGPU BE disallows for kernel arguments). As a temporary solution to this mismatch, we add special handling for AMDGCN-flavoured SPIR-V, whereby aggregates are passed as `direct`, both to kernels and to normal functions. This is not ideal (there are pathological cases where performance is heavily impacted), but it is empirically robust and guaranteed to work, as the AMDGPU BE retains handling of `direct` passing for legacy reasons. We will revisit this in the future, but as it stands it is enough to pass a wide array of integration tests, and it generates correct SPIR-V and correct reverse translation into LLVM IR. The amdgpu-kernel-arg-pointer-type test is updated via the automated script, and thus its diff becomes quite noisy.
275 lines
11 KiB
C++
275 lines
11 KiB
C++
//===- SPIR.cpp -----------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "ABIInfoImpl.h"
|
|
#include "TargetInfo.h"
|
|
|
|
using namespace clang;
|
|
using namespace clang::CodeGen;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Base ABI and target codegen info implementation common between SPIR and
|
|
// SPIR-V.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
namespace {
|
|
class CommonSPIRABIInfo : public DefaultABIInfo {
|
|
public:
|
|
CommonSPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); }
|
|
|
|
private:
|
|
void setCCs();
|
|
};
|
|
|
|
class SPIRVABIInfo : public CommonSPIRABIInfo {
|
|
public:
|
|
SPIRVABIInfo(CodeGenTypes &CGT) : CommonSPIRABIInfo(CGT) {}
|
|
void computeInfo(CGFunctionInfo &FI) const override;
|
|
|
|
private:
|
|
ABIArgInfo classifyReturnType(QualType RetTy) const;
|
|
ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
|
|
ABIArgInfo classifyArgumentType(QualType Ty) const;
|
|
};
|
|
} // end anonymous namespace
|
|
namespace {
|
|
class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
|
|
public:
|
|
CommonSPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
|
|
: TargetCodeGenInfo(std::make_unique<CommonSPIRABIInfo>(CGT)) {}
|
|
CommonSPIRTargetCodeGenInfo(std::unique_ptr<ABIInfo> ABIInfo)
|
|
: TargetCodeGenInfo(std::move(ABIInfo)) {}
|
|
|
|
LangAS getASTAllocaAddressSpace() const override {
|
|
return getLangASFromTargetAS(
|
|
getABIInfo().getDataLayout().getAllocaAddrSpace());
|
|
}
|
|
|
|
unsigned getOpenCLKernelCallingConv() const override;
|
|
llvm::Type *getOpenCLType(CodeGenModule &CGM, const Type *T) const override;
|
|
};
|
|
class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
|
|
public:
|
|
SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
|
|
: CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {}
|
|
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
|
|
};
|
|
} // End anonymous namespace.
|
|
|
|
/// Switches the runtime calling convention from the default C convention to
/// SPIR_FUNC.
void CommonSPIRABIInfo::setCCs() {
  // The base class is expected to have left the runtime CC at its default.
  assert(getRuntimeCC() == llvm::CallingConv::C);

  RuntimeCC = llvm::CallingConv::SPIR_FUNC;
}
|
|
|
|
/// Classifies how a value of type \p RetTy is returned.
///
/// Only aggregates on AMD-vendor triples receive special treatment; every
/// other case is forwarded to DefaultABIInfo::classifyReturnType().
ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
  const bool IsAMD = getTarget().getTriple().getVendor() == llvm::Triple::AMD;

  // Non-AMD targets, non-aggregates, and records for which the C++ ABI
  // dictates a specific passing convention all use the default rules.
  if (!IsAMD || !isAggregateTypeForABI(RetTy) ||
      getRecordArgABI(RetTy, getCXXABI()))
    return DefaultABIInfo::classifyReturnType(RetTy);

  // Records ending in a flexible array member also use the default rules.
  if (const auto *RecTy = RetTy->getAs<RecordType>())
    if (RecTy->getDecl()->hasFlexibleArrayMember())
      return DefaultABIInfo::classifyReturnType(RetTy);

  // TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; in order to
  // avoid encoding various architecture specific bits here we return everything
  // as direct to retain type info for things like aggregates, for later perusal
  // when translating back to LLVM/lowering in the BE. This is also why we
  // disable flattening as the outcomes can mismatch between SPIR-V and AMDGPU.
  // This will be revisited / optimised in the future.
  llvm::Type *DirectTy = CGT.ConvertType(RetTy);
  return ABIArgInfo::getDirect(DirectTy, 0u, nullptr, false);
}
|
|
|
|
/// Classifies how an argument of type \p Ty is passed to a kernel.
///
/// Kernel-specific rules only apply when compiling CUDA/HIP device code; in
/// every other situation the result is that of classifyArgumentType().
ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
  if (!getContext().getLangOpts().CUDAIsDevice)
    return classifyArgumentType(Ty);

  llvm::Type *LTy = CGT.ConvertType(Ty);

  // Coerce pointer arguments with default address space to CrossWorkGroup
  // pointers for HIPSPV/CUDASPV. When the language mode is HIP/CUDA, the
  // SPIRTargetInfo maps cuda_device to SPIR-V's CrossWorkGroup address space.
  if (auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(LTy)) {
    const auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
    const auto GlobalAS =
        getContext().getTargetAddressSpace(LangAS::cuda_device);
    if (PtrTy->getAddressSpace() == DefaultAS) {
      llvm::Type *GlobalPtrTy =
          llvm::PointerType::get(PtrTy->getContext(), GlobalAS);
      return ABIArgInfo::getDirect(GlobalPtrTy, 0, nullptr, false);
    }
  }

  // Anything that is neither a default-address-space pointer nor an aggregate
  // follows the ordinary argument rules.
  if (!isAggregateTypeForABI(Ty))
    return classifyArgumentType(Ty);

  // TODO: The AMDGPU kernel ABI passes aggregates byref, which is not
  // currently expressible in SPIR-V; SPIR-V passes aggregates byval,
  // which the AMDGPU kernel ABI does not allow. Passing aggregates as
  // direct works around this impedance mismatch, as it retains type info
  // and can be correctly handled, post reverse-translation, by the AMDGPU
  // BE, which has to support this CC for legacy OpenCL purposes. It can
  // be brittle and does lead to performance degradation in certain
  // pathological cases. This will be revisited / optimised in the future,
  // once a way to deal with the byref/byval impedance mismatch is
  // identified.
  if (getTarget().getTriple().getVendor() == llvm::Triple::AMD)
    return ABIArgInfo::getDirect(LTy, 0, nullptr, false);

  // Force copying aggregate type in kernel arguments by value when
  // compiling CUDA targeting SPIR-V. This is required for the object
  // copied to be valid on the device.
  // This behavior follows the CUDA spec
  // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
  // and matches the NVPTX implementation.
  return getNaturalAlignIndirect(Ty, /* byval */ true);
}
|
|
|
|
/// Classifies how an argument of type \p Ty is passed to a non-kernel
/// function.
///
/// Only aggregates on AMD-vendor triples receive special treatment; every
/// other case is forwarded to DefaultABIInfo::classifyArgumentType().
ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const {
  const bool IsAMD = getTarget().getTriple().getVendor() == llvm::Triple::AMD;
  if (!IsAMD || !isAggregateTypeForABI(Ty))
    return DefaultABIInfo::classifyArgumentType(Ty);

  // Records with non-trivial destructors/copy-constructors should not be
  // passed by value.
  if (auto RAA = getRecordArgABI(Ty, getCXXABI())) {
    const bool ByVal = RAA == CGCXXABI::RAA_DirectInMemory;
    return getNaturalAlignIndirect(Ty, ByVal);
  }

  // Records ending in a flexible array member use the default rules.
  if (const auto *RecTy = Ty->getAs<RecordType>())
    if (RecTy->getDecl()->hasFlexibleArrayMember())
      return DefaultABIInfo::classifyArgumentType(Ty);

  // All remaining aggregates are passed directly, using their converted LLVM
  // type.
  llvm::Type *DirectTy = CGT.ConvertType(Ty);
  return ABIArgInfo::getDirect(DirectTy, 0u, nullptr, false);
}
|
|
|
|
/// Fills in the return and argument ABI information of \p FI.
///
/// The logic is the same as in DefaultABIInfo, except that arguments of
/// SPIR_KERNEL functions go through classifyKernelArgumentType().
void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
  const bool IsKernel =
      FI.getCallingConvention() == llvm::CallingConv::SPIR_KERNEL;

  // Give the C++ ABI the first chance to classify the return value.
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  for (auto &Arg : FI.arguments())
    Arg.info = IsKernel ? classifyKernelArgumentType(Arg.type)
                        : classifyArgumentType(Arg.type);
}
|
|
|
|
namespace clang {
|
|
namespace CodeGen {
|
|
/// Computes the ABI information of \p FI with a temporary ABI object: a
/// SPIRVABIInfo when the target triple is SPIR-V, a CommonSPIRABIInfo
/// otherwise.
void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
  const bool IsSPIRV = CGM.getTarget().getTriple().isSPIRV();
  auto &Types = CGM.getTypes();

  if (IsSPIRV) {
    SPIRVABIInfo SPIRVInfo(Types);
    SPIRVInfo.computeInfo(FI);
    return;
  }

  CommonSPIRABIInfo CommonInfo(Types);
  CommonInfo.computeInfo(FI);
}
|
|
}
|
|
}
|
|
|
|
unsigned CommonSPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
|
|
return llvm::CallingConv::SPIR_KERNEL;
|
|
}
|
|
|
|
void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
|
|
const FunctionType *&FT) const {
|
|
// Convert HIP kernels to SPIR-V kernels.
|
|
if (getABIInfo().getContext().getLangOpts().HIP) {
|
|
FT = getABIInfo().getContext().adjustFunctionType(
|
|
FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
|
|
return;
|
|
}
|
|
}
|
|
|
|
/// Construct a SPIR-V target extension type for the given OpenCL image type.
|
|
static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef BaseType,
|
|
StringRef OpenCLName,
|
|
unsigned AccessQualifier) {
|
|
// These parameters compare to the operands of OpTypeImage (see
|
|
// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpTypeImage
|
|
// for more details). The first 6 integer parameters all default to 0, and
|
|
// will be changed to 1 only for the image type(s) that set the parameter to
|
|
// one. The 7th integer parameter is the access qualifier, which is tacked on
|
|
// at the end.
|
|
SmallVector<unsigned, 7> IntParams = {0, 0, 0, 0, 0, 0};
|
|
|
|
// Choose the dimension of the image--this corresponds to the Dim enum in
|
|
// SPIR-V (first integer parameter of OpTypeImage).
|
|
if (OpenCLName.starts_with("image2d"))
|
|
IntParams[0] = 1; // 1D
|
|
else if (OpenCLName.starts_with("image3d"))
|
|
IntParams[0] = 2; // 2D
|
|
else if (OpenCLName == "image1d_buffer")
|
|
IntParams[0] = 5; // Buffer
|
|
else
|
|
assert(OpenCLName.starts_with("image1d") && "Unknown image type");
|
|
|
|
// Set the other integer parameters of OpTypeImage if necessary. Note that the
|
|
// OpenCL image types don't provide any information for the Sampled or
|
|
// Image Format parameters.
|
|
if (OpenCLName.contains("_depth"))
|
|
IntParams[1] = 1;
|
|
if (OpenCLName.contains("_array"))
|
|
IntParams[2] = 1;
|
|
if (OpenCLName.contains("_msaa"))
|
|
IntParams[3] = 1;
|
|
|
|
// Access qualifier
|
|
IntParams.push_back(AccessQualifier);
|
|
|
|
return llvm::TargetExtType::get(Ctx, BaseType, {llvm::Type::getVoidTy(Ctx)},
|
|
IntParams);
|
|
}
|
|
|
|
/// Maps the OpenCL type \p Ty onto the SPIR-V target extension type that
/// represents it.
///
/// Pipes, images, samplers, events, queues, reserve ids and the Intel subgroup
/// AVC types are handled; null is returned for everything else.
llvm::Type *CommonSPIRTargetCodeGenInfo::getOpenCLType(CodeGenModule &CGM,
                                                       const Type *Ty) const {
  llvm::LLVMContext &Ctx = CGM.getLLVMContext();

  // Pipes carry a single integer parameter: 0 when read-only, 1 otherwise.
  if (auto *PipeTy = dyn_cast<PipeType>(Ty))
    return llvm::TargetExtType::get(Ctx, "spirv.Pipe", {},
                                    {!PipeTy->isReadOnly()});

  const auto *BuiltinTy = dyn_cast<BuiltinType>(Ty);
  if (!BuiltinTy)
    return nullptr;

  // The enumerator names must keep the AQ_<Suffix> form: the IMAGE_TYPE macro
  // below pastes the access suffix onto "AQ_".
  enum AccessQualifier : unsigned { AQ_ro = 0, AQ_wo = 1, AQ_rw = 2 };

  switch (BuiltinTy->getKind()) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix)                   \
  case BuiltinType::Id:                                                        \
    return getSPIRVImageType(Ctx, "spirv.Image", #ImgType, AQ_##Suffix);
#include "clang/Basic/OpenCLImageTypes.def"
  case BuiltinType::OCLSampler:
    return llvm::TargetExtType::get(Ctx, "spirv.Sampler");
  case BuiltinType::OCLEvent:
    return llvm::TargetExtType::get(Ctx, "spirv.Event");
  case BuiltinType::OCLClkEvent:
    return llvm::TargetExtType::get(Ctx, "spirv.DeviceEvent");
  case BuiltinType::OCLQueue:
    return llvm::TargetExtType::get(Ctx, "spirv.Queue");
  case BuiltinType::OCLReserveID:
    return llvm::TargetExtType::get(Ctx, "spirv.ReserveId");
#define INTEL_SUBGROUP_AVC_TYPE(Name, Id)                                      \
  case BuiltinType::OCLIntelSubgroupAVC##Id:                                   \
    return llvm::TargetExtType::get(Ctx, "spirv.Avc" #Id "INTEL");
#include "clang/Basic/OpenCLExtensionTypes.def"
  default:
    break;
  }

  // Builtin type without a dedicated SPIR-V representation.
  return nullptr;
}
|
|
|
|
std::unique_ptr<TargetCodeGenInfo>
|
|
CodeGen::createCommonSPIRTargetCodeGenInfo(CodeGenModule &CGM) {
|
|
return std::make_unique<CommonSPIRTargetCodeGenInfo>(CGM.getTypes());
|
|
}
|
|
|
|
std::unique_ptr<TargetCodeGenInfo>
|
|
CodeGen::createSPIRVTargetCodeGenInfo(CodeGenModule &CGM) {
|
|
return std::make_unique<SPIRVTargetCodeGenInfo>(CGM.getTypes());
|
|
}
|