Files
clang-p2996/clang/lib/Basic/Targets/NVPTX.cpp
Yaxun (Sam) Liu a6786cdd57 [HIPSPV][3/4] Enable SPIR-V emission for HIP
This patch enables SPIR-V binary emission for HIP device code via the
HIPSPV tool chain.

‘--offload’ option, which is envisioned in [1], is added for specifying
offload targets. This option is used to override default device target
(amdgcn-amd-amdhsa) for HIP compilation for emitting device code as
SPIR-V binary. The option is handled in getHIPOffloadTargetTriple().

getOffloadingDeviceToolChain() function (based on the design in the
SYCL repository) is added to select HIPSPVToolChain when HIP offload
target is ‘spirv64’.

The HIPActionBuilder is modified to produce LLVM IR at the backend
phase. HIPSPV tool chain expects to receive HIP device code as LLVM
IR so it can run external LLVM passes over them. HIPSPV TC is also
responsible for emitting the SPIR-V binary.

A Cuda GPU architecture ‘generic’ is added. The name is picked from
the LLVM SPIR-V Backend. In the HIPSPV code path the architecture
name is inserted to the bundle entry ID as target ID. Target ID is
expected to be always present so a component in the target triple
is not mistaken as target ID.

Tests are added for checking the HIPSPV tool chain.

[1]: https://lists.llvm.org/pipermail/cfe-dev/2020-December/067362.html

Patch by: Henry Linjamäki

Reviewed by: Yaxun Liu, Artem Belevich, Alexey Bader

Differential Revision: https://reviews.llvm.org/D110622
2021-12-20 10:45:09 -05:00

270 lines
9.5 KiB
C++

//===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements NVPTX TargetInfo objects.
//
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
#include "Targets.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"
using namespace clang;
using namespace clang::targets;
const Builtin::Info NVPTXTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) \
{#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \
{#ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
{#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsNVPTX.def"
};
const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};
NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
const TargetOptions &Opts,
unsigned TargetPointerWidth)
: TargetInfo(Triple) {
assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
"NVPTX only supports 32- and 64-bit modes.");
PTXVersion = 32;
for (const StringRef Feature : Opts.FeaturesAsWritten) {
if (!Feature.startswith("+ptx"))
continue;
PTXVersion = llvm::StringSwitch<unsigned>(Feature)
.Case("+ptx75", 75)
.Case("+ptx74", 74)
.Case("+ptx73", 73)
.Case("+ptx72", 72)
.Case("+ptx71", 71)
.Case("+ptx70", 70)
.Case("+ptx65", 65)
.Case("+ptx64", 64)
.Case("+ptx63", 63)
.Case("+ptx61", 61)
.Case("+ptx60", 60)
.Case("+ptx50", 50)
.Case("+ptx43", 43)
.Case("+ptx42", 42)
.Case("+ptx41", 41)
.Case("+ptx40", 40)
.Case("+ptx32", 32)
.Default(32);
}
TLSSupported = false;
VLASupported = false;
AddrSpaceMap = &NVPTXAddrSpaceMap;
UseAddrSpaceMapMangling = true;
// Define available target features
// These must be defined in sorted order!
NoAsmVariants = true;
GPU = CudaArch::SM_20;
if (TargetPointerWidth == 32)
resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
else if (Opts.NVPTXUseShortPointers)
resetDataLayout(
"e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
else
resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");
// If possible, get a TargetInfo for our host triple, so we can match its
// types.
llvm::Triple HostTriple(Opts.HostTriple);
if (!HostTriple.isNVPTX())
HostTarget.reset(AllocateTarget(llvm::Triple(Opts.HostTriple), Opts));
// If no host target, make some guesses about the data layout and return.
if (!HostTarget) {
LongWidth = LongAlign = TargetPointerWidth;
PointerWidth = PointerAlign = TargetPointerWidth;
switch (TargetPointerWidth) {
case 32:
SizeType = TargetInfo::UnsignedInt;
PtrDiffType = TargetInfo::SignedInt;
IntPtrType = TargetInfo::SignedInt;
break;
case 64:
SizeType = TargetInfo::UnsignedLong;
PtrDiffType = TargetInfo::SignedLong;
IntPtrType = TargetInfo::SignedLong;
break;
default:
llvm_unreachable("TargetPointerWidth must be 32 or 64");
}
return;
}
// Copy properties from host target.
PointerWidth = HostTarget->getPointerWidth(/* AddrSpace = */ 0);
PointerAlign = HostTarget->getPointerAlign(/* AddrSpace = */ 0);
BoolWidth = HostTarget->getBoolWidth();
BoolAlign = HostTarget->getBoolAlign();
IntWidth = HostTarget->getIntWidth();
IntAlign = HostTarget->getIntAlign();
HalfWidth = HostTarget->getHalfWidth();
HalfAlign = HostTarget->getHalfAlign();
FloatWidth = HostTarget->getFloatWidth();
FloatAlign = HostTarget->getFloatAlign();
DoubleWidth = HostTarget->getDoubleWidth();
DoubleAlign = HostTarget->getDoubleAlign();
LongWidth = HostTarget->getLongWidth();
LongAlign = HostTarget->getLongAlign();
LongLongWidth = HostTarget->getLongLongWidth();
LongLongAlign = HostTarget->getLongLongAlign();
MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0);
NewAlign = HostTarget->getNewAlign();
DefaultAlignForAttributeAligned =
HostTarget->getDefaultAlignForAttributeAligned();
SizeType = HostTarget->getSizeType();
IntMaxType = HostTarget->getIntMaxType();
PtrDiffType = HostTarget->getPtrDiffType(/* AddrSpace = */ 0);
IntPtrType = HostTarget->getIntPtrType();
WCharType = HostTarget->getWCharType();
WIntType = HostTarget->getWIntType();
Char16Type = HostTarget->getChar16Type();
Char32Type = HostTarget->getChar32Type();
Int64Type = HostTarget->getInt64Type();
SigAtomicType = HostTarget->getSigAtomicType();
ProcessIDType = HostTarget->getProcessIDType();
UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();
// This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
// we need those macros to be identical on host and device, because (among
// other things) they affect which standard library classes are defined, and
// we need all classes to be defined on both the host and device.
MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();
// Properties intentionally not copied from host:
// - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
// host/device boundary.
// - SuitableAlign: Not visible across the host/device boundary, and may
// correctly be different on host/device, e.g. if host has wider vector
// types than device.
// - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
// as its double type, but that's not necessarily true on the host.
// TODO: nvcc emits a warning when using long double on device; we should
// do the same.
}
ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
return llvm::makeArrayRef(GCCRegNames);
}
bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
return llvm::StringSwitch<bool>(Feature)
.Cases("ptx", "nvptx", true)
.Default(false);
}
void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
Builder.defineMacro("__PTX__");
Builder.defineMacro("__NVPTX__");
if (Opts.CUDAIsDevice) {
// Set __CUDA_ARCH__ for the GPU specified.
std::string CUDAArchCode = [this] {
switch (GPU) {
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX602:
case CudaArch::GFX700:
case CudaArch::GFX701:
case CudaArch::GFX702:
case CudaArch::GFX703:
case CudaArch::GFX704:
case CudaArch::GFX705:
case CudaArch::GFX801:
case CudaArch::GFX802:
case CudaArch::GFX803:
case CudaArch::GFX805:
case CudaArch::GFX810:
case CudaArch::GFX900:
case CudaArch::GFX902:
case CudaArch::GFX904:
case CudaArch::GFX906:
case CudaArch::GFX908:
case CudaArch::GFX909:
case CudaArch::GFX90a:
case CudaArch::GFX90c:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
case CudaArch::GFX1013:
case CudaArch::GFX1030:
case CudaArch::GFX1031:
case CudaArch::GFX1032:
case CudaArch::GFX1033:
case CudaArch::GFX1034:
case CudaArch::GFX1035:
case CudaArch::Generic:
case CudaArch::LAST:
break;
case CudaArch::UNUSED:
case CudaArch::UNKNOWN:
assert(false && "No GPU arch when compiling CUDA device code.");
return "";
case CudaArch::SM_20:
return "200";
case CudaArch::SM_21:
return "210";
case CudaArch::SM_30:
return "300";
case CudaArch::SM_32:
return "320";
case CudaArch::SM_35:
return "350";
case CudaArch::SM_37:
return "370";
case CudaArch::SM_50:
return "500";
case CudaArch::SM_52:
return "520";
case CudaArch::SM_53:
return "530";
case CudaArch::SM_60:
return "600";
case CudaArch::SM_61:
return "610";
case CudaArch::SM_62:
return "620";
case CudaArch::SM_70:
return "700";
case CudaArch::SM_72:
return "720";
case CudaArch::SM_75:
return "750";
case CudaArch::SM_80:
return "800";
case CudaArch::SM_86:
return "860";
}
llvm_unreachable("unhandled CudaArch");
}();
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
}
}
ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {
return llvm::makeArrayRef(BuiltinInfo, clang::NVPTX::LastTSBuiltin -
Builtin::FirstTSBuiltin);
}