Currently, some refactoring of the existing OpenCL option-settings interface is needed to support OpenCL C 3.0. The problem is that OpenCL extensions and features are determined not only by the target platform but also by the OpenCL version. Also, there are core extensions/features which are supported unconditionally in a specific OpenCL C version. In fact, these rules are not being followed for all targets. For example, there are some targets (such as nvptx and r600) which don't support OpenCL C 2.0 core features (nvptx.languageOptsOpenCL.cl, r600.languageOptsOpenCL.cl). After the change there will be an explicit differentiation between optional core and core OpenCL features, which allows giving diagnostics if the target doesn't support any of the necessary core features for a specific OpenCL version. This patch also eliminates the `OpenCLOptions` instance duplication from `TargetOptions`. The `OpenCLOptions` instance should live in `Sema`, as it is going to be modified during parsing. Removing this duplication will also allow the `OpenCLOptions` class to be simplified in general for parsing purposes. Reviewed By: Anastasia Differential Revision: https://reviews.llvm.org/D92277
439 lines
13 KiB
C++
439 lines
13 KiB
C++
//===--- AMDGPU.h - Declare AMDGPU target feature support -------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file declares AMDGPU TargetInfo objects.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
|
|
#define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
|
|
|
|
#include "clang/Basic/TargetID.h"
|
|
#include "clang/Basic/TargetInfo.h"
|
|
#include "clang/Basic/TargetOptions.h"
|
|
#include "llvm/ADT/StringSet.h"
|
|
#include "llvm/ADT/Triple.h"
|
|
#include "llvm/Support/Compiler.h"
|
|
#include "llvm/Support/TargetParser.h"
|
|
|
|
namespace clang {
|
|
namespace targets {
|
|
|
|
class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
|
|
|
|
// Table of builtin descriptors; the definition is not in this header.
// NOTE(review): presumably the data returned by getTargetBuiltins() - confirm.
static const Builtin::Info BuiltinInfo[];
|
|
// Table of register name strings; the definition is not in this header.
// NOTE(review): presumably the data returned by getGCCRegNames() - confirm.
static const char *const GCCRegNames[];
|
|
|
|
/// Numeric address spaces referred to by name inside this class.
/// Note that the value 2 is not given a name in this enumeration.
enum AddrSpace {
  Generic = 0,  ///< Mapped to LangAS::opencl_generic for OpenCL builtins.
  Global = 1,   ///< Mapped to LangAS::opencl_global for OpenCL builtins.
  Local = 3,    ///< Mapped to LangAS::opencl_local; 32-bit pointers.
  Constant = 4, ///< Mapped to LangAS::opencl_constant; also used for vtables.
  Private = 5   ///< Mapped to LangAS::opencl_private; 32-bit pointers.
};
|
|
// Language-to-target address space map; defined out of line.
// NOTE(review): the name suggests the default address space maps to generic -
// confirm against the definition.
static const LangASMap AMDGPUDefIsGenMap;
|
|
// Language-to-target address space map; defined out of line.
// NOTE(review): the name suggests the default address space maps to private -
// confirm against the definition.
static const LangASMap AMDGPUDefIsPrivMap;
|
|
|
|
// GPU kind most recently parsed from a CPU name by setCPU().
llvm::AMDGPU::GPUKind GPUKind;
|
|
// Bitmask of llvm::AMDGPU::FEATURE_* flags for GPUKind, refreshed by setCPU()
// and tested by the has*() helpers in this class.
unsigned GPUFeatures;
|
|
// Set to 64 by handleTargetFeatures() when "+wavefrontsize64" is present; any
// other initialization happens outside this header.
unsigned WavefrontSize;
|
|
|
|
/// Target ID is device name followed by optional feature name postfixed
|
|
/// by plus or minus sign delimited by colon, e.g. gfx908:xnack+:sramecc-.
|
|
/// If the target ID contains feature+, map it to true.
|
|
/// If the target ID contains feature-, map it to false.
|
|
/// If the target ID does not contain a feature (default), do not map it.
|
|
llvm::StringMap<bool> OffloadArchFeatures;
|
|
std::string TargetID;
|
|
|
|
bool hasFP64() const {
|
|
return getTriple().getArch() == llvm::Triple::amdgcn ||
|
|
!!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64);
|
|
}
|
|
|
|
/// Has fast fma f32
|
|
bool hasFastFMAF() const {
|
|
return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_FMA_F32);
|
|
}
|
|
|
|
/// Has fast fma f64
|
|
bool hasFastFMA() const {
|
|
return getTriple().getArch() == llvm::Triple::amdgcn;
|
|
}
|
|
|
|
bool hasFMAF() const {
|
|
return getTriple().getArch() == llvm::Triple::amdgcn ||
|
|
!!(GPUFeatures & llvm::AMDGPU::FEATURE_FMA);
|
|
}
|
|
|
|
bool hasFullRateDenormalsF32() const {
|
|
return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
|
|
}
|
|
|
|
bool hasLDEXPF() const {
|
|
return getTriple().getArch() == llvm::Triple::amdgcn ||
|
|
!!(GPUFeatures & llvm::AMDGPU::FEATURE_LDEXP);
|
|
}
|
|
|
|
/// Returns true when the architecture of \p TT is llvm::Triple::amdgcn.
static bool isAMDGCN(const llvm::Triple &TT) {
  const bool ArchIsGCN = TT.getArch() == llvm::Triple::amdgcn;
  return ArchIsGCN;
}
|
|
|
|
/// Returns true when the architecture of \p TT is llvm::Triple::r600.
static bool isR600(const llvm::Triple &TT) {
  const bool ArchIsR600 = TT.getArch() == llvm::Triple::r600;
  return ArchIsR600;
}
|
|
|
|
public:
|
|
/// Constructs the target description from \p Triple and \p Opts. The
/// definition is not part of this header.
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts);
|
|
|
|
/// Installs the address space map for this target; defined out of line.
/// NOTE(review): presumably picks AMDGPUDefIsPrivMap when \p DefaultIsPrivate
/// is true and AMDGPUDefIsGenMap otherwise - confirm against the definition.
void setAddressSpaceMap(bool DefaultIsPrivate);
|
|
|
|
/// Override of the base-class adjust() hook taking the language options;
/// defined out of line.
void adjust(LangOptions &Opts) override;
|
|
|
|
uint64_t getPointerWidthV(unsigned AddrSpace) const override {
|
|
if (isR600(getTriple()))
|
|
return 32;
|
|
|
|
if (AddrSpace == Private || AddrSpace == Local)
|
|
return 32;
|
|
|
|
return 64;
|
|
}
|
|
|
|
uint64_t getPointerAlignV(unsigned AddrSpace) const override {
|
|
return getPointerWidthV(AddrSpace);
|
|
}
|
|
|
|
uint64_t getMaxPointerWidth() const override {
|
|
return getTriple().getArch() == llvm::Triple::amdgcn ? 64 : 32;
|
|
}
|
|
|
|
/// Returns an empty clobber string.
const char *getClobbers() const override {
  return "";
}
|
|
|
|
/// Returns the register names known to this target; defined out of line.
/// NOTE(review): presumably a view over the GCCRegNames table - confirm.
ArrayRef<const char *> getGCCRegNames() const override;
|
|
|
|
/// This target declares no register aliases, so the returned array is always
/// empty.
ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override {
  return ArrayRef<TargetInfo::GCCRegAlias>();
}
|
|
|
|
/// Accepted register names: (n, m is unsigned integer, n < m)
|
|
/// v
|
|
/// s
|
|
/// a
|
|
/// {vn}, {v[n]}
|
|
/// {sn}, {s[n]}
|
|
/// {an}, {a[n]}
|
|
/// {S} , where S is a special register name
|
|
////{v[n:m]}
|
|
/// {s[n:m]}
|
|
/// {a[n:m]}
|
|
bool validateAsmConstraint(const char *&Name,
|
|
TargetInfo::ConstraintInfo &Info) const override {
|
|
static const ::llvm::StringSet<> SpecialRegs({
|
|
"exec", "vcc", "flat_scratch", "m0", "scc", "tba", "tma",
|
|
"flat_scratch_lo", "flat_scratch_hi", "vcc_lo", "vcc_hi", "exec_lo",
|
|
"exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi",
|
|
});
|
|
|
|
switch (*Name) {
|
|
case 'I':
|
|
Info.setRequiresImmediate(-16, 64);
|
|
return true;
|
|
case 'J':
|
|
Info.setRequiresImmediate(-32768, 32767);
|
|
return true;
|
|
case 'A':
|
|
case 'B':
|
|
case 'C':
|
|
Info.setRequiresImmediate();
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
StringRef S(Name);
|
|
|
|
if (S == "DA" || S == "DB") {
|
|
Name++;
|
|
Info.setRequiresImmediate();
|
|
return true;
|
|
}
|
|
|
|
bool HasLeftParen = false;
|
|
if (S.front() == '{') {
|
|
HasLeftParen = true;
|
|
S = S.drop_front();
|
|
}
|
|
if (S.empty())
|
|
return false;
|
|
if (S.front() != 'v' && S.front() != 's' && S.front() != 'a') {
|
|
if (!HasLeftParen)
|
|
return false;
|
|
auto E = S.find('}');
|
|
if (!SpecialRegs.count(S.substr(0, E)))
|
|
return false;
|
|
S = S.drop_front(E + 1);
|
|
if (!S.empty())
|
|
return false;
|
|
// Found {S} where S is a special register.
|
|
Info.setAllowsRegister();
|
|
Name = S.data() - 1;
|
|
return true;
|
|
}
|
|
S = S.drop_front();
|
|
if (!HasLeftParen) {
|
|
if (!S.empty())
|
|
return false;
|
|
// Found s, v or a.
|
|
Info.setAllowsRegister();
|
|
Name = S.data() - 1;
|
|
return true;
|
|
}
|
|
bool HasLeftBracket = false;
|
|
if (!S.empty() && S.front() == '[') {
|
|
HasLeftBracket = true;
|
|
S = S.drop_front();
|
|
}
|
|
unsigned long long N;
|
|
if (S.empty() || consumeUnsignedInteger(S, 10, N))
|
|
return false;
|
|
if (!S.empty() && S.front() == ':') {
|
|
if (!HasLeftBracket)
|
|
return false;
|
|
S = S.drop_front();
|
|
unsigned long long M;
|
|
if (consumeUnsignedInteger(S, 10, M) || N >= M)
|
|
return false;
|
|
}
|
|
if (HasLeftBracket) {
|
|
if (S.empty() || S.front() != ']')
|
|
return false;
|
|
S = S.drop_front();
|
|
}
|
|
if (S.empty() || S.front() != '}')
|
|
return false;
|
|
S = S.drop_front();
|
|
if (!S.empty())
|
|
return false;
|
|
// Found {vn}, {sn}, {an}, {v[n]}, {s[n]}, {a[n]}, {v[n:m]}, {s[n:m]}
|
|
// or {a[n:m]}.
|
|
Info.setAllowsRegister();
|
|
Name = S.data() - 1;
|
|
return true;
|
|
}
|
|
|
|
// \p Constraint will be left pointing at the last character of
|
|
// the constraint. In practice, it won't be changed unless the
|
|
// constraint is longer than one character.
|
|
std::string convertConstraint(const char *&Constraint) const override {
|
|
|
|
StringRef S(Constraint);
|
|
if (S == "DA" || S == "DB") {
|
|
return std::string("^") + std::string(Constraint++, 2);
|
|
}
|
|
|
|
const char *Begin = Constraint;
|
|
TargetInfo::ConstraintInfo Info("", "");
|
|
if (validateAsmConstraint(Constraint, Info))
|
|
return std::string(Begin).substr(0, Constraint - Begin + 1);
|
|
|
|
Constraint = Begin;
|
|
return std::string(1, *Constraint);
|
|
}
|
|
|
|
bool
|
|
initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
|
|
StringRef CPU,
|
|
const std::vector<std::string> &FeatureVec) const override;
|
|
|
|
/// Returns the builtins provided by this target; defined out of line.
/// NOTE(review): presumably a view over the BuiltinInfo table - confirm.
ArrayRef<Builtin::Info> getTargetBuiltins() const override;
|
|
|
|
/// Always answers false for this target.
bool useFP16ConversionIntrinsics() const override {
  return false;
}
|
|
|
|
void getTargetDefines(const LangOptions &Opts,
|
|
MacroBuilder &Builder) const override;
|
|
|
|
/// This target uses the char-pointer flavour of the builtin va_list.
BuiltinVaListKind getBuiltinVaListKind() const override {
  const BuiltinVaListKind Kind = TargetInfo::CharPtrBuiltinVaList;
  return Kind;
}
|
|
|
|
/// Returns true when \p Name parses to a GPU kind other than GK_NONE, using
/// the AMDGCN parser on amdgcn triples and the R600 parser otherwise.
bool isValidCPUName(StringRef Name) const override {
  const bool IsGCN = getTriple().getArch() == llvm::Triple::amdgcn;
  const auto Parsed = IsGCN ? llvm::AMDGPU::parseArchAMDGCN(Name)
                            : llvm::AMDGPU::parseArchR600(Name);
  return Parsed != llvm::AMDGPU::GK_NONE;
}
|
|
|
|
/// Override of the base-class hook that receives the \p Values output vector;
/// defined out of line.
void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
|
|
|
|
bool setCPU(const std::string &Name) override {
|
|
if (getTriple().getArch() == llvm::Triple::amdgcn) {
|
|
GPUKind = llvm::AMDGPU::parseArchAMDGCN(Name);
|
|
GPUFeatures = llvm::AMDGPU::getArchAttrAMDGCN(GPUKind);
|
|
} else {
|
|
GPUKind = llvm::AMDGPU::parseArchR600(Name);
|
|
GPUFeatures = llvm::AMDGPU::getArchAttrR600(GPUKind);
|
|
}
|
|
|
|
return GPUKind != llvm::AMDGPU::GK_NONE;
|
|
}
|
|
|
|
void setSupportedOpenCLOpts() override {
|
|
auto &Opts = getSupportedOpenCLOpts();
|
|
Opts["cl_clang_storage_class_specifiers"] = true;
|
|
Opts["__cl_clang_variadic_functions"] = true;
|
|
Opts["__cl_clang_function_pointers"] = true;
|
|
|
|
bool IsAMDGCN = isAMDGCN(getTriple());
|
|
|
|
Opts["cl_khr_fp64"] = hasFP64();
|
|
|
|
if (IsAMDGCN || GPUKind >= llvm::AMDGPU::GK_CEDAR) {
|
|
Opts["cl_khr_byte_addressable_store"] = true;
|
|
Opts["cl_khr_global_int32_base_atomics"] = true;
|
|
Opts["cl_khr_global_int32_extended_atomics"] = true;
|
|
Opts["cl_khr_local_int32_base_atomics"] = true;
|
|
Opts["cl_khr_local_int32_extended_atomics"] = true;
|
|
}
|
|
|
|
if (IsAMDGCN) {
|
|
Opts["cl_khr_fp16"] = true;
|
|
Opts["cl_khr_int64_base_atomics"] = true;
|
|
Opts["cl_khr_int64_extended_atomics"] = true;
|
|
Opts["cl_khr_mipmap_image"] = true;
|
|
Opts["cl_khr_mipmap_image_writes"] = true;
|
|
Opts["cl_khr_subgroups"] = true;
|
|
Opts["cl_khr_3d_image_writes"] = true;
|
|
Opts["cl_amd_media_ops"] = true;
|
|
Opts["cl_amd_media_ops2"] = true;
|
|
}
|
|
}
|
|
|
|
/// Address space for the OpenCL type kind \p TK.
///
/// Images use the constant address space; clk_event, queue and reserve_id
/// use the global address space. Every other kind defers to TargetInfo.
LangAS getOpenCLTypeAddrSpace(OpenCLTypeKind TK) const override {
  if (TK == OCLTK_Image)
    return LangAS::opencl_constant;

  if (TK == OCLTK_ClkEvent || TK == OCLTK_Queue || TK == OCLTK_ReserveID)
    return LangAS::opencl_global;

  return TargetInfo::getOpenCLTypeAddrSpace(TK);
}
|
|
|
|
/// Translates the numeric address space \p AS used by a builtin into the
/// matching OpenCL language address space. Numbers without an entry in the
/// AddrSpace enumeration go through getLangASFromTargetAS().
LangAS getOpenCLBuiltinAddressSpace(unsigned AS) const override {
  switch (AS) {
  case Generic: // 0
    return LangAS::opencl_generic;
  case Global: // 1
    return LangAS::opencl_global;
  case Local: // 3
    return LangAS::opencl_local;
  case Constant: // 4
    return LangAS::opencl_constant;
  case Private: // 5
    return LangAS::opencl_private;
  default:
    break;
  }
  return getLangASFromTargetAS(AS);
}
|
|
|
|
/// CUDA builtins always map to LangAS::Default; \p AS is not consulted.
LangAS getCUDABuiltinAddressSpace(unsigned AS) const override {
  const LangAS Result = LangAS::Default;
  return Result;
}
|
|
|
|
/// Language address space corresponding to the target Constant address space
/// (AddrSpace::Constant).
llvm::Optional<LangAS> getConstantAddressSpace() const override {
  const LangAS ConstantAS = getLangASFromTargetAS(Constant);
  return ConstantAS;
}
|
|
|
|
/// \returns Target specific vtbl ptr address space.
|
|
unsigned getVtblPtrAddressSpace() const override {
|
|
return static_cast<unsigned>(Constant);
|
|
}
|
|
|
|
/// \returns If a target requires an address within a target specific address
|
|
/// space \p AddressSpace to be converted in order to be used, then return the
|
|
/// corresponding target specific DWARF address space.
|
|
///
|
|
/// \returns Otherwise return None and no conversion will be emitted in the
|
|
/// DWARF.
|
|
Optional<unsigned>
|
|
getDWARFAddressSpace(unsigned AddressSpace) const override {
|
|
const unsigned DWARF_Private = 1;
|
|
const unsigned DWARF_Local = 2;
|
|
if (AddressSpace == Private) {
|
|
return DWARF_Private;
|
|
} else if (AddressSpace == Local) {
|
|
return DWARF_Local;
|
|
} else {
|
|
return None;
|
|
}
|
|
}
|
|
|
|
/// Only the C and OpenCL kernel calling conventions are accepted without
/// comment; every other convention produces a warning result.
CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
  const bool Accepted = CC == CC_C || CC == CC_OpenCLKernel;
  if (Accepted)
    return CCCR_OK;
  return CCCR_Warning;
}
|
|
|
|
// In amdgcn target the null pointer in global, constant, and generic
|
|
// address space has value 0 but in private and local address space has
|
|
// value ~0.
|
|
uint64_t getNullPointerValue(LangAS AS) const override {
|
|
// FIXME: Also should handle region.
|
|
return (AS == LangAS::opencl_local || AS == LangAS::opencl_private)
|
|
? ~0 : 0;
|
|
}
|
|
|
|
/// Override of the base-class hook that receives the auxiliary target \p Aux;
/// defined out of line.
void setAuxTarget(const TargetInfo *Aux) override;
|
|
|
|
/// Always answers true for this target.
bool hasExtIntType() const override {
  return true;
}
|
|
|
|
// Record offload arch features since they are needed for defining the
|
|
// pre-defined macros.
|
|
bool handleTargetFeatures(std::vector<std::string> &Features,
|
|
DiagnosticsEngine &Diags) override {
|
|
auto TargetIDFeatures =
|
|
getAllPossibleTargetIDFeatures(getTriple(), getArchNameAMDGCN(GPUKind));
|
|
llvm::for_each(Features, [&](const auto &F) {
|
|
assert(F.front() == '+' || F.front() == '-');
|
|
if (F == "+wavefrontsize64")
|
|
WavefrontSize = 64;
|
|
bool IsOn = F.front() == '+';
|
|
StringRef Name = StringRef(F).drop_front();
|
|
if (llvm::find(TargetIDFeatures, Name) == TargetIDFeatures.end())
|
|
return;
|
|
assert(OffloadArchFeatures.find(Name) == OffloadArchFeatures.end());
|
|
OffloadArchFeatures[Name] = IsOn;
|
|
});
|
|
return true;
|
|
}
|
|
|
|
/// Returns the target ID of this target.
///
/// The result is None for non-amdgcn triples and an empty string when no GPU
/// has been selected. Otherwise it is the canonical target ID built from the
/// GPU's arch name and the recorded OffloadArchFeatures.
Optional<std::string> getTargetID() const override {
  if (isAMDGCN(getTriple())) {
    // When -target-cpu is not set, we assume generic code that it is valid
    // for all GPU and use an empty string as target ID to represent that.
    const bool NoCPUSelected = GPUKind == llvm::AMDGPU::GK_NONE;
    if (NoCPUSelected)
      return std::string("");
    return getCanonicalTargetID(getArchNameAMDGCN(GPUKind),
                                OffloadArchFeatures);
  }
  return llvm::None;
}
|
|
};
|
|
|
|
} // namespace targets
|
|
} // namespace clang
|
|
|
|
#endif // LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
|