AMDGPU: Add gfx950 subtarget definitions (#116307)
Mostly a stub, but adds some baseline tests and tests for removed instructions.
This commit is contained in:
@@ -712,6 +712,8 @@ Target Specific Changes
|
||||
AMDGPU Support
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
- Initial support for gfx950
|
||||
|
||||
- Added headers ``gpuintrin.h`` and ``amdgpuintrin.h`` that contain common
|
||||
definitions for GPU builtin functions. This header can be included for OpenMP,
|
||||
CUDA, HIP, OpenCL, and C/C++.
|
||||
|
||||
@@ -107,6 +107,7 @@ enum class OffloadArch {
|
||||
GFX940,
|
||||
GFX941,
|
||||
GFX942,
|
||||
GFX950,
|
||||
GFX10_1_GENERIC,
|
||||
GFX1010,
|
||||
GFX1011,
|
||||
|
||||
@@ -125,6 +125,7 @@ static const OffloadArchToStringMap arch_names[] = {
|
||||
GFX(940), // gfx940
|
||||
GFX(941), // gfx941
|
||||
GFX(942), // gfx942
|
||||
GFX(950), // gfx950
|
||||
{OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
|
||||
GFX(1010), // gfx1010
|
||||
GFX(1011), // gfx1011
|
||||
|
||||
@@ -209,6 +209,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
|
||||
case OffloadArch::GFX940:
|
||||
case OffloadArch::GFX941:
|
||||
case OffloadArch::GFX942:
|
||||
case OffloadArch::GFX950:
|
||||
case OffloadArch::GFX10_1_GENERIC:
|
||||
case OffloadArch::GFX1010:
|
||||
case OffloadArch::GFX1011:
|
||||
|
||||
@@ -2304,6 +2304,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
|
||||
case OffloadArch::GFX940:
|
||||
case OffloadArch::GFX941:
|
||||
case OffloadArch::GFX942:
|
||||
case OffloadArch::GFX950:
|
||||
case OffloadArch::GFX10_1_GENERIC:
|
||||
case OffloadArch::GFX1010:
|
||||
case OffloadArch::GFX1011:
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s
|
||||
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx941 -emit-llvm -o - %s | FileCheck --check-prefix=GFX941 %s
|
||||
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s
|
||||
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s
|
||||
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
|
||||
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
|
||||
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
|
||||
@@ -88,6 +89,7 @@
|
||||
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
|
||||
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
|
||||
// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
|
||||
// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
|
||||
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
|
||||
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
|
||||
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
|
||||
|
||||
@@ -110,6 +110,7 @@
|
||||
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9
|
||||
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9
|
||||
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx942 -DFAMILY=GFX9
|
||||
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx950 -DFAMILY=GFX9
|
||||
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 -DFAMILY=GFX10
|
||||
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011 -DFAMILY=GFX10
|
||||
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012 -DFAMILY=GFX10
|
||||
|
||||
@@ -95,6 +95,7 @@
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefix=GFX941 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefix=GFX942 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefix=GFX950 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s
|
||||
@@ -150,6 +151,7 @@
|
||||
// GFX940: "-target-cpu" "gfx940"
|
||||
// GFX941: "-target-cpu" "gfx941"
|
||||
// GFX942: "-target-cpu" "gfx942"
|
||||
// GFX950: "-target-cpu" "gfx950"
|
||||
// GFX1010: "-target-cpu" "gfx1010"
|
||||
// GFX1011: "-target-cpu" "gfx1011"
|
||||
// GFX1012: "-target-cpu" "gfx1012"
|
||||
|
||||
@@ -48,6 +48,7 @@
|
||||
// CHECK-SAME: {{^}}, gfx940
|
||||
// CHECK-SAME: {{^}}, gfx941
|
||||
// CHECK-SAME: {{^}}, gfx942
|
||||
// CHECK-SAME: {{^}}, gfx950
|
||||
// CHECK-SAME: {{^}}, gfx1010
|
||||
// CHECK-SAME: {{^}}, gfx1011
|
||||
// CHECK-SAME: {{^}}, gfx1012
|
||||
|
||||
@@ -54,6 +54,7 @@
|
||||
// CHECK-SAME: {{^}}, gfx940
|
||||
// CHECK-SAME: {{^}}, gfx941
|
||||
// CHECK-SAME: {{^}}, gfx942
|
||||
// CHECK-SAME: {{^}}, gfx950
|
||||
// CHECK-SAME: {{^}}, gfx10-1-generic
|
||||
// CHECK-SAME: {{^}}, gfx1010
|
||||
// CHECK-SAME: {{^}}, gfx1011
|
||||
|
||||
@@ -399,6 +399,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
|
||||
work-item
|
||||
IDs
|
||||
|
||||
``gfx950`` ``amdgcn`` dGPU - sramecc - Architected *TBA*
|
||||
- tgsplit flat
|
||||
- xnack scratch .. TODO::
|
||||
- kernarg preload - Packed
|
||||
work-item Add product
|
||||
IDs names.
|
||||
|
||||
**GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
|
||||
-----------------------------------------------------------------------------------------------------------------------
|
||||
``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700
|
||||
@@ -2178,7 +2185,7 @@ The AMDGPU backend uses the following ELF header:
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
|
||||
*reserved* 0x04d Reserved.
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
|
||||
*reserved* 0x04f Reserved.
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX950`` 0x04f ``gfx950``
|
||||
*reserved* 0x050 Reserved.
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic``
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10-1-generic``
|
||||
|
||||
@@ -811,7 +811,7 @@ enum : unsigned {
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
|
||||
|
||||
@@ -86,18 +86,19 @@ enum GPUKind : uint32_t {
|
||||
GK_GFX940 = 68,
|
||||
GK_GFX941 = 69,
|
||||
GK_GFX942 = 70,
|
||||
GK_GFX950 = 71,
|
||||
|
||||
GK_GFX1010 = 71,
|
||||
GK_GFX1011 = 72,
|
||||
GK_GFX1012 = 73,
|
||||
GK_GFX1013 = 74,
|
||||
GK_GFX1030 = 75,
|
||||
GK_GFX1031 = 76,
|
||||
GK_GFX1032 = 77,
|
||||
GK_GFX1033 = 78,
|
||||
GK_GFX1034 = 79,
|
||||
GK_GFX1035 = 80,
|
||||
GK_GFX1036 = 81,
|
||||
GK_GFX1010 = 72,
|
||||
GK_GFX1011 = 73,
|
||||
GK_GFX1012 = 74,
|
||||
GK_GFX1013 = 75,
|
||||
GK_GFX1030 = 76,
|
||||
GK_GFX1031 = 77,
|
||||
GK_GFX1032 = 78,
|
||||
GK_GFX1033 = 79,
|
||||
GK_GFX1034 = 80,
|
||||
GK_GFX1035 = 81,
|
||||
GK_GFX1036 = 82,
|
||||
|
||||
GK_GFX1100 = 90,
|
||||
GK_GFX1101 = 91,
|
||||
|
||||
@@ -550,6 +550,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
|
||||
return "gfx941";
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:
|
||||
return "gfx942";
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950:
|
||||
return "gfx950";
|
||||
|
||||
// AMDGCN GFX10.
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:
|
||||
|
||||
@@ -609,6 +609,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX950, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
|
||||
|
||||
@@ -360,6 +360,12 @@ def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
|
||||
"Additional instructions for GFX940+"
|
||||
>;
|
||||
|
||||
// Subtarget feature exposed under the feature string "gfx950-insts". Enabling
// it sets the GFX950Insts subtarget field to "true"; the last string is the
// human-readable description of the feature.
def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
|
||||
"GFX950Insts",
|
||||
"true",
|
||||
"Additional instructions for GFX950+"
|
||||
>;
|
||||
|
||||
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
|
||||
"GFX10Insts",
|
||||
"true",
|
||||
@@ -1470,6 +1476,14 @@ def FeatureISAVersion9_4_Common : FeatureSet<
|
||||
FeatureFlatBufferGlobalAtomicFaddF64Inst
|
||||
]>;
|
||||
|
||||
// Feature list built by concatenating every feature of
// FeatureISAVersion9_4_Common with the FP8, FP8-conversion, CvtFP8VOP1Bug and
// GFX950 instruction features listed below.
def FeatureISAVersion9_5_Common : FeatureSet<
|
||||
!listconcat(FeatureISAVersion9_4_Common.Features,
|
||||
[FeatureFP8Insts,
|
||||
FeatureFP8ConversionInsts,
|
||||
FeatureCvtFP8VOP1Bug,
|
||||
FeatureGFX950Insts
|
||||
])>;
|
||||
|
||||
def FeatureISAVersion9_4_0 : FeatureSet<
|
||||
!listconcat(FeatureISAVersion9_4_Common.Features,
|
||||
[
|
||||
@@ -1503,6 +1517,8 @@ def FeatureISAVersion9_4_Generic : FeatureSet<
|
||||
!listconcat(FeatureISAVersion9_4_Common.Features,
|
||||
[FeatureRequiresCOV6])>;
|
||||
|
||||
// ISA version 9.5.0 feature set: exactly the features of
// FeatureISAVersion9_5_Common, with nothing added.
def FeatureISAVersion9_5_0 : FeatureSet<FeatureISAVersion9_5_Common.Features>;
|
||||
|
||||
def FeatureISAVersion10_Common : FeatureSet<
|
||||
[FeatureGFX10,
|
||||
FeatureLDSBankCount32,
|
||||
|
||||
@@ -204,6 +204,10 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
|
||||
FeatureISAVersion9_4_2.Features
|
||||
>;
|
||||
|
||||
// Processor definition for the "gfx950" CPU name. It pairs the
// SIDPGFX940FullSpeedModel scheduling model with the feature list of
// FeatureISAVersion9_5_0.
def : ProcessorModel<"gfx950", SIDPGFX940FullSpeedModel,
|
||||
FeatureISAVersion9_5_0.Features
|
||||
>;
|
||||
|
||||
// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c]
|
||||
def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
|
||||
FeatureISAVersion9_Generic.Features
|
||||
|
||||
@@ -106,6 +106,7 @@ protected:
|
||||
bool GFX9Insts = false;
|
||||
bool GFX90AInsts = false;
|
||||
bool GFX940Insts = false;
|
||||
bool GFX950Insts = false;
|
||||
bool GFX10Insts = false;
|
||||
bool GFX11Insts = false;
|
||||
bool GFX12Insts = false;
|
||||
|
||||
@@ -96,6 +96,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
|
||||
@@ -182,6 +183,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
|
||||
case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
|
||||
case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
|
||||
case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
|
||||
case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
|
||||
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
|
||||
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
|
||||
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
|
||||
|
||||
@@ -107,6 +107,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
|
||||
{{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
|
||||
{{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
|
||||
{{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
|
||||
{{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
|
||||
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
|
||||
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
|
||||
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
|
||||
@@ -262,6 +263,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
|
||||
case GK_GFX940: return {9, 4, 0};
|
||||
case GK_GFX941: return {9, 4, 1};
|
||||
case GK_GFX942: return {9, 4, 2};
|
||||
case GK_GFX950: return {9, 5, 0};
|
||||
case GK_GFX1010: return {10, 1, 0};
|
||||
case GK_GFX1011: return {10, 1, 1};
|
||||
case GK_GFX1012: return {10, 1, 2};
|
||||
@@ -361,7 +363,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
|
||||
Features["wavefrontsize32"] = true;
|
||||
Features["wavefrontsize64"] = true;
|
||||
} else if (T.isAMDGCN()) {
|
||||
switch (parseArchAMDGCN(GPU)) {
|
||||
AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
|
||||
switch (Kind) {
|
||||
case GK_GFX1201:
|
||||
case GK_GFX1200:
|
||||
case GK_GFX12_GENERIC:
|
||||
@@ -466,12 +469,16 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
|
||||
Features["s-memtime-inst"] = true;
|
||||
Features["gws"] = true;
|
||||
break;
|
||||
case GK_GFX950:
|
||||
Features["gfx950-insts"] = true;
|
||||
[[fallthrough]];
|
||||
case GK_GFX942:
|
||||
case GK_GFX941:
|
||||
case GK_GFX940:
|
||||
Features["fp8-insts"] = true;
|
||||
Features["fp8-conversion-insts"] = true;
|
||||
Features["xf32-insts"] = true;
|
||||
if (Kind != GK_GFX950)
|
||||
Features["xf32-insts"] = true;
|
||||
[[fallthrough]];
|
||||
case GK_GFX9_4_GENERIC:
|
||||
Features["gfx940-insts"] = true;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx940 < %s | FileCheck --check-prefixes=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx940 < %s | FileCheck --check-prefixes=GCN,GFX-940 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx950 < %s | FileCheck --check-prefixes=GCN,GFX-950 %s
|
||||
|
||||
; TODO: Add global-isel when it can support bf16
|
||||
|
||||
@@ -198,19 +199,33 @@ entry:
|
||||
}
|
||||
|
||||
define amdgpu_ps void @fptrunc_f32_to_bf16(float %a, ptr %out) {
|
||||
; GCN-LABEL: fptrunc_f32_to_bf16:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GCN-NEXT: v_bfe_u32 v1, v0, 16, 1
|
||||
; GCN-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GCN-NEXT: v_add3_u32 v1, v1, v0, s0
|
||||
; GCN-NEXT: v_or_b32_e32 v4, 0x400000, v0
|
||||
; GCN-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
|
||||
; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GFX-940-LABEL: fptrunc_f32_to_bf16:
|
||||
; GFX-940: ; %bb.0: ; %entry
|
||||
; GFX-940-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX-940-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX-940-NEXT: v_bfe_u32 v1, v0, 16, 1
|
||||
; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GFX-940-NEXT: v_add3_u32 v1, v1, v0, s0
|
||||
; GFX-940-NEXT: v_or_b32_e32 v4, 0x400000, v0
|
||||
; GFX-940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
||||
; GFX-940-NEXT: s_nop 1
|
||||
; GFX-940-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
|
||||
; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
|
||||
; GFX-940-NEXT: s_endpgm
|
||||
;
|
||||
; GFX-950-LABEL: fptrunc_f32_to_bf16:
|
||||
; GFX-950: ; %bb.0: ; %entry
|
||||
; GFX-950-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX-950-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX-950-NEXT: v_bfe_u32 v1, v0, 16, 1
|
||||
; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GFX-950-NEXT: v_add3_u32 v1, v1, v0, s0
|
||||
; GFX-950-NEXT: v_or_b32_e32 v4, 0x400000, v0
|
||||
; GFX-950-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
|
||||
; GFX-950-NEXT: s_nop 1
|
||||
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
|
||||
; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
|
||||
; GFX-950-NEXT: s_endpgm
|
||||
entry:
|
||||
%a.cvt = fptrunc float %a to bfloat
|
||||
store bfloat %a.cvt, ptr %out
|
||||
@@ -218,20 +233,35 @@ entry:
|
||||
}
|
||||
|
||||
define amdgpu_ps void @fptrunc_f32_to_bf16_abs(float %a, ptr %out) {
|
||||
; GCN-LABEL: fptrunc_f32_to_bf16_abs:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
|
||||
; GCN-NEXT: v_bfe_u32 v4, v1, 16, 1
|
||||
; GCN-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GCN-NEXT: v_add3_u32 v4, v4, v1, s0
|
||||
; GCN-NEXT: v_or_b32_e32 v1, 0x400000, v1
|
||||
; GCN-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0|
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||||
; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GFX-940-LABEL: fptrunc_f32_to_bf16_abs:
|
||||
; GFX-940: ; %bb.0: ; %entry
|
||||
; GFX-940-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX-940-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX-940-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
|
||||
; GFX-940-NEXT: v_bfe_u32 v4, v1, 16, 1
|
||||
; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GFX-940-NEXT: v_add3_u32 v4, v4, v1, s0
|
||||
; GFX-940-NEXT: v_or_b32_e32 v1, 0x400000, v1
|
||||
; GFX-940-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0|
|
||||
; GFX-940-NEXT: s_nop 1
|
||||
; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||||
; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
|
||||
; GFX-940-NEXT: s_endpgm
|
||||
;
|
||||
; GFX-950-LABEL: fptrunc_f32_to_bf16_abs:
|
||||
; GFX-950: ; %bb.0: ; %entry
|
||||
; GFX-950-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX-950-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX-950-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
|
||||
; GFX-950-NEXT: v_bfe_u32 v4, v1, 16, 1
|
||||
; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GFX-950-NEXT: v_add3_u32 v4, v4, v1, s0
|
||||
; GFX-950-NEXT: v_or_b32_e32 v1, 0x400000, v1
|
||||
; GFX-950-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0|
|
||||
; GFX-950-NEXT: s_nop 1
|
||||
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||||
; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
|
||||
; GFX-950-NEXT: s_endpgm
|
||||
entry:
|
||||
%a.abs = call float @llvm.fabs.f32(float %a)
|
||||
%a.cvt = fptrunc float %a.abs to bfloat
|
||||
@@ -240,20 +270,35 @@ entry:
|
||||
}
|
||||
|
||||
define amdgpu_ps void @fptrunc_f32_to_bf16_neg(float %a, ptr %out) {
|
||||
; GCN-LABEL: fptrunc_f32_to_bf16_neg:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
|
||||
; GCN-NEXT: v_bfe_u32 v4, v1, 16, 1
|
||||
; GCN-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GCN-NEXT: v_add3_u32 v4, v4, v1, s0
|
||||
; GCN-NEXT: v_or_b32_e32 v1, 0x400000, v1
|
||||
; GCN-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||||
; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GFX-940-LABEL: fptrunc_f32_to_bf16_neg:
|
||||
; GFX-940: ; %bb.0: ; %entry
|
||||
; GFX-940-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX-940-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX-940-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
|
||||
; GFX-940-NEXT: v_bfe_u32 v4, v1, 16, 1
|
||||
; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GFX-940-NEXT: v_add3_u32 v4, v4, v1, s0
|
||||
; GFX-940-NEXT: v_or_b32_e32 v1, 0x400000, v1
|
||||
; GFX-940-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0
|
||||
; GFX-940-NEXT: s_nop 1
|
||||
; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||||
; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
|
||||
; GFX-940-NEXT: s_endpgm
|
||||
;
|
||||
; GFX-950-LABEL: fptrunc_f32_to_bf16_neg:
|
||||
; GFX-950: ; %bb.0: ; %entry
|
||||
; GFX-950-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX-950-NEXT: v_mov_b32_e32 v2, v1
|
||||
; GFX-950-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
|
||||
; GFX-950-NEXT: v_bfe_u32 v4, v1, 16, 1
|
||||
; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GFX-950-NEXT: v_add3_u32 v4, v4, v1, s0
|
||||
; GFX-950-NEXT: v_or_b32_e32 v1, 0x400000, v1
|
||||
; GFX-950-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0
|
||||
; GFX-950-NEXT: s_nop 1
|
||||
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||||
; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
|
||||
; GFX-950-NEXT: s_endpgm
|
||||
entry:
|
||||
%a.neg = fneg float %a
|
||||
%a.cvt = fptrunc float %a.neg to bfloat
|
||||
@@ -262,29 +307,53 @@ entry:
|
||||
}
|
||||
|
||||
define amdgpu_ps void @fptrunc_f64_to_bf16(double %a, ptr %out) {
|
||||
; GCN-LABEL: fptrunc_f64_to_bf16:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
|
||||
; GCN-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
|
||||
; GCN-NEXT: v_and_b32_e32 v7, 1, v6
|
||||
; GCN-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
|
||||
; GCN-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
|
||||
; GCN-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
|
||||
; GCN-NEXT: v_add_u32_e32 v4, v6, v4
|
||||
; GCN-NEXT: s_or_b64 vcc, s[0:1], vcc
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
|
||||
; GCN-NEXT: s_brev_b32 s0, 1
|
||||
; GCN-NEXT: v_and_or_b32 v5, v1, s0, v4
|
||||
; GCN-NEXT: v_bfe_u32 v4, v4, 16, 1
|
||||
; GCN-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GCN-NEXT: v_add3_u32 v4, v4, v5, s0
|
||||
; GCN-NEXT: v_or_b32_e32 v5, 0x400000, v5
|
||||
; GCN-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
|
||||
; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GFX-940-LABEL: fptrunc_f64_to_bf16:
|
||||
; GFX-940: ; %bb.0: ; %entry
|
||||
; GFX-940-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
|
||||
; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
|
||||
; GFX-940-NEXT: v_and_b32_e32 v7, 1, v6
|
||||
; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
|
||||
; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
|
||||
; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
|
||||
; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
|
||||
; GFX-940-NEXT: v_add_u32_e32 v4, v6, v4
|
||||
; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc
|
||||
; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
|
||||
; GFX-940-NEXT: s_brev_b32 s0, 1
|
||||
; GFX-940-NEXT: v_and_or_b32 v5, v1, s0, v4
|
||||
; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1
|
||||
; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0
|
||||
; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5
|
||||
; GFX-940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
|
||||
; GFX-940-NEXT: s_nop 1
|
||||
; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
|
||||
; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
|
||||
; GFX-940-NEXT: s_endpgm
|
||||
;
|
||||
; GFX-950-LABEL: fptrunc_f64_to_bf16:
|
||||
; GFX-950: ; %bb.0: ; %entry
|
||||
; GFX-950-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
|
||||
; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
|
||||
; GFX-950-NEXT: v_and_b32_e32 v7, 1, v6
|
||||
; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
|
||||
; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
|
||||
; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
|
||||
; GFX-950-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
|
||||
; GFX-950-NEXT: v_add_u32_e32 v4, v6, v4
|
||||
; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
|
||||
; GFX-950-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
|
||||
; GFX-950-NEXT: s_brev_b32 s0, 1
|
||||
; GFX-950-NEXT: v_and_or_b32 v5, v1, s0, v4
|
||||
; GFX-950-NEXT: v_bfe_u32 v4, v4, 16, 1
|
||||
; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GFX-950-NEXT: v_add3_u32 v4, v4, v5, s0
|
||||
; GFX-950-NEXT: v_or_b32_e32 v5, 0x400000, v5
|
||||
; GFX-950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
|
||||
; GFX-950-NEXT: s_nop 1
|
||||
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
|
||||
; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
|
||||
; GFX-950-NEXT: s_endpgm
|
||||
entry:
|
||||
%a.cvt = fptrunc double %a to bfloat
|
||||
store bfloat %a.cvt, ptr %out
|
||||
@@ -292,30 +361,55 @@ entry:
|
||||
}
|
||||
|
||||
define amdgpu_ps void @fptrunc_f64_to_bf16_neg(double %a, ptr %out) {
|
||||
; GCN-LABEL: fptrunc_f64_to_bf16_neg:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
|
||||
; GCN-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
|
||||
; GCN-NEXT: v_and_b32_e32 v8, 1, v7
|
||||
; GCN-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
|
||||
; GCN-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
|
||||
; GCN-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
|
||||
; GCN-NEXT: v_add_u32_e32 v4, v7, v4
|
||||
; GCN-NEXT: s_or_b64 vcc, s[0:1], vcc
|
||||
; GCN-NEXT: s_brev_b32 s4, 1
|
||||
; GCN-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
|
||||
; GCN-NEXT: v_and_or_b32 v5, v6, s4, v4
|
||||
; GCN-NEXT: v_bfe_u32 v4, v4, 16, 1
|
||||
; GCN-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GCN-NEXT: v_add3_u32 v4, v4, v5, s0
|
||||
; GCN-NEXT: v_or_b32_e32 v5, 0x400000, v5
|
||||
; GCN-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
|
||||
; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GFX-940-LABEL: fptrunc_f64_to_bf16_neg:
|
||||
; GFX-940: ; %bb.0: ; %entry
|
||||
; GFX-940-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
|
||||
; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
|
||||
; GFX-940-NEXT: v_and_b32_e32 v8, 1, v7
|
||||
; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
|
||||
; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
|
||||
; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
|
||||
; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
|
||||
; GFX-940-NEXT: v_add_u32_e32 v4, v7, v4
|
||||
; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc
|
||||
; GFX-940-NEXT: s_brev_b32 s4, 1
|
||||
; GFX-940-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
|
||||
; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
|
||||
; GFX-940-NEXT: v_and_or_b32 v5, v6, s4, v4
|
||||
; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1
|
||||
; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0
|
||||
; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5
|
||||
; GFX-940-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
|
||||
; GFX-940-NEXT: s_nop 1
|
||||
; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
|
||||
; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
|
||||
; GFX-940-NEXT: s_endpgm
|
||||
;
|
||||
; GFX-950-LABEL: fptrunc_f64_to_bf16_neg:
|
||||
; GFX-950: ; %bb.0: ; %entry
|
||||
; GFX-950-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
|
||||
; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
|
||||
; GFX-950-NEXT: v_and_b32_e32 v8, 1, v7
|
||||
; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
|
||||
; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
|
||||
; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
|
||||
; GFX-950-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
|
||||
; GFX-950-NEXT: v_add_u32_e32 v4, v7, v4
|
||||
; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
|
||||
; GFX-950-NEXT: s_brev_b32 s4, 1
|
||||
; GFX-950-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
|
||||
; GFX-950-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
|
||||
; GFX-950-NEXT: v_and_or_b32 v5, v6, s4, v4
|
||||
; GFX-950-NEXT: v_bfe_u32 v4, v4, 16, 1
|
||||
; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GFX-950-NEXT: v_add3_u32 v4, v4, v5, s0
|
||||
; GFX-950-NEXT: v_or_b32_e32 v5, 0x400000, v5
|
||||
; GFX-950-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
|
||||
; GFX-950-NEXT: s_nop 1
|
||||
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
|
||||
; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
|
||||
; GFX-950-NEXT: s_endpgm
|
||||
entry:
|
||||
%a.neg = fneg double %a
|
||||
%a.cvt = fptrunc double %a.neg to bfloat
|
||||
@@ -324,30 +418,55 @@ entry:
|
||||
}
|
||||
|
||||
define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) {
|
||||
; GCN-LABEL: fptrunc_f64_to_bf16_abs:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
|
||||
; GCN-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
|
||||
; GCN-NEXT: v_and_b32_e32 v8, 1, v7
|
||||
; GCN-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
|
||||
; GCN-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
|
||||
; GCN-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
|
||||
; GCN-NEXT: v_add_u32_e32 v4, v7, v4
|
||||
; GCN-NEXT: s_or_b64 vcc, s[0:1], vcc
|
||||
; GCN-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
|
||||
; GCN-NEXT: s_brev_b32 s0, 1
|
||||
; GCN-NEXT: v_and_or_b32 v5, v6, s0, v4
|
||||
; GCN-NEXT: v_bfe_u32 v4, v4, 16, 1
|
||||
; GCN-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GCN-NEXT: v_add3_u32 v4, v4, v5, s0
|
||||
; GCN-NEXT: v_or_b32_e32 v5, 0x400000, v5
|
||||
; GCN-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
|
||||
; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
|
||||
; GCN-NEXT: s_endpgm
|
||||
; GFX-940-LABEL: fptrunc_f64_to_bf16_abs:
|
||||
; GFX-940: ; %bb.0: ; %entry
|
||||
; GFX-940-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
|
||||
; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
|
||||
; GFX-940-NEXT: v_and_b32_e32 v8, 1, v7
|
||||
; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
|
||||
; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
|
||||
; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
|
||||
; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
|
||||
; GFX-940-NEXT: v_add_u32_e32 v4, v7, v4
|
||||
; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc
|
||||
; GFX-940-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
|
||||
; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
|
||||
; GFX-940-NEXT: s_brev_b32 s0, 1
|
||||
; GFX-940-NEXT: v_and_or_b32 v5, v6, s0, v4
|
||||
; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1
|
||||
; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0
|
||||
; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5
|
||||
; GFX-940-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
|
||||
; GFX-940-NEXT: s_nop 1
|
||||
; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
|
||||
; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
|
||||
; GFX-940-NEXT: s_endpgm
|
||||
;
|
||||
; GFX-950-LABEL: fptrunc_f64_to_bf16_abs:
|
||||
; GFX-950: ; %bb.0: ; %entry
|
||||
; GFX-950-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
|
||||
; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
|
||||
; GFX-950-NEXT: v_and_b32_e32 v8, 1, v7
|
||||
; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
|
||||
; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
|
||||
; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
|
||||
; GFX-950-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
|
||||
; GFX-950-NEXT: v_add_u32_e32 v4, v7, v4
|
||||
; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
|
||||
; GFX-950-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
|
||||
; GFX-950-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
|
||||
; GFX-950-NEXT: s_brev_b32 s0, 1
|
||||
; GFX-950-NEXT: v_and_or_b32 v5, v6, s0, v4
|
||||
; GFX-950-NEXT: v_bfe_u32 v4, v4, 16, 1
|
||||
; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
|
||||
; GFX-950-NEXT: v_add3_u32 v4, v4, v5, s0
|
||||
; GFX-950-NEXT: v_or_b32_e32 v5, 0x400000, v5
|
||||
; GFX-950-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
|
||||
; GFX-950-NEXT: s_nop 1
|
||||
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
|
||||
; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
|
||||
; GFX-950-NEXT: s_endpgm
|
||||
entry:
|
||||
%a.abs = call double @llvm.fabs.f64(double %a)
|
||||
%a.cvt = fptrunc double %a.abs to bfloat
|
||||
|
||||
@@ -80,6 +80,9 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX942-NOXNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX942-XNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck --check-prefixes=GFX950 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX950-NOXNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX950-XNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s
|
||||
@@ -180,6 +183,9 @@
|
||||
; GFX942: .amdgcn_target "amdgcn-amd-amdhsa--gfx942"
|
||||
; GFX942-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack-"
|
||||
; GFX942-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack+"
|
||||
; GFX950: .amdgcn_target "amdgcn-amd-amdhsa--gfx950"
|
||||
; GFX950-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx950:xnack-"
|
||||
; GFX950-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx950:xnack+"
|
||||
; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
|
||||
; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-"
|
||||
; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+"
|
||||
|
||||
@@ -57,6 +57,7 @@
|
||||
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX940 %s
|
||||
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx941 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX941 %s
|
||||
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx942 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX942 %s
|
||||
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX950 %s
|
||||
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
|
||||
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1011 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s
|
||||
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1012 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s
|
||||
@@ -139,6 +140,7 @@
|
||||
; GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
|
||||
; GFX941: EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B)
|
||||
; GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C)
|
||||
; GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
|
||||
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
|
||||
; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34)
|
||||
; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35)
|
||||
|
||||
@@ -12,6 +12,9 @@
|
||||
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
|
||||
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
|
||||
|
||||
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s
|
||||
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s
|
||||
|
||||
; NO-SRAM-ECC-GFX906: Flags [
|
||||
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
|
||||
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
|
||||
@@ -44,6 +47,11 @@
|
||||
; SRAM-ECC-GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
|
||||
; SRAM-ECC-GFX940: ]
|
||||
|
||||
; SRAM-ECC-GFX950: Flags [
|
||||
; SRAM-ECC-GFX950: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
|
||||
; SRAM-ECC-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
|
||||
; SRAM-ECC-GFX950: ]
|
||||
|
||||
define amdgpu_kernel void @elf_header() {
|
||||
ret void
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,8 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,5 @@
|
||||
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck -check-prefix=GFX940 %s
|
||||
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck -check-prefix=GFX940 %s
|
||||
|
||||
scratch_load_dword a2, v4, s6
|
||||
// GFX940: scratch_load_dword a2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02]
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s
|
||||
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s
|
||||
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940,GFX90A --implicit-check-not=error: %s
|
||||
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940,GFX10 --implicit-check-not=error: %s
|
||||
|
||||
|
||||
179
llvm/test/MC/AMDGPU/gfx950-unsupported.s
Normal file
179
llvm/test/MC/AMDGPU/gfx950-unsupported.s
Normal file
@@ -0,0 +1,179 @@
|
||||
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck -check-prefix=ERR %s
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// v_mfma_f32_32x32x4_xf32
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], v[0:3], 1.0
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], a[0:3], 1.0
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], v[0:3], 1.0
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], a[0:3], 1.0
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// v_mfma_f32_16x16x8_xf32
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], v[0:3], 1.0
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], a[0:3], 1.0
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], v[0:3], 1.0
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], a[0:3], 1.0
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
|
||||
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
13
llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt
Normal file
13
llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
# RUN: llvm-mc -disassemble -arch=amdgcn -mcpu=gfx950 -show-encoding %s 2>&1 | FileCheck --implicit-check-not=warning: --check-prefix=GFX950 %s
|
||||
|
||||
# GFX950: warning: invalid instruction encoding
|
||||
0x00,0x80,0xbe,0xd3,0x02,0x09,0x0a,0x04
|
||||
|
||||
# GFX950: warning: invalid instruction encoding
|
||||
0x00,0x00,0xbe,0xd3,0x02,0x09,0x0a,0x04
|
||||
|
||||
# GFX950: warning: invalid instruction encoding
|
||||
0x00,0x00,0xbf,0xd3,0x02,0x09,0x0a,0x04
|
||||
|
||||
# GFX950: warning: invalid instruction encoding
|
||||
0x00,0x80,0xbf,0xd3,0x02,0x09,0x0a,0x04
|
||||
@@ -1,4 +1,5 @@
|
||||
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s
|
||||
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s
|
||||
|
||||
# GFX940: global_load_dword v2, v[2:3], off sc0 ; encoding: [0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02]
|
||||
0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02
|
||||
|
||||
@@ -162,6 +162,10 @@
|
||||
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX942 %s
|
||||
# RUN: obj2yaml %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX942 %s
|
||||
|
||||
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX950/' %s | yaml2obj -o %t.o.AMDGCN_GFX950
|
||||
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX950 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX950 %s
|
||||
# RUN: obj2yaml %t.o.AMDGCN_GFX950 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX950 %s
|
||||
|
||||
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1010/' %s | yaml2obj -o %t.o.AMDGCN_GFX1010
|
||||
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1010 %s
|
||||
# RUN: obj2yaml %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1010 %s
|
||||
@@ -411,6 +415,9 @@
|
||||
# ELF-AMDGCN-GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C)
|
||||
# YAML-AMDGCN-GFX942: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX942 ]
|
||||
|
||||
# ELF-AMDGCN-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
|
||||
# YAML-AMDGCN-GFX950: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX950 ]
|
||||
|
||||
# ELF-AMDGCN-GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
|
||||
# YAML-AMDGCN-GFX1010: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1010 ]
|
||||
|
||||
|
||||
@@ -137,7 +137,6 @@ define amdgpu_kernel void @test_kernel() {
|
||||
|
||||
; ----------------------------------GFX9---------------------------------------
|
||||
;
|
||||
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-4-generic -filetype=obj -O0 -o %t.o %s
|
||||
; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-4-generic %t.o > %t-specify.txt
|
||||
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
|
||||
@@ -148,6 +147,11 @@ define amdgpu_kernel void @test_kernel() {
|
||||
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
|
||||
; RUN: diff %t-specify.txt %t-detect.txt
|
||||
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -filetype=obj -O0 -o %t.o %s
|
||||
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx950 %t.o > %t-specify.txt
|
||||
; RUN: llvm-objdump -D %t.o > %t-detect.txt
|
||||
; RUN: diff %t-specify.txt %t-detect.txt
|
||||
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s
|
||||
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx942 %t.o > %t-specify.txt
|
||||
; RUN: llvm-objdump -D %t.o > %t-detect.txt
|
||||
|
||||
@@ -223,6 +223,15 @@
|
||||
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942
|
||||
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 -DFLAG_VALUE=0x4C
|
||||
|
||||
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
|
||||
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F
|
||||
|
||||
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
|
||||
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F
|
||||
|
||||
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
|
||||
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F
|
||||
|
||||
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
|
||||
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33
|
||||
|
||||
|
||||
@@ -1619,6 +1619,7 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX950, "gfx950"), \
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
|
||||
|
||||
@@ -43,7 +43,7 @@ set(include_directory ${devicertl_base_directory}/include)
|
||||
set(source_directory ${devicertl_base_directory}/src)
|
||||
|
||||
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
|
||||
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx1010"
|
||||
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010"
|
||||
"gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
|
||||
"gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
|
||||
"gfx1151;gfx1152;gfx1153")
|
||||
|
||||
Reference in New Issue
Block a user