AMDGPU: Add gfx950 subtarget definitions (#116307)

Mostly a stub, but adds some baseline tests and
tests for removed instructions.
This commit is contained in:
Matt Arsenault
2024-11-18 10:41:14 -08:00
committed by GitHub
parent 18be88e20a
commit a6fc489bb7
43 changed files with 5063 additions and 4435 deletions

View File

@@ -712,6 +712,8 @@ Target Specific Changes
AMDGPU Support
^^^^^^^^^^^^^^
- Initial support for gfx950
- Added headers ``gpuintrin.h`` and ``amdgpuintrin.h`` that contain common
definitions for GPU builtin functions. These headers can be included for OpenMP,
CUDA, HIP, OpenCL, and C/C++.

View File

@@ -107,6 +107,7 @@ enum class OffloadArch {
GFX940,
GFX941,
GFX942,
GFX950,
GFX10_1_GENERIC,
GFX1010,
GFX1011,

View File

@@ -125,6 +125,7 @@ static const OffloadArchToStringMap arch_names[] = {
GFX(940), // gfx940
GFX(941), // gfx941
GFX(942), // gfx942
GFX(950), // gfx950
{OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
GFX(1010), // gfx1010
GFX(1011), // gfx1011

View File

@@ -209,6 +209,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::GFX940:
case OffloadArch::GFX941:
case OffloadArch::GFX942:
case OffloadArch::GFX950:
case OffloadArch::GFX10_1_GENERIC:
case OffloadArch::GFX1010:
case OffloadArch::GFX1011:

View File

@@ -2304,6 +2304,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::GFX940:
case OffloadArch::GFX941:
case OffloadArch::GFX942:
case OffloadArch::GFX950:
case OffloadArch::GFX10_1_GENERIC:
case OffloadArch::GFX1010:
case OffloadArch::GFX1011:

View File

@@ -32,6 +32,7 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx941 -emit-llvm -o - %s | FileCheck --check-prefix=GFX941 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
@@ -88,6 +89,7 @@
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"

View File

@@ -110,6 +110,7 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx942 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx950 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 -DFAMILY=GFX10
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011 -DFAMILY=GFX10
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012 -DFAMILY=GFX10

View File

@@ -95,6 +95,7 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefix=GFX941 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefix=GFX942 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefix=GFX950 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s
@@ -150,6 +151,7 @@
// GFX940: "-target-cpu" "gfx940"
// GFX941: "-target-cpu" "gfx941"
// GFX942: "-target-cpu" "gfx942"
// GFX950: "-target-cpu" "gfx950"
// GFX1010: "-target-cpu" "gfx1010"
// GFX1011: "-target-cpu" "gfx1011"
// GFX1012: "-target-cpu" "gfx1012"

View File

@@ -48,6 +48,7 @@
// CHECK-SAME: {{^}}, gfx940
// CHECK-SAME: {{^}}, gfx941
// CHECK-SAME: {{^}}, gfx942
// CHECK-SAME: {{^}}, gfx950
// CHECK-SAME: {{^}}, gfx1010
// CHECK-SAME: {{^}}, gfx1011
// CHECK-SAME: {{^}}, gfx1012

View File

@@ -54,6 +54,7 @@
// CHECK-SAME: {{^}}, gfx940
// CHECK-SAME: {{^}}, gfx941
// CHECK-SAME: {{^}}, gfx942
// CHECK-SAME: {{^}}, gfx950
// CHECK-SAME: {{^}}, gfx10-1-generic
// CHECK-SAME: {{^}}, gfx1010
// CHECK-SAME: {{^}}, gfx1011

View File

@@ -399,6 +399,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
work-item
IDs
``gfx950`` ``amdgcn`` dGPU - sramecc - Architected *TBA*
- tgsplit flat
- xnack scratch .. TODO::
- kernarg preload - Packed
work-item Add product
IDs names.
**GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
-----------------------------------------------------------------------------------------------------------------------
``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700
@@ -2178,7 +2185,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
*reserved* 0x04d Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
*reserved* 0x04f Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX950`` 0x04f ``gfx950``
*reserved* 0x050 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic``
``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10-1-generic``

View File

@@ -811,7 +811,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f,
EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,

View File

@@ -86,18 +86,19 @@ enum GPUKind : uint32_t {
GK_GFX940 = 68,
GK_GFX941 = 69,
GK_GFX942 = 70,
GK_GFX950 = 71,
GK_GFX1010 = 71,
GK_GFX1011 = 72,
GK_GFX1012 = 73,
GK_GFX1013 = 74,
GK_GFX1030 = 75,
GK_GFX1031 = 76,
GK_GFX1032 = 77,
GK_GFX1033 = 78,
GK_GFX1034 = 79,
GK_GFX1035 = 80,
GK_GFX1036 = 81,
GK_GFX1010 = 72,
GK_GFX1011 = 73,
GK_GFX1012 = 74,
GK_GFX1013 = 75,
GK_GFX1030 = 76,
GK_GFX1031 = 77,
GK_GFX1032 = 78,
GK_GFX1033 = 79,
GK_GFX1034 = 80,
GK_GFX1035 = 81,
GK_GFX1036 = 82,
GK_GFX1100 = 90,
GK_GFX1101 = 91,

View File

@@ -550,6 +550,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx941";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:
return "gfx942";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950:
return "gfx950";
// AMDGCN GFX10.
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:

View File

@@ -609,6 +609,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX950, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);

View File

@@ -360,6 +360,12 @@ def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
"Additional instructions for GFX940+"
>;
// Subtarget feature "gfx950-insts": marks targets that provide the additional
// instructions available on GFX950 and later. The second and third arguments
// name the GFX950Insts field and the value ("true") paired with it.
def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
"GFX950Insts",
"true",
"Additional instructions for GFX950+"
>;
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
"GFX10Insts",
"true",
@@ -1470,6 +1476,14 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureFlatBufferGlobalAtomicFaddF64Inst
]>;
// Common feature list for ISA version 9.5: every feature in the GFX9.4 common
// list, followed by the FP8 instruction and FP8 conversion features, the
// CvtFP8VOP1Bug feature, and the GFX950 instruction feature.
def FeatureISAVersion9_5_Common : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[FeatureFP8Insts,
FeatureFP8ConversionInsts,
FeatureCvtFP8VOP1Bug,
FeatureGFX950Insts
])>;
def FeatureISAVersion9_4_0 : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[
@@ -1503,6 +1517,8 @@ def FeatureISAVersion9_4_Generic : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[FeatureRequiresCOV6])>;
// ISA version 9.5.0 uses the 9.5 common feature list with no additions.
def FeatureISAVersion9_5_0 : FeatureSet<FeatureISAVersion9_5_Common.Features>;
def FeatureISAVersion10_Common : FeatureSet<
[FeatureGFX10,
FeatureLDSBankCount32,

View File

@@ -204,6 +204,10 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_2.Features
>;
// gfx950 processor definition: pairs the SIDPGFX940FullSpeedModel processor
// model with the ISA version 9.5.0 feature list.
def : ProcessorModel<"gfx950", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_5_0.Features
>;
// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c]
def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
FeatureISAVersion9_Generic.Features

View File

@@ -106,6 +106,7 @@ protected:
bool GFX9Insts = false;
bool GFX90AInsts = false;
bool GFX940Insts = false;
bool GFX950Insts = false;
bool GFX10Insts = false;
bool GFX11Insts = false;
bool GFX12Insts = false;

View File

@@ -96,6 +96,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
@@ -182,6 +183,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;

View File

@@ -107,6 +107,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
@@ -262,6 +263,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX940: return {9, 4, 0};
case GK_GFX941: return {9, 4, 1};
case GK_GFX942: return {9, 4, 2};
case GK_GFX950: return {9, 5, 0};
case GK_GFX1010: return {10, 1, 0};
case GK_GFX1011: return {10, 1, 1};
case GK_GFX1012: return {10, 1, 2};
@@ -361,7 +363,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["wavefrontsize32"] = true;
Features["wavefrontsize64"] = true;
} else if (T.isAMDGCN()) {
switch (parseArchAMDGCN(GPU)) {
AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
switch (Kind) {
case GK_GFX1201:
case GK_GFX1200:
case GK_GFX12_GENERIC:
@@ -466,12 +469,16 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["s-memtime-inst"] = true;
Features["gws"] = true;
break;
case GK_GFX950:
Features["gfx950-insts"] = true;
[[fallthrough]];
case GK_GFX942:
case GK_GFX941:
case GK_GFX940:
Features["fp8-insts"] = true;
Features["fp8-conversion-insts"] = true;
Features["xf32-insts"] = true;
if (Kind != GK_GFX950)
Features["xf32-insts"] = true;
[[fallthrough]];
case GK_GFX9_4_GENERIC:
Features["gfx940-insts"] = true;

View File

@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -march=amdgcn -mcpu=gfx940 < %s | FileCheck --check-prefixes=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx940 < %s | FileCheck --check-prefixes=GCN,GFX-940 %s
; RUN: llc -march=amdgcn -mcpu=gfx950 < %s | FileCheck --check-prefixes=GCN,GFX-950 %s
; TODO: Add global-isel when it can support bf16
@@ -198,19 +199,33 @@ entry:
}
define amdgpu_ps void @fptrunc_f32_to_bf16(float %a, ptr %out) {
; GCN-LABEL: fptrunc_f32_to_bf16:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_mov_b32_e32 v3, v2
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_bfe_u32 v1, v0, 16, 1
; GCN-NEXT: s_movk_i32 s0, 0x7fff
; GCN-NEXT: v_add3_u32 v1, v1, v0, s0
; GCN-NEXT: v_or_b32_e32 v4, 0x400000, v0
; GCN-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GCN-NEXT: s_nop 1
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
; GCN-NEXT: s_endpgm
; GFX-940-LABEL: fptrunc_f32_to_bf16:
; GFX-940: ; %bb.0: ; %entry
; GFX-940-NEXT: v_mov_b32_e32 v3, v2
; GFX-940-NEXT: v_mov_b32_e32 v2, v1
; GFX-940-NEXT: v_bfe_u32 v1, v0, 16, 1
; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
; GFX-940-NEXT: v_add3_u32 v1, v1, v0, s0
; GFX-940-NEXT: v_or_b32_e32 v4, 0x400000, v0
; GFX-940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX-940-NEXT: s_nop 1
; GFX-940-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
; GFX-940-NEXT: s_endpgm
;
; GFX-950-LABEL: fptrunc_f32_to_bf16:
; GFX-950: ; %bb.0: ; %entry
; GFX-950-NEXT: v_mov_b32_e32 v3, v2
; GFX-950-NEXT: v_mov_b32_e32 v2, v1
; GFX-950-NEXT: v_bfe_u32 v1, v0, 16, 1
; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
; GFX-950-NEXT: v_add3_u32 v1, v1, v0, s0
; GFX-950-NEXT: v_or_b32_e32 v4, 0x400000, v0
; GFX-950-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX-950-NEXT: s_nop 1
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
; GFX-950-NEXT: s_endpgm
entry:
%a.cvt = fptrunc float %a to bfloat
store bfloat %a.cvt, ptr %out
@@ -218,20 +233,35 @@ entry:
}
define amdgpu_ps void @fptrunc_f32_to_bf16_abs(float %a, ptr %out) {
; GCN-LABEL: fptrunc_f32_to_bf16_abs:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_mov_b32_e32 v3, v2
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
; GCN-NEXT: v_bfe_u32 v4, v1, 16, 1
; GCN-NEXT: s_movk_i32 s0, 0x7fff
; GCN-NEXT: v_add3_u32 v4, v4, v1, s0
; GCN-NEXT: v_or_b32_e32 v1, 0x400000, v1
; GCN-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0|
; GCN-NEXT: s_nop 1
; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
; GCN-NEXT: s_endpgm
; GFX-940-LABEL: fptrunc_f32_to_bf16_abs:
; GFX-940: ; %bb.0: ; %entry
; GFX-940-NEXT: v_mov_b32_e32 v3, v2
; GFX-940-NEXT: v_mov_b32_e32 v2, v1
; GFX-940-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
; GFX-940-NEXT: v_bfe_u32 v4, v1, 16, 1
; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
; GFX-940-NEXT: v_add3_u32 v4, v4, v1, s0
; GFX-940-NEXT: v_or_b32_e32 v1, 0x400000, v1
; GFX-940-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0|
; GFX-940-NEXT: s_nop 1
; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
; GFX-940-NEXT: s_endpgm
;
; GFX-950-LABEL: fptrunc_f32_to_bf16_abs:
; GFX-950: ; %bb.0: ; %entry
; GFX-950-NEXT: v_mov_b32_e32 v3, v2
; GFX-950-NEXT: v_mov_b32_e32 v2, v1
; GFX-950-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
; GFX-950-NEXT: v_bfe_u32 v4, v1, 16, 1
; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
; GFX-950-NEXT: v_add3_u32 v4, v4, v1, s0
; GFX-950-NEXT: v_or_b32_e32 v1, 0x400000, v1
; GFX-950-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0|
; GFX-950-NEXT: s_nop 1
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
; GFX-950-NEXT: s_endpgm
entry:
%a.abs = call float @llvm.fabs.f32(float %a)
%a.cvt = fptrunc float %a.abs to bfloat
@@ -240,20 +270,35 @@ entry:
}
define amdgpu_ps void @fptrunc_f32_to_bf16_neg(float %a, ptr %out) {
; GCN-LABEL: fptrunc_f32_to_bf16_neg:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_mov_b32_e32 v3, v2
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
; GCN-NEXT: v_bfe_u32 v4, v1, 16, 1
; GCN-NEXT: s_movk_i32 s0, 0x7fff
; GCN-NEXT: v_add3_u32 v4, v4, v1, s0
; GCN-NEXT: v_or_b32_e32 v1, 0x400000, v1
; GCN-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0
; GCN-NEXT: s_nop 1
; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
; GCN-NEXT: s_endpgm
; GFX-940-LABEL: fptrunc_f32_to_bf16_neg:
; GFX-940: ; %bb.0: ; %entry
; GFX-940-NEXT: v_mov_b32_e32 v3, v2
; GFX-940-NEXT: v_mov_b32_e32 v2, v1
; GFX-940-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
; GFX-940-NEXT: v_bfe_u32 v4, v1, 16, 1
; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
; GFX-940-NEXT: v_add3_u32 v4, v4, v1, s0
; GFX-940-NEXT: v_or_b32_e32 v1, 0x400000, v1
; GFX-940-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0
; GFX-940-NEXT: s_nop 1
; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
; GFX-940-NEXT: s_endpgm
;
; GFX-950-LABEL: fptrunc_f32_to_bf16_neg:
; GFX-950: ; %bb.0: ; %entry
; GFX-950-NEXT: v_mov_b32_e32 v3, v2
; GFX-950-NEXT: v_mov_b32_e32 v2, v1
; GFX-950-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
; GFX-950-NEXT: v_bfe_u32 v4, v1, 16, 1
; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
; GFX-950-NEXT: v_add3_u32 v4, v4, v1, s0
; GFX-950-NEXT: v_or_b32_e32 v1, 0x400000, v1
; GFX-950-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0
; GFX-950-NEXT: s_nop 1
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
; GFX-950-NEXT: s_endpgm
entry:
%a.neg = fneg float %a
%a.cvt = fptrunc float %a.neg to bfloat
@@ -262,29 +307,53 @@ entry:
}
define amdgpu_ps void @fptrunc_f64_to_bf16(double %a, ptr %out) {
; GCN-LABEL: fptrunc_f64_to_bf16:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
; GCN-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
; GCN-NEXT: v_and_b32_e32 v7, 1, v6
; GCN-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
; GCN-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
; GCN-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
; GCN-NEXT: v_add_u32_e32 v4, v6, v4
; GCN-NEXT: s_or_b64 vcc, s[0:1], vcc
; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; GCN-NEXT: s_brev_b32 s0, 1
; GCN-NEXT: v_and_or_b32 v5, v1, s0, v4
; GCN-NEXT: v_bfe_u32 v4, v4, 16, 1
; GCN-NEXT: s_movk_i32 s0, 0x7fff
; GCN-NEXT: v_add3_u32 v4, v4, v5, s0
; GCN-NEXT: v_or_b32_e32 v5, 0x400000, v5
; GCN-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GCN-NEXT: s_nop 1
; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
; GCN-NEXT: s_endpgm
; GFX-940-LABEL: fptrunc_f64_to_bf16:
; GFX-940: ; %bb.0: ; %entry
; GFX-940-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
; GFX-940-NEXT: v_and_b32_e32 v7, 1, v6
; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
; GFX-940-NEXT: v_add_u32_e32 v4, v6, v4
; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc
; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; GFX-940-NEXT: s_brev_b32 s0, 1
; GFX-940-NEXT: v_and_or_b32 v5, v1, s0, v4
; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1
; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0
; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5
; GFX-940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX-940-NEXT: s_nop 1
; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
; GFX-940-NEXT: s_endpgm
;
; GFX-950-LABEL: fptrunc_f64_to_bf16:
; GFX-950: ; %bb.0: ; %entry
; GFX-950-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
; GFX-950-NEXT: v_and_b32_e32 v7, 1, v6
; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
; GFX-950-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
; GFX-950-NEXT: v_add_u32_e32 v4, v6, v4
; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
; GFX-950-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; GFX-950-NEXT: s_brev_b32 s0, 1
; GFX-950-NEXT: v_and_or_b32 v5, v1, s0, v4
; GFX-950-NEXT: v_bfe_u32 v4, v4, 16, 1
; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
; GFX-950-NEXT: v_add3_u32 v4, v4, v5, s0
; GFX-950-NEXT: v_or_b32_e32 v5, 0x400000, v5
; GFX-950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX-950-NEXT: s_nop 1
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
; GFX-950-NEXT: s_endpgm
entry:
%a.cvt = fptrunc double %a to bfloat
store bfloat %a.cvt, ptr %out
@@ -292,30 +361,55 @@ entry:
}
define amdgpu_ps void @fptrunc_f64_to_bf16_neg(double %a, ptr %out) {
; GCN-LABEL: fptrunc_f64_to_bf16_neg:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
; GCN-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
; GCN-NEXT: v_and_b32_e32 v8, 1, v7
; GCN-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
; GCN-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
; GCN-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
; GCN-NEXT: v_add_u32_e32 v4, v7, v4
; GCN-NEXT: s_or_b64 vcc, s[0:1], vcc
; GCN-NEXT: s_brev_b32 s4, 1
; GCN-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
; GCN-NEXT: v_and_or_b32 v5, v6, s4, v4
; GCN-NEXT: v_bfe_u32 v4, v4, 16, 1
; GCN-NEXT: s_movk_i32 s0, 0x7fff
; GCN-NEXT: v_add3_u32 v4, v4, v5, s0
; GCN-NEXT: v_or_b32_e32 v5, 0x400000, v5
; GCN-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
; GCN-NEXT: s_nop 1
; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
; GCN-NEXT: s_endpgm
; GFX-940-LABEL: fptrunc_f64_to_bf16_neg:
; GFX-940: ; %bb.0: ; %entry
; GFX-940-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
; GFX-940-NEXT: v_and_b32_e32 v8, 1, v7
; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
; GFX-940-NEXT: v_add_u32_e32 v4, v7, v4
; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc
; GFX-940-NEXT: s_brev_b32 s4, 1
; GFX-940-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
; GFX-940-NEXT: v_and_or_b32 v5, v6, s4, v4
; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1
; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0
; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5
; GFX-940-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
; GFX-940-NEXT: s_nop 1
; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
; GFX-940-NEXT: s_endpgm
;
; GFX-950-LABEL: fptrunc_f64_to_bf16_neg:
; GFX-950: ; %bb.0: ; %entry
; GFX-950-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
; GFX-950-NEXT: v_and_b32_e32 v8, 1, v7
; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
; GFX-950-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
; GFX-950-NEXT: v_add_u32_e32 v4, v7, v4
; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
; GFX-950-NEXT: s_brev_b32 s4, 1
; GFX-950-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
; GFX-950-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
; GFX-950-NEXT: v_and_or_b32 v5, v6, s4, v4
; GFX-950-NEXT: v_bfe_u32 v4, v4, 16, 1
; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
; GFX-950-NEXT: v_add3_u32 v4, v4, v5, s0
; GFX-950-NEXT: v_or_b32_e32 v5, 0x400000, v5
; GFX-950-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
; GFX-950-NEXT: s_nop 1
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
; GFX-950-NEXT: s_endpgm
entry:
%a.neg = fneg double %a
%a.cvt = fptrunc double %a.neg to bfloat
@@ -324,30 +418,55 @@ entry:
}
define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) {
; GCN-LABEL: fptrunc_f64_to_bf16_abs:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
; GCN-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
; GCN-NEXT: v_and_b32_e32 v8, 1, v7
; GCN-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
; GCN-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
; GCN-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
; GCN-NEXT: v_add_u32_e32 v4, v7, v4
; GCN-NEXT: s_or_b64 vcc, s[0:1], vcc
; GCN-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
; GCN-NEXT: s_brev_b32 s0, 1
; GCN-NEXT: v_and_or_b32 v5, v6, s0, v4
; GCN-NEXT: v_bfe_u32 v4, v4, 16, 1
; GCN-NEXT: s_movk_i32 s0, 0x7fff
; GCN-NEXT: v_add3_u32 v4, v4, v5, s0
; GCN-NEXT: v_or_b32_e32 v5, 0x400000, v5
; GCN-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
; GCN-NEXT: s_nop 1
; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; GCN-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
; GCN-NEXT: s_endpgm
; GFX-940-LABEL: fptrunc_f64_to_bf16_abs:
; GFX-940: ; %bb.0: ; %entry
; GFX-940-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
; GFX-940-NEXT: v_and_b32_e32 v8, 1, v7
; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
; GFX-940-NEXT: v_add_u32_e32 v4, v7, v4
; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc
; GFX-940-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
; GFX-940-NEXT: s_brev_b32 s0, 1
; GFX-940-NEXT: v_and_or_b32 v5, v6, s0, v4
; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1
; GFX-940-NEXT: s_movk_i32 s0, 0x7fff
; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0
; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5
; GFX-940-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
; GFX-940-NEXT: s_nop 1
; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1
; GFX-940-NEXT: s_endpgm
;
; GFX-950-LABEL: fptrunc_f64_to_bf16_abs:
; GFX-950: ; %bb.0: ; %entry
; GFX-950-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]|
; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
; GFX-950-NEXT: v_and_b32_e32 v8, 1, v7
; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
; GFX-950-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
; GFX-950-NEXT: v_add_u32_e32 v4, v7, v4
; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
; GFX-950-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
; GFX-950-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
; GFX-950-NEXT: s_brev_b32 s0, 1
; GFX-950-NEXT: v_and_or_b32 v5, v6, s0, v4
; GFX-950-NEXT: v_bfe_u32 v4, v4, 16, 1
; GFX-950-NEXT: s_movk_i32 s0, 0x7fff
; GFX-950-NEXT: v_add3_u32 v4, v4, v5, s0
; GFX-950-NEXT: v_or_b32_e32 v5, 0x400000, v5
; GFX-950-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
; GFX-950-NEXT: s_nop 1
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; GFX-950-NEXT: flat_store_short_d16_hi v[2:3], v0
; GFX-950-NEXT: s_endpgm
entry:
%a.abs = call double @llvm.fabs.f64(double %a)
%a.cvt = fptrunc double %a.abs to bfloat

View File

@@ -80,6 +80,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX942-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX942-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck --check-prefixes=GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX950-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX950-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s
@@ -180,6 +183,9 @@
; GFX942: .amdgcn_target "amdgcn-amd-amdhsa--gfx942"
; GFX942-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack-"
; GFX942-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack+"
; GFX950: .amdgcn_target "amdgcn-amd-amdhsa--gfx950"
; GFX950-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx950:xnack-"
; GFX950-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx950:xnack+"
; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-"
; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+"

View File

@@ -57,6 +57,7 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX940 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx941 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX941 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx942 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX942 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX950 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1011 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1012 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s
@@ -139,6 +140,7 @@
; GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
; GFX941: EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B)
; GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C)
; GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34)
; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35)

View File

@@ -12,6 +12,9 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s
; NO-SRAM-ECC-GFX906: Flags [
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
@@ -44,6 +47,11 @@
; SRAM-ECC-GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
; SRAM-ECC-GFX940: ]
; SRAM-ECC-GFX950: Flags [
; SRAM-ECC-GFX950: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
; SRAM-ECC-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
; SRAM-ECC-GFX950: ]
define amdgpu_kernel void @elf_header() {
ret void
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,5 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck -check-prefix=GFX940 %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck -check-prefix=GFX940 %s
scratch_load_dword a2, v4, s6
// GFX940: scratch_load_dword a2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02]

View File

@@ -1,4 +1,5 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940,GFX90A --implicit-check-not=error: %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940,GFX10 --implicit-check-not=error: %s

View File

@@ -0,0 +1,179 @@
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck -check-prefix=ERR %s
//===----------------------------------------------------------------------===//
// v_mfma_f32_32x32x4_xf32
//===----------------------------------------------------------------------===//
v_mfma_f32_32x32x4_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], v[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], a[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], v[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], a[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x4_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
//===----------------------------------------------------------------------===//
// v_mfma_f32_16x16x8_xf32
//===----------------------------------------------------------------------===//
v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], v[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], a[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], v[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], a[0:3], 1.0
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 v[0:3], v[0:3], v[0:3], v[0:3] blgp:5
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 a[0:3], v[0:3], v[0:3], a[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

View File

@@ -0,0 +1,13 @@
# RUN: llvm-mc -disassemble -arch=amdgcn -mcpu=gfx950 -show-encoding %s 2>&1 | FileCheck --implicit-check-not=warning: --check-prefix=GFX950 %s
# GFX950: warning: invalid instruction encoding
0x00,0x80,0xbe,0xd3,0x02,0x09,0x0a,0x04
# GFX950: warning: invalid instruction encoding
0x00,0x00,0xbe,0xd3,0x02,0x09,0x0a,0x04
# GFX950: warning: invalid instruction encoding
0x00,0x00,0xbf,0xd3,0x02,0x09,0x0a,0x04
# GFX950: warning: invalid instruction encoding
0x00,0x80,0xbf,0xd3,0x02,0x09,0x0a,0x04

View File

@@ -1,4 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s
# GFX940: global_load_dword v2, v[2:3], off sc0 ; encoding: [0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02]
0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02

View File

@@ -162,6 +162,10 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX942 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX942 %s
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX950/' %s | yaml2obj -o %t.o.AMDGCN_GFX950
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX950 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX950 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX950 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX950 %s
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1010/' %s | yaml2obj -o %t.o.AMDGCN_GFX1010
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1010 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1010 %s
@@ -411,6 +415,9 @@
# ELF-AMDGCN-GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C)
# YAML-AMDGCN-GFX942: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX942 ]
# ELF-AMDGCN-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F)
# YAML-AMDGCN-GFX950: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX950 ]
# ELF-AMDGCN-GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
# YAML-AMDGCN-GFX1010: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1010 ]

View File

@@ -137,7 +137,6 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX9---------------------------------------
;
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-4-generic -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-4-generic %t.o > %t-specify.txt
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
@@ -148,6 +147,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
; RUN: diff %t-specify.txt %t-detect.txt
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx950 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
; RUN: diff %t-specify.txt %t-detect.txt
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx942 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt

View File

@@ -223,6 +223,15 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 -DFLAG_VALUE=0x4C
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX950 -DFLAG_VALUE=0x4F
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33

View File

@@ -1619,6 +1619,7 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX950, "gfx950"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \

View File

@@ -43,7 +43,7 @@ set(include_directory ${devicertl_base_directory}/include)
set(source_directory ${devicertl_base_directory}/src)
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx1010"
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010"
"gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
"gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
"gfx1151;gfx1152;gfx1153")