[AMDGPU] Introduce a new generic target gfx9-4-generic (#115190)
This patch introduces a new generic target, `gfx9-4-generic`. Since it doesn't support FP8 and XF32-related instructions, the patch includes several code reorganizations to accommodate these changes.
This commit is contained in:
@@ -103,6 +103,7 @@ enum class OffloadArch {
|
||||
GFX909,
|
||||
GFX90a,
|
||||
GFX90c,
|
||||
GFX9_4_GENERIC,
|
||||
GFX940,
|
||||
GFX941,
|
||||
GFX942,
|
||||
|
||||
@@ -121,6 +121,7 @@ static const OffloadArchToStringMap arch_names[] = {
|
||||
GFX(909), // gfx909
|
||||
GFX(90a), // gfx90a
|
||||
GFX(90c), // gfx90c
|
||||
{OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"},
|
||||
GFX(940), // gfx940
|
||||
GFX(941), // gfx941
|
||||
GFX(942), // gfx942
|
||||
|
||||
@@ -205,6 +205,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
|
||||
case OffloadArch::GFX909:
|
||||
case OffloadArch::GFX90a:
|
||||
case OffloadArch::GFX90c:
|
||||
case OffloadArch::GFX9_4_GENERIC:
|
||||
case OffloadArch::GFX940:
|
||||
case OffloadArch::GFX941:
|
||||
case OffloadArch::GFX942:
|
||||
|
||||
@@ -2301,6 +2301,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
|
||||
case OffloadArch::GFX909:
|
||||
case OffloadArch::GFX90a:
|
||||
case OffloadArch::GFX90c:
|
||||
case OffloadArch::GFX9_4_GENERIC:
|
||||
case OffloadArch::GFX940:
|
||||
case OffloadArch::GFX941:
|
||||
case OffloadArch::GFX942:
|
||||
|
||||
@@ -56,6 +56,8 @@
|
||||
|
||||
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103-W64 %s
|
||||
|
||||
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx9-4-generic -emit-llvm -o - %s | FileCheck --check-prefix=GFX9_4_Generic %s
|
||||
|
||||
// NOCPU-NOT: "target-features"
|
||||
// NOCPU-WAVE32: "target-features"="+wavefrontsize32"
|
||||
// NOCPU-WAVE64: "target-features"="+wavefrontsize64"
|
||||
@@ -82,9 +84,10 @@
|
||||
// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
|
||||
// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
|
||||
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
|
||||
// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
|
||||
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
|
||||
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
|
||||
// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
|
||||
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
|
||||
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
|
||||
// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
|
||||
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
|
||||
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
|
||||
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx9-4-generic -verify -emit-llvm -o - %s
|
||||
|
||||
// Scalar and ext_vector_type aliases used as the parameter and result types
// of the builtin calls in builtin_test_unsupported.
typedef unsigned int uint;
|
||||
typedef float float2 __attribute__((ext_vector_type(2)));
|
||||
typedef float float4 __attribute__((ext_vector_type(4)));
|
||||
typedef float float16 __attribute__((ext_vector_type(16)));
|
||||
typedef int int2 __attribute__((ext_vector_type(2)));
|
||||
typedef int int4 __attribute__((ext_vector_type(4)));
|
||||
|
||||
// Negative test body: every statement calls a builtin that is gated on the
// fp8-insts or fp8-conversion-insts target feature, and its trailing
// annotation names the "needs target feature" error that must be reported
// for that line. The file's RUN line compiles this with
// -target-cpu gfx9-4-generic under -verify.
void builtin_test_unsupported(uint a, uint b, int a_int, long a_long, float a_float, float b_float,
|
||||
int2 a_int2, int4 a_int4, float2 a_float2, float4 a_float4, float16 a_float16) {
|
||||
// MFMA builtins with bf8/fp8 operands: each requires fp8-insts.
a_float4 = __builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8(a_long, a_long, a_float4, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8' needs target feature fp8-insts}}
|
||||
a_float4 = __builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8(a_long, a_long, a_float4, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8' needs target feature fp8-insts}}
|
||||
a_float4 = __builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8(a_long, a_long, a_float4, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8' needs target feature fp8-insts}}
|
||||
a_float4 = __builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8(a_long, a_long, a_float4, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8' needs target feature fp8-insts}}
|
||||
a_float16 = __builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8(a_long, a_long, a_float16, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8' needs target feature fp8-insts}}
|
||||
a_float16 = __builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8(a_long, a_long, a_float16, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8' needs target feature fp8-insts}}
|
||||
a_float16 = __builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8(a_long, a_long, a_float16, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8' needs target feature fp8-insts}}
|
||||
a_float16 = __builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8(a_long, a_long, a_float16, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8' needs target feature fp8-insts}}
|
||||
// SMFMAC builtins with bf8/fp8 operands: each requires fp8-insts.
a_float4 = __builtin_amdgcn_smfmac_f32_16x16x64_bf8_bf8(a_int2, a_int4, a_float4, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_16x16x64_bf8_bf8' needs target feature fp8-insts}}
|
||||
a_float4 = __builtin_amdgcn_smfmac_f32_16x16x64_bf8_fp8(a_int2, a_int4, a_float4, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_16x16x64_bf8_fp8' needs target feature fp8-insts}}
|
||||
a_float4 = __builtin_amdgcn_smfmac_f32_16x16x64_fp8_bf8(a_int2, a_int4, a_float4, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_16x16x64_fp8_bf8' needs target feature fp8-insts}}
|
||||
a_float4 = __builtin_amdgcn_smfmac_f32_16x16x64_fp8_fp8(a_int2, a_int4, a_float4, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_16x16x64_fp8_fp8' needs target feature fp8-insts}}
|
||||
a_float16 = __builtin_amdgcn_smfmac_f32_32x32x32_bf8_bf8(a_int2, a_int4, a_float16, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_32x32x32_bf8_bf8' needs target feature fp8-insts}}
|
||||
a_float16 = __builtin_amdgcn_smfmac_f32_32x32x32_bf8_fp8(a_int2, a_int4, a_float16, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_32x32x32_bf8_fp8' needs target feature fp8-insts}}
|
||||
a_float16 = __builtin_amdgcn_smfmac_f32_32x32x32_fp8_bf8(a_int2, a_int4, a_float16, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_32x32x32_fp8_bf8' needs target feature fp8-insts}}
|
||||
a_float16 = __builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8(a_int2, a_int4, a_float16, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8' needs target feature fp8-insts}}
|
||||
// FP8/BF8 conversion builtins: each requires fp8-conversion-insts.
b = __builtin_amdgcn_cvt_f32_bf8(a, 0); // expected-error {{'__builtin_amdgcn_cvt_f32_bf8' needs target feature fp8-conversion-insts}}
|
||||
b = __builtin_amdgcn_cvt_f32_fp8(a, 1); // expected-error {{'__builtin_amdgcn_cvt_f32_fp8' needs target feature fp8-conversion-insts}}
|
||||
a_float2 = __builtin_amdgcn_cvt_pk_f32_bf8(a, false); // expected-error {{'__builtin_amdgcn_cvt_pk_f32_bf8' needs target feature fp8-conversion-insts}}
|
||||
a_float2 = __builtin_amdgcn_cvt_pk_f32_fp8(a, true); // expected-error {{'__builtin_amdgcn_cvt_pk_f32_fp8' needs target feature fp8-conversion-insts}}
|
||||
b = __builtin_amdgcn_cvt_pk_bf8_f32(a_float, b_float, a, false); // expected-error {{'__builtin_amdgcn_cvt_pk_bf8_f32' needs target feature fp8-conversion-insts}}
|
||||
b = __builtin_amdgcn_cvt_pk_fp8_f32(a_float, b_float, a, true); // expected-error {{'__builtin_amdgcn_cvt_pk_fp8_f32' needs target feature fp8-conversion-insts}}
|
||||
b = __builtin_amdgcn_cvt_sr_bf8_f32(a_float, b_float, a, 2); // expected-error {{'__builtin_amdgcn_cvt_sr_bf8_f32' needs target feature fp8-conversion-insts}}
|
||||
b = __builtin_amdgcn_cvt_sr_fp8_f32(a_float, b_float, a, 3); // expected-error {{'__builtin_amdgcn_cvt_sr_fp8_f32' needs target feature fp8-conversion-insts}}
|
||||
}
|
||||
@@ -133,6 +133,7 @@
|
||||
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12
|
||||
|
||||
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_generic -DFAMILY=GFX9
|
||||
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-4-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_4_generic -DFAMILY=GFX9
|
||||
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10-1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10
|
||||
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10-3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10
|
||||
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx11_generic -DFAMILY=GFX11
|
||||
|
||||
@@ -118,6 +118,7 @@
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s
|
||||
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefix=GFX9_GENERIC %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx9-4-generic %s 2>&1 | FileCheck --check-prefix=GFX9_4_GENERIC %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx10-1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx10-3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefix=GFX11_GENERIC %s
|
||||
@@ -172,6 +173,7 @@
|
||||
// GFX1201: "-target-cpu" "gfx1201"
|
||||
|
||||
// GFX9_GENERIC: "-target-cpu" "gfx9-generic"
|
||||
// GFX9_4_GENERIC: "-target-cpu" "gfx9-4-generic"
|
||||
// GFX10_1_GENERIC: "-target-cpu" "gfx10-1-generic"
|
||||
// GFX10_3_GENERIC: "-target-cpu" "gfx10-3-generic"
|
||||
// GFX11_GENERIC: "-target-cpu" "gfx11-generic"
|
||||
|
||||
@@ -74,4 +74,5 @@
|
||||
// CHECK-SAME: {{^}}, gfx10-3-generic
|
||||
// CHECK-SAME: {{^}}, gfx11-generic
|
||||
// CHECK-SAME: {{^}}, gfx12-generic
|
||||
// CHECK-SAME: {{^}}, gfx9-4-generic
|
||||
// CHECK-SAME: {{$}}
|
||||
|
||||
@@ -50,6 +50,7 @@
|
||||
// CHECK-SAME: {{^}}, gfx909
|
||||
// CHECK-SAME: {{^}}, gfx90a
|
||||
// CHECK-SAME: {{^}}, gfx90c
|
||||
// CHECK-SAME: {{^}}, gfx9-4-generic
|
||||
// CHECK-SAME: {{^}}, gfx940
|
||||
// CHECK-SAME: {{^}}, gfx941
|
||||
// CHECK-SAME: {{^}}, gfx942
|
||||
|
||||
@@ -576,6 +576,12 @@ Generic processor code objects are versioned. See :ref:`amdgpu-generic-processor
|
||||
- ``v_dot2_f32_f16``
|
||||
|
||||
|
||||
``gfx9-4-generic`` ``amdgcn`` - ``gfx940`` - xnack - Absolute flat FP8 and BF8 instructions,
|
||||
- ``gfx941`` - sramecc scratch FP8 and BF8 conversion instructions,
|
||||
- ``gfx942`` as well as instructions with XF32 format support
|
||||
are not available.
|
||||
|
||||
|
||||
``gfx10-1-generic`` ``amdgcn`` - ``gfx1010`` - xnack - Absolute flat - The following instructions are
|
||||
- ``gfx1011`` - wavefrontsize64 scratch not available on ``gfx1011``
|
||||
- ``gfx1012`` - cumode and ``gfx1012``
|
||||
@@ -2180,6 +2186,7 @@ The AMDGPU backend uses the following ELF header:
|
||||
*reserved* 0x057 Reserved.
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX1153`` 0x058 ``gfx1153``.
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC`` 0x059 ``gfx12-generic``
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC`` 0x05f ``gfx9-4-generic``
|
||||
========================================== ========== =============================
|
||||
|
||||
Sections
|
||||
|
||||
@@ -822,11 +822,12 @@ enum : unsigned {
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X57 = 0x057,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1153 = 0x058,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC = 0x059,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC = 0x05f,
|
||||
// clang-format on
|
||||
|
||||
// First/last AMDGCN-based processors.
|
||||
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
|
||||
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC,
|
||||
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC,
|
||||
|
||||
// Indicates if the "xnack" target feature is enabled for all code contained
|
||||
// in the object.
|
||||
|
||||
@@ -119,9 +119,10 @@ enum GPUKind : uint32_t {
|
||||
GK_GFX10_3_GENERIC = 194,
|
||||
GK_GFX11_GENERIC = 195,
|
||||
GK_GFX12_GENERIC = 196,
|
||||
GK_GFX9_4_GENERIC = 197,
|
||||
|
||||
GK_AMDGCN_GENERIC_FIRST = GK_GFX9_GENERIC,
|
||||
GK_AMDGCN_GENERIC_LAST = GK_GFX12_GENERIC,
|
||||
GK_AMDGCN_GENERIC_LAST = GK_GFX9_4_GENERIC,
|
||||
};
|
||||
|
||||
/// Instruction set architecture version.
|
||||
|
||||
@@ -602,6 +602,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
|
||||
// Generic AMDGCN targets
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
|
||||
return "gfx9-generic";
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC:
|
||||
return "gfx9-4-generic";
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
|
||||
return "gfx10-1-generic";
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
|
||||
|
||||
@@ -631,6 +631,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, EF_AMDGPU_MACH);
|
||||
|
||||
@@ -1451,11 +1451,7 @@ def FeatureISAVersion9_4_Common : FeatureSet<
|
||||
FeatureDPALU_DPP,
|
||||
FeaturePackedFP32Ops,
|
||||
FeatureMAIInsts,
|
||||
FeatureFP8Insts,
|
||||
FeatureFP8ConversionInsts,
|
||||
FeatureCvtFP8VOP1Bug,
|
||||
FeaturePkFmacF16Inst,
|
||||
FeatureXF32Insts,
|
||||
FeatureAtomicFaddRtnInsts,
|
||||
FeatureAtomicFaddNoRtnInsts,
|
||||
FeatureAtomicBufferGlobalPkAddF16Insts,
|
||||
@@ -1476,15 +1472,36 @@ def FeatureISAVersion9_4_Common : FeatureSet<
|
||||
|
||||
def FeatureISAVersion9_4_0 : FeatureSet<
|
||||
!listconcat(FeatureISAVersion9_4_Common.Features,
|
||||
[FeatureForceStoreSC0SC1])>;
|
||||
[
|
||||
FeatureForceStoreSC0SC1,
|
||||
FeatureFP8Insts,
|
||||
FeatureFP8ConversionInsts,
|
||||
FeatureCvtFP8VOP1Bug,
|
||||
FeatureXF32Insts
|
||||
])>;
|
||||
|
||||
def FeatureISAVersion9_4_1 : FeatureSet<
|
||||
!listconcat(FeatureISAVersion9_4_Common.Features,
|
||||
[FeatureForceStoreSC0SC1])>;
|
||||
[
|
||||
FeatureForceStoreSC0SC1,
|
||||
FeatureFP8Insts,
|
||||
FeatureFP8ConversionInsts,
|
||||
FeatureCvtFP8VOP1Bug,
|
||||
FeatureXF32Insts
|
||||
])>;
|
||||
|
||||
def FeatureISAVersion9_4_2 : FeatureSet<
|
||||
!listconcat(FeatureISAVersion9_4_Common.Features,
|
||||
[])>;
|
||||
[
|
||||
FeatureFP8Insts,
|
||||
FeatureFP8ConversionInsts,
|
||||
FeatureCvtFP8VOP1Bug,
|
||||
FeatureXF32Insts
|
||||
])>;
|
||||
|
||||
def FeatureISAVersion9_4_Generic : FeatureSet<
|
||||
!listconcat(FeatureISAVersion9_4_Common.Features,
|
||||
[FeatureRequiresCOV6])>;
|
||||
|
||||
def FeatureISAVersion10_Common : FeatureSet<
|
||||
[FeatureGFX10,
|
||||
|
||||
@@ -209,6 +209,11 @@ def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
|
||||
FeatureISAVersion9_Generic.Features
|
||||
>;
|
||||
|
||||
// [gfx940, gfx941, gfx942]
|
||||
def : ProcessorModel<"gfx9-4-generic", SIDPGFX940FullSpeedModel,
|
||||
FeatureISAVersion9_4_Generic.Features
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GCN GFX10.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@@ -119,6 +119,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC: AK = GK_GFX9_4_GENERIC; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break;
|
||||
@@ -204,6 +205,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
|
||||
case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
|
||||
case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
|
||||
case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
|
||||
case GK_GFX9_4_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC;
|
||||
case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
|
||||
case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
|
||||
case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
|
||||
@@ -821,6 +823,9 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
|
||||
case AMDGPU::GK_GFX9_GENERIC:
|
||||
Version = GenericVersion::GFX9;
|
||||
break;
|
||||
case AMDGPU::GK_GFX9_4_GENERIC:
|
||||
Version = GenericVersion::GFX9_4;
|
||||
break;
|
||||
case AMDGPU::GK_GFX10_1_GENERIC:
|
||||
Version = GenericVersion::GFX10_1;
|
||||
break;
|
||||
|
||||
@@ -46,6 +46,7 @@ struct IsaVersion;
|
||||
/// within a generic family.
|
||||
namespace GenericVersion {
|
||||
static constexpr unsigned GFX9 = 1;
|
||||
static constexpr unsigned GFX9_4 = 1;
|
||||
static constexpr unsigned GFX10_1 = 1;
|
||||
static constexpr unsigned GFX10_3 = 1;
|
||||
static constexpr unsigned GFX11 = 1;
|
||||
|
||||
@@ -134,6 +134,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
|
||||
{{"gfx10-3-generic"}, {"gfx10-3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
|
||||
{{"gfx11-generic"}, {"gfx11-generic"}, GK_GFX11_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
|
||||
{{"gfx12-generic"}, {"gfx12-generic"}, GK_GFX12_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
|
||||
{{"gfx9-4-generic"}, {"gfx9-4-generic"}, GK_GFX9_4_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
|
||||
// clang-format on
|
||||
};
|
||||
|
||||
@@ -155,6 +156,7 @@ const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
|
||||
StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) {
|
||||
switch (AK) {
|
||||
case AMDGPU::GK_GFX9_GENERIC:
|
||||
case AMDGPU::GK_GFX9_4_GENERIC:
|
||||
return "gfx9";
|
||||
case AMDGPU::GK_GFX10_1_GENERIC:
|
||||
case AMDGPU::GK_GFX10_3_GENERIC:
|
||||
@@ -296,6 +298,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
|
||||
// TODO: Split up this API depending on its caller so
|
||||
// generic target handling is more obvious and less risky.
|
||||
case GK_GFX9_GENERIC: return {9, 0, 0};
|
||||
case GK_GFX9_4_GENERIC: return {9, 4, 0};
|
||||
case GK_GFX10_1_GENERIC: return {10, 1, 0};
|
||||
case GK_GFX10_3_GENERIC: return {10, 3, 0};
|
||||
case GK_GFX11_GENERIC: return {11, 0, 3};
|
||||
@@ -466,9 +469,12 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
|
||||
case GK_GFX942:
|
||||
case GK_GFX941:
|
||||
case GK_GFX940:
|
||||
Features["gfx940-insts"] = true;
|
||||
Features["fp8-insts"] = true;
|
||||
Features["fp8-conversion-insts"] = true;
|
||||
Features["xf32-insts"] = true;
|
||||
[[fallthrough]];
|
||||
case GK_GFX9_4_GENERIC:
|
||||
Features["gfx940-insts"] = true;
|
||||
Features["atomic-ds-pk-add-16-insts"] = true;
|
||||
Features["atomic-flat-pk-add-16-insts"] = true;
|
||||
Features["atomic-global-pk-add-bf16-inst"] = true;
|
||||
|
||||
@@ -112,6 +112,8 @@
|
||||
|
||||
; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_NOXNACK %s
|
||||
; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_XNACK %s
|
||||
; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX9_4_GENERIC_NOXNACK %s
|
||||
; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX9_4_GENERIC_XNACK %s
|
||||
; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-1-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_NOXNACK %s
|
||||
; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-1-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_XNACK %s
|
||||
; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-3-generic < %s | FileCheck --check-prefixes=GFX10_3_GENERIC %s
|
||||
@@ -210,6 +212,8 @@
|
||||
|
||||
; GFX9_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack-"
|
||||
; GFX9_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack+"
|
||||
; GFX9_4_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-4-generic:xnack-"
|
||||
; GFX9_4_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-4-generic:xnack+"
|
||||
; GFX10_1_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10-1-generic:xnack-"
|
||||
; GFX10_1_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10-1-generic:xnack+"
|
||||
; GFX10_3_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx10-3-generic"
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -o - %s | FileCheck -check-prefixes=GFX942 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 -o - %s | FileCheck -check-prefixes=GFX942 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -o - %s | FileCheck -check-prefixes=GFX1030 %s
|
||||
|
||||
; Sample test to check how we deal with division/modulos by 64 bit constants.
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=HAZARD %s
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx9-4-generic -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=HAZARD %s
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=NOHAZARD %s
|
||||
|
||||
---
|
||||
|
||||
@@ -80,6 +80,7 @@
|
||||
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1201 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1201 %s
|
||||
|
||||
; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx9-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX9_GENERIC %s
|
||||
; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx9-4-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX9_4_GENERIC %s
|
||||
; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_1_GENERIC %s
|
||||
; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_3_GENERIC %s
|
||||
; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx11-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX11_GENERIC %s
|
||||
@@ -161,6 +162,7 @@
|
||||
; GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E)
|
||||
|
||||
; GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51)
|
||||
; GFX9_4_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC (0x5F)
|
||||
; GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52)
|
||||
; GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53)
|
||||
; GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54)
|
||||
|
||||
@@ -1,16 +1,19 @@
|
||||
; RUN: not llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s
|
||||
; RUN: not llc -mtriple=amdgcn -mcpu=gfx9-4-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-4-V5 %s
|
||||
; RUN: not llc -mtriple=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s
|
||||
; RUN: not llc -mtriple=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s
|
||||
; RUN: not llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s
|
||||
; RUN: not llc -mtriple=amdgcn -mcpu=gfx12-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX12-V5 %s
|
||||
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 -o - %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=6 -o - %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=6 -o - %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx12-generic --amdhsa-code-object-version=6 -o - %s
|
||||
|
||||
; GFX9-V5: gfx9-generic is only available on code object version 6 or better
|
||||
; GFX9-4-V5: gfx9-4-generic is only available on code object version 6 or better
|
||||
; GFX101-V5: gfx10-1-generic is only available on code object version 6 or better
|
||||
; GFX103-V5: gfx10-3-generic is only available on code object version 6 or better
|
||||
; GFX11-V5: gfx11-generic is only available on code object version 6 or better
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
--- |
|
||||
define amdgpu_kernel void @largeInterleave() #0 { ret void }
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
--- |
|
||||
define amdgpu_kernel void @smallInterleave() #0 { ret void }
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefixes=GCN,GFX90A %s
|
||||
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX942 %s
|
||||
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX942 %s
|
||||
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 < %s | FileCheck --check-prefixes=GCN,GFX942 %s
|
||||
|
||||
; DPP control value 337 is valid for 64-bit DPP on gfx942
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 %s -o - | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 %s -o - | FileCheck %s
|
||||
|
||||
define void @no_corresponding_integer_type(i8 %arg, ptr addrspace(1) %ptr) {
|
||||
; CHECK-LABEL: no_corresponding_integer_type:
|
||||
|
||||
104
llvm/test/MC/AMDGPU/gfx9_4_generic_unsupported.s
Normal file
104
llvm/test/MC/AMDGPU/gfx9_4_generic_unsupported.s
Normal file
@@ -0,0 +1,104 @@
|
||||
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx9-4-generic -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --implicit-check-not=error: %s
|
||||
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx9-4-generic -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s
|
||||
|
||||
v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x8xf32 a[0:3], v[2:3], v[4:5], a[2:5]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4_xf32 a[0:15], v[2:3], v[4:5], a[18:33]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x4xf32 a[0:15], v[2:3], v[4:5], a[18:33]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_f32_bf8 v1, 3
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_f32_bf8_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_f32_bf8_e64 v5, v1
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_f32_bf8_sdwa v5, v1 src0_sel:BYTE_0
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_f32_fp8 v1, 3
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_f32_fp8_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_f32_fp8_e64 v5, v1
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_f32_fp8_sdwa v5, v1 src0_sel:BYTE_0
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_f32_fp8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_f32_fp8_dpp v[10:11], v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_f32_fp8_sdwa v[10:11], v1 src0_sel:WORD_0
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_f32_bf8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_f32_bf8_dpp v[10:11], v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_cvt_pk_f32_bf8_sdwa v[10:11], v1 src0_sel:WORD_0
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x16_bf8_bf8 a[0:15], v[2:3], v[4:5], a[0:15]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x16_bf8_fp8 a[0:15], v[2:3], v[4:5], a[0:15]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x16_fp8_bf8 a[0:15], v[2:3], v[4:5], a[0:15]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_mfma_f32_32x32x16_fp8_fp8 a[0:15], v[2:3], v[4:5], a[0:15]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_smfmac_f32_16x16x64_bf8_bf8 a[0:3], v[2:3], a[4:7], v1
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_smfmac_f32_16x16x64_bf8_fp8 a[0:3], v[2:3], a[4:7], v1
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_smfmac_f32_16x16x64_fp8_bf8 a[0:3], v[2:3], a[4:7], v1
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_smfmac_f32_16x16x64_fp8_fp8 a[0:3], v[2:3], a[4:7], v1
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_smfmac_f32_32x32x32_bf8_bf8 a[0:15], v[2:3], a[4:7], v1
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_smfmac_f32_32x32x32_bf8_fp8 a[0:15], v[2:3], a[4:7], v1
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_smfmac_f32_32x32x32_fp8_bf8 a[0:15], v[2:3], a[4:7], v1
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_smfmac_f32_32x32x32_fp8_fp8 a[0:15], v[2:3], a[4:7], v1
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
@@ -250,6 +250,10 @@
|
||||
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX9_GENERIC %s
|
||||
# RUN: obj2yaml %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX9_GENERIC %s
|
||||
|
||||
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX9_4_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX9_4_GENERIC
|
||||
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX9_4_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX9_4_GENERIC %s
|
||||
# RUN: obj2yaml %t.o.AMDGCN_GFX9_4_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX9_4_GENERIC %s
|
||||
|
||||
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_1_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_1_GENERIC
|
||||
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_1_GENERIC %s
|
||||
# RUN: obj2yaml %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_1_GENERIC %s
|
||||
@@ -473,6 +477,9 @@
|
||||
# ELF-AMDGCN-GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51)
|
||||
# YAML-AMDGCN-GFX9_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC ]
|
||||
|
||||
# ELF-AMDGCN-GFX9_4_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC (0x5F)
|
||||
# YAML-AMDGCN-GFX9_4_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC ]
|
||||
|
||||
# ELF-AMDGCN-GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52)
|
||||
# YAML-AMDGCN-GFX10_1_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC ]
|
||||
|
||||
|
||||
@@ -137,6 +137,12 @@ define amdgpu_kernel void @test_kernel() {
|
||||
|
||||
; ----------------------------------GFX9---------------------------------------
|
||||
;
|
||||
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-4-generic -filetype=obj -O0 -o %t.o %s
|
||||
; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-4-generic %t.o > %t-specify.txt
|
||||
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
|
||||
; RUN: diff %t-specify.txt %t-detect.txt
|
||||
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-generic -filetype=obj -O0 -o %t.o %s
|
||||
; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-generic %t.o > %t-specify.txt
|
||||
; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
|
||||
|
||||
@@ -364,6 +364,9 @@
|
||||
# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC
|
||||
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC -DFLAG_VALUE=0x51
|
||||
|
||||
# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC
|
||||
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC -DFLAG_VALUE=0x5F
|
||||
|
||||
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
|
||||
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
|
||||
|
||||
|
||||
@@ -1641,6 +1641,7 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), \
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, "gfx9-generic"), \
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC, "gfx9-4-generic"), \
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, "gfx10-1-generic"), \
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, "gfx10-3-generic"), \
|
||||
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, "gfx11-generic"), \
|
||||
|
||||
Reference in New Issue
Block a user