[AArch64][clang][llvm] Add structured sparsity outer product (TMOP) intrinsics (#135145)
Implement all {BF/F/S/U/SU/US}TMOP intrinsics in clang and llvm
following the ACLE in https://github.com/ARM-software/acle/pull/380/files
This commit is contained in:
committed by
GitHub
parent
b9ce185d4e
commit
3d7e56fd28
@@ -907,6 +907,33 @@ let SMETargetGuard = "sme-f16f16" in {
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// SME2 - TMOP, SUTMOP, USTMOP
|
||||
|
||||
let SMETargetGuard = "sme2,sme-tmop" in {
|
||||
def SVTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "hbf", MergeNone, "aarch64_sme_ftmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
|
||||
def SVSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "cs", MergeNone, "aarch64_sme_stmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
|
||||
def SVUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2d[i", "UcUs", MergeNone, "aarch64_sme_utmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
|
||||
def SVSUTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2u[i", "c", MergeNone, "aarch64_sme_sutmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
|
||||
def SVUSTMOPA_ZA32 : Inst<"svtmopa_lane_za32[_{d}_{3}]", "vi2x[i", "Uc", MergeNone, "aarch64_sme_ustmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
|
||||
}
|
||||
|
||||
let SMETargetGuard = "sme2,sme-tmop,sme-f16f16" in {
|
||||
def SVTMOPA_F16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "h", MergeNone, "aarch64_sme_ftmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>;
|
||||
}
|
||||
|
||||
let SMETargetGuard = "sme2,sme-tmop,sme-b16b16" in {
|
||||
def SVTMOPA_BF16 : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2d[i", "b", MergeNone, "aarch64_sme_ftmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>;
|
||||
}
|
||||
|
||||
let SMETargetGuard = "sme2,sme-tmop,sme-f8f16" in {
|
||||
def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_ftmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>;
|
||||
}
|
||||
|
||||
let SMETargetGuard = "sme2,sme-tmop,sme-f8f32" in {
|
||||
def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.dd[i>", "m", MergeNone, "aarch64_sme_ftmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
|
||||
}
|
||||
|
||||
multiclass ZAReadz<string n_suffix, string vg_num, string t, string i_prefix, list<ImmCheck> ch> {
|
||||
let SMETargetGuard = "sme2p1" in {
|
||||
def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
|
||||
|
||||
202
clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c
Normal file
202
clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c
Normal file
@@ -0,0 +1,202 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
|
||||
// REQUIRES: aarch64-registered-target
|
||||
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
|
||||
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
|
||||
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
|
||||
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
|
||||
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-tmop -target-feature +sme-f16f16 -target-feature +sme-f8f32 -target-feature +sme-b16b16 -target-feature +sme-f8f16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
|
||||
|
||||
#include <arm_sme.h>
|
||||
|
||||
#ifdef SME_OVERLOADED_FORMS
|
||||
#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
|
||||
#else
|
||||
#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3
|
||||
#endif
|
||||
|
||||
// CHECK-LABEL: @test_svtmopa_lane_za32_s8_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_s8_s810svint8x2_tu10__SVInt8_tu11__SVUint8_t(
|
||||
// CPP-CHECK-NEXT: entry:
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CPP-CHECK-NEXT: ret void
|
||||
//
|
||||
void test_svtmopa_lane_za32_s8_s8(svint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
|
||||
SME_ACLE_FUNC(svtmopa_lane_za32,_s8_s8,)(1, zn, zm, zk, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_svtmopa_lane_za32_u8_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_u8_u811svuint8x2_tu11__SVUint8_tS0_(
|
||||
// CPP-CHECK-NEXT: entry:
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CPP-CHECK-NEXT: ret void
|
||||
//
|
||||
void test_svtmopa_lane_za32_u8_u8(svuint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
|
||||
SME_ACLE_FUNC(svtmopa_lane_za32,_u8_u8,)(1, zn, zm, zk, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_svtmopa_lane_za32_s8_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.sutmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_s8_u810svint8x2_tu11__SVUint8_tS0_(
|
||||
// CPP-CHECK-NEXT: entry:
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sutmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CPP-CHECK-NEXT: ret void
|
||||
//
|
||||
void test_svtmopa_lane_za32_s8_u8(svint8x2_t zn, svuint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
|
||||
SME_ACLE_FUNC(svtmopa_lane_za32,_s8_u8,)(1, zn, zm, zk, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_svtmopa_lane_za32_u8_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ustmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CPP-CHECK-LABEL: @_Z28test_svtmopa_lane_za32_u8_s811svuint8x2_tu10__SVInt8_tu11__SVUint8_t(
|
||||
// CPP-CHECK-NEXT: entry:
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ustmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CPP-CHECK-NEXT: ret void
|
||||
//
|
||||
void test_svtmopa_lane_za32_u8_s8(svuint8x2_t zn, svint8_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
|
||||
SME_ACLE_FUNC(svtmopa_lane_za32,_u8_s8,)(1, zn, zm, zk, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_svtmopa_lane_za32_s16_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.za32.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_s16_s1611svint16x2_tu11__SVInt16_tu11__SVUint8_t(
|
||||
// CPP-CHECK-NEXT: entry:
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.stmopa.za32.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CPP-CHECK-NEXT: ret void
|
||||
//
|
||||
void test_svtmopa_lane_za32_s16_s16(svint16x2_t zn, svint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
|
||||
SME_ACLE_FUNC(svtmopa_lane_za32,_s16_s16,)(1, zn, zm, zk, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_svtmopa_lane_za32_u16_u16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.za32.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_u16_u1612svuint16x2_tu12__SVUint16_tu11__SVUint8_t(
|
||||
// CPP-CHECK-NEXT: entry:
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.utmopa.za32.nxv8i16(i32 1, <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CPP-CHECK-NEXT: ret void
|
||||
//
|
||||
void test_svtmopa_lane_za32_u16_u16(svuint16x2_t zn, svuint16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
|
||||
SME_ACLE_FUNC(svtmopa_lane_za32,_u16_u16,)(1, zn, zm, zk, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_svtmopa_lane_za32_f16_f16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_f16_f1613svfloat16x2_tu13__SVFloat16_tu11__SVUint8_t(
|
||||
// CPP-CHECK-NEXT: entry:
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CPP-CHECK-NEXT: ret void
|
||||
//
|
||||
void test_svtmopa_lane_za32_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
|
||||
SME_ACLE_FUNC(svtmopa_lane_za32,_f16_f16,)(1, zn, zm, zk, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_svtmopa_lane_za32_f32_f32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv4f32(i32 1, <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za32_f32_f3213svfloat32x2_tu13__SVFloat32_tu11__SVUint8_t(
|
||||
// CPP-CHECK-NEXT: entry:
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv4f32(i32 1, <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CPP-CHECK-NEXT: ret void
|
||||
//
|
||||
void test_svtmopa_lane_za32_f32_f32(svfloat32x2_t zn, svfloat32_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
|
||||
SME_ACLE_FUNC(svtmopa_lane_za32,_f32_f32,)(1, zn, zm, zk, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_svtmopa_lane_za32_bf16_bf16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CPP-CHECK-LABEL: @_Z32test_svtmopa_lane_za32_bf16_bf1614svbfloat16x2_tu14__SVBfloat16_tu11__SVUint8_t(
|
||||
// CPP-CHECK-NEXT: entry:
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CPP-CHECK-NEXT: ret void
|
||||
//
|
||||
void test_svtmopa_lane_za32_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
|
||||
SME_ACLE_FUNC(svtmopa_lane_za32,_bf16_bf16,)(1, zn, zm, zk, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_svtmopa_lane_za16_f16_f16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CPP-CHECK-LABEL: @_Z30test_svtmopa_lane_za16_f16_f1613svfloat16x2_tu13__SVFloat16_tu11__SVUint8_t(
|
||||
// CPP-CHECK-NEXT: entry:
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv8f16(i32 1, <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CPP-CHECK-NEXT: ret void
|
||||
//
|
||||
void test_svtmopa_lane_za16_f16_f16(svfloat16x2_t zn, svfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
|
||||
SME_ACLE_FUNC(svtmopa_lane_za16,_f16_f16,)(1, zn, zm, zk, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_svtmopa_lane_za16_bf16_bf16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CPP-CHECK-LABEL: @_Z32test_svtmopa_lane_za16_bf16_bf1614svbfloat16x2_tu14__SVBfloat16_tu11__SVUint8_t(
|
||||
// CPP-CHECK-NEXT: entry:
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv8bf16(i32 1, <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CPP-CHECK-NEXT: ret void
|
||||
//
|
||||
void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, svbfloat16_t zm, svuint8_t zk) __arm_streaming __arm_inout("za") {
|
||||
SME_ACLE_FUNC(svtmopa_lane_za16,_bf16_bf16,)(1, zn, zm, zk, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_svtmopa_lane_za16_mf8_mf8_fpm(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CPP-CHECK-LABEL: @_Z34test_svtmopa_lane_za16_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm(
|
||||
// CPP-CHECK-NEXT: entry:
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za16.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CPP-CHECK-NEXT: ret void
|
||||
//
|
||||
void test_svtmopa_lane_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") {
|
||||
SME_ACLE_FUNC(svtmopa_lane_za16,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_svtmopa_lane_za32_mf8_mf8_fpm(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
|
||||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CPP-CHECK-LABEL: @_Z34test_svtmopa_lane_za32_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm(
|
||||
// CPP-CHECK-NEXT: entry:
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
|
||||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ftmopa.za32.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
|
||||
// CPP-CHECK-NEXT: ret void
|
||||
//
|
||||
void test_svtmopa_lane_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") {
|
||||
SME_ACLE_FUNC(svtmopa_lane_za32,_mf8_mf8,_fpm)(1, zn, zm, zk, 3, fpmr);
|
||||
}
|
||||
120
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp
Normal file
120
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_tmop.cpp
Normal file
@@ -0,0 +1,120 @@
|
||||
// RUN: %clang_cc1 -triple aarch64 \
|
||||
// RUN: -target-feature +sme -target-feature +sme2 -verify -emit-llvm -o - %s
|
||||
|
||||
// REQUIRES: aarch64-registered-target
|
||||
|
||||
#include <arm_sme.h>
|
||||
|
||||
void test_features(svuint8x2_t zn_u8, svuint8_t zm_u8,
|
||||
svint8x2_t zn_s8, svint8_t zm_s8,
|
||||
svint16x2_t zn_s16, svint16_t zm_s16,
|
||||
svuint16x2_t zn_u16, svuint16_t zm_u16,
|
||||
svfloat16x2_t zn_f16, svfloat16_t zm_f16,
|
||||
svbfloat16x2_t zn_bf16, svbfloat16_t zm_bf16,
|
||||
svfloat32x2_t zn_f32, svfloat32_t zm_f32,
|
||||
svmfloat8x2_t zn_f8, svmfloat8_t zm_f8,
|
||||
svuint8_t zk, fpm_t fpm) __arm_streaming __arm_inout("za") {
|
||||
|
||||
// expected-error@+1 {{'svtmopa_lane_za32_s8_s8' needs target feature sme,sme2,sme-tmop}}
|
||||
svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, 0);
|
||||
// expected-error@+1 {{'svtmopa_lane_za32_u8_u8' needs target feature sme,sme2,sme-tmop}}
|
||||
svtmopa_lane_za32_u8_u8(0, zn_u8, zm_u8, zk, 0);
|
||||
// expected-error@+1 {{'svtmopa_lane_za32_s8_u8' needs target feature sme,sme2,sme-tmop}}
|
||||
svtmopa_lane_za32_s8_u8(0, zn_s8, zm_u8, zk, 0);
|
||||
// expected-error@+1 {{'svtmopa_lane_za32_u8_s8' needs target feature sme,sme2,sme-tmop}}
|
||||
svtmopa_lane_za32_u8_s8(0, zn_u8, zm_s8, zk, 0);
|
||||
// expected-error@+1 {{'svtmopa_lane_za32_s16_s16' needs target feature sme,sme2,sme-tmop}}
|
||||
svtmopa_lane_za32_s16_s16(0, zn_s16, zm_s16, zk, 0);
|
||||
// expected-error@+1 {{'svtmopa_lane_za32_u16_u16' needs target feature sme,sme2,sme-tmop}}
|
||||
svtmopa_lane_za32_u16_u16(0, zn_u16, zm_u16, zk, 0);
|
||||
// expected-error@+1 {{'svtmopa_lane_za32_f16_f16' needs target feature sme,sme2,sme-tmop}}
|
||||
svtmopa_lane_za32_f16_f16(0, zn_f16, zm_f16, zk, 0);
|
||||
// expected-error@+1 {{'svtmopa_lane_za32_f32_f32' needs target feature sme,sme2,sme-tmop}}
|
||||
svtmopa_lane_za32_f32_f32(0, zn_f32, zm_f32, zk, 0);
|
||||
// expected-error@+1 {{'svtmopa_lane_za32_bf16_bf16' needs target feature sme,sme2,sme-tmop}}
|
||||
svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, 0);
|
||||
// expected-error@+1 {{'svtmopa_lane_za16_f16_f16' needs target feature sme,sme2,sme-tmop,sme-f16f16}}
|
||||
svtmopa_lane_za16_f16_f16(0, zn_f16, zm_f16, zk, 0);
|
||||
// expected-error@+1 {{'svtmopa_lane_za16_bf16_bf16' needs target feature sme,sme2,sme-tmop,sme-b16b16}}
|
||||
svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, 0);
|
||||
// expected-error@+1 {{'svtmopa_lane_za16_mf8_mf8_fpm' needs target feature sme,sme2,sme-tmop,sme-f8f16}}
|
||||
svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, 0, fpm);
|
||||
// expected-error@+1 {{'svtmopa_lane_za32_mf8_mf8_fpm' needs target feature sme,sme2,sme-tmop,sme-f8f32}}
|
||||
svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, 0, fpm);
|
||||
}
|
||||
|
||||
void test_imm(svuint8x2_t zn_u8, svuint8_t zm_u8,
|
||||
svint8x2_t zn_s8, svint8_t zm_s8,
|
||||
svint16x2_t zn_s16, svint16_t zm_s16,
|
||||
svuint16x2_t zn_u16, svuint16_t zm_u16,
|
||||
svfloat16x2_t zn_f16, svfloat16_t zm_f16,
|
||||
svbfloat16x2_t zn_bf16, svbfloat16_t zm_bf16,
|
||||
svfloat32x2_t zn_f32, svfloat32_t zm_f32,
|
||||
svmfloat8x2_t zn_f8, svmfloat8_t zm_f8,
|
||||
svuint8_t zk, fpm_t fpm) __arm_streaming __arm_inout("za") {
|
||||
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_s8_s8(0, zn_s8, zm_s8, zk, 4);
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_s8_s8(4, zn_s8, zm_s8, zk, 0);
|
||||
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_u8_u8(0, zn_u8, zm_u8, zk, 4);
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_u8_u8(4, zn_u8, zm_u8, zk, 0);
|
||||
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_s8_u8(0, zn_s8, zm_u8, zk, 4);
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_s8_u8(4, zn_s8, zm_u8, zk, 0);
|
||||
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_u8_s8(0, zn_u8, zm_s8, zk, 4);
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_u8_s8(4, zn_u8, zm_s8, zk, 0);
|
||||
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_s16_s16(0, zn_s16, zm_s16, zk, 4);
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_s16_s16(4, zn_s16, zm_s16, zk, 0);
|
||||
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_u16_u16(0, zn_u16, zm_u16, zk, 4);
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_u16_u16(4, zn_u16, zm_u16, zk, 0);
|
||||
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_f16_f16(0, zn_f16, zm_f16, zk, 4);
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_f16_f16(4, zn_f16, zm_f16, zk, 0);
|
||||
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_f32_f32(0, zn_f32, zm_f32, zk, 4);
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_f32_f32(4, zn_f32, zm_f32, zk, 0);
|
||||
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_bf16_bf16(0, zn_bf16, zm_bf16, zk, 4);
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_bf16_bf16(4, zn_bf16, zm_bf16, zk, 0);
|
||||
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za16_f16_f16(0, zn_f16, zm_f16, zk, 4);
|
||||
// expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}}
|
||||
svtmopa_lane_za16_f16_f16(2, zn_f16, zm_f16, zk, 0);
|
||||
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za16_bf16_bf16(0, zn_bf16, zm_bf16, zk, 4);
|
||||
// expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}}
|
||||
svtmopa_lane_za16_bf16_bf16(2, zn_bf16, zm_bf16, zk, 0);
|
||||
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za16_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, 4, fpm);
|
||||
// expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}}
|
||||
svtmopa_lane_za16_mf8_mf8_fpm(2, zn_f8, zm_f8, zk, 0, fpm);
|
||||
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_mf8_mf8_fpm(0, zn_f8, zm_f8, zk, 4, fpm);
|
||||
// expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}}
|
||||
svtmopa_lane_za32_mf8_mf8_fpm(4, zn_f8, zm_f8, zk, 0, fpm);
|
||||
}
|
||||
@@ -3107,6 +3107,24 @@ let TargetPrefix = "aarch64" in {
|
||||
}
|
||||
}
|
||||
|
||||
class SME_OuterProduct_TMOP_Intrinsic
|
||||
: DefaultAttrsIntrinsic<[],
|
||||
[llvm_i32_ty,
|
||||
llvm_anyvector_ty,
|
||||
LLVMMatchType<0>,
|
||||
LLVMMatchType<0>,
|
||||
llvm_nxv16i8_ty,
|
||||
llvm_i32_ty],
|
||||
[ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<5>>,
|
||||
IntrInaccessibleMemOnly]>;
|
||||
|
||||
def int_aarch64_sme_ftmopa_za16 : SME_OuterProduct_TMOP_Intrinsic;
|
||||
def int_aarch64_sme_ftmopa_za32 : SME_OuterProduct_TMOP_Intrinsic;
|
||||
def int_aarch64_sme_stmopa_za32 : SME_OuterProduct_TMOP_Intrinsic;
|
||||
def int_aarch64_sme_utmopa_za32 : SME_OuterProduct_TMOP_Intrinsic;
|
||||
def int_aarch64_sme_sutmopa_za32 : SME_OuterProduct_TMOP_Intrinsic;
|
||||
def int_aarch64_sme_ustmopa_za32 : SME_OuterProduct_TMOP_Intrinsic;
|
||||
|
||||
class SME_AddVectorToTile_Intrinsic
|
||||
: DefaultAttrsIntrinsic<[],
|
||||
[llvm_i32_ty,
|
||||
|
||||
@@ -1269,8 +1269,10 @@ def ZPRMul2AsmOp32_Hi : ZPRAsmOperand<"VectorS_Hi", 32, "Mul2_Hi">;
|
||||
def ZPRMul2AsmOp64_Lo : ZPRAsmOperand<"VectorD_Lo", 64, "Mul2_Lo">;
|
||||
def ZPRMul2AsmOp64_Hi : ZPRAsmOperand<"VectorD_Hi", 64, "Mul2_Hi">;
|
||||
|
||||
def ZPR_K : RegisterClass<"AArch64", [untyped], 128,
|
||||
(add Z20, Z21, Z22, Z23, Z28, Z29, Z30, Z31)>;
|
||||
def ZPR_K : RegisterClass<"AArch64", [nxv16i8], 128,
|
||||
(add Z20, Z21, Z22, Z23, Z28, Z29, Z30, Z31)>{
|
||||
let Size = 128;
|
||||
}
|
||||
|
||||
def ZK : RegisterOperand<ZPR_K, "printSVERegOp<>">{
|
||||
let EncoderMethod = "EncodeZK";
|
||||
|
||||
@@ -175,12 +175,31 @@ let Predicates = [HasSME_MOP4, HasSMEI16I64] in {
|
||||
}
|
||||
|
||||
let Predicates = [HasSME_TMOP] in {
|
||||
def STMOPA_M2ZZZI_BtoS : sme_int_sparse_outer_product_i32<0b00100, ZZ_b_mul_r, ZPR8, "stmopa">;
|
||||
def STMOPA_M2ZZZI_HtoS : sme_int_sparse_outer_product_i32<0b00101, ZZ_h_mul_r, ZPR16, "stmopa">;
|
||||
def UTMOPA_M2ZZZI_BtoS : sme_int_sparse_outer_product_i32<0b11100, ZZ_b_mul_r, ZPR8, "utmopa">;
|
||||
def UTMOPA_M2ZZZI_HtoS : sme_int_sparse_outer_product_i32<0b10101, ZZ_h_mul_r, ZPR16, "utmopa">;
|
||||
def SUTMOPA_M2ZZZI_BtoS : sme_int_sparse_outer_product_i32<0b01100, ZZ_b_mul_r, ZPR8, "sutmopa">;
|
||||
def USTMOPA_M2ZZZI_BtoS : sme_int_sparse_outer_product_i32<0b10100, ZZ_b_mul_r, ZPR8, "ustmopa">;
|
||||
defm STMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b00100, ZZ_b_mul_r, ZPR8, nxv16i8, "stmopa", int_aarch64_sme_stmopa_za32>;
|
||||
defm STMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b00101, ZZ_h_mul_r, ZPR16, nxv8i16, "stmopa", int_aarch64_sme_stmopa_za32>;
|
||||
defm UTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b11100, ZZ_b_mul_r, ZPR8, nxv16i8, "utmopa", int_aarch64_sme_utmopa_za32>;
|
||||
defm UTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10101, ZZ_h_mul_r, ZPR16, nxv8i16, "utmopa", int_aarch64_sme_utmopa_za32>;
|
||||
defm SUTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01100, ZZ_b_mul_r, ZPR8, nxv16i8, "sutmopa", int_aarch64_sme_sutmopa_za32>;
|
||||
defm USTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b10100, ZZ_b_mul_r, ZPR8, nxv16i8, "ustmopa", int_aarch64_sme_ustmopa_za32>;
|
||||
defm FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_ftmopa_za32, [FPCR]>;
|
||||
defm FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, nxv4f32, "ftmopa", int_aarch64_sme_ftmopa_za32, [FPCR]>;
|
||||
defm BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_ftmopa_za32, [FPCR]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME_TMOP, HasSMEF16F16] in {
|
||||
defm FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, nxv8f16, "ftmopa", int_aarch64_sme_ftmopa_za16, [FPCR]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME_TMOP, HasSMEB16B16] in {
|
||||
defm BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, nxv8bf16, "bftmopa", int_aarch64_sme_ftmopa_za16, [FPCR]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME_TMOP, HasSMEF8F16] in {
|
||||
defm FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_ftmopa_za16, [FPMR, FPCR]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME_TMOP, HasSMEF8F32] in {
|
||||
defm FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, nxv16i8, "ftmopa", int_aarch64_sme_ftmopa_za32, [FPMR, FPCR]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME] in {
|
||||
@@ -1064,12 +1083,6 @@ let Predicates = [HasSME_MOP4] in {
|
||||
defm FMOP4S : sme2_fmop4as_fp32_non_widening<1, "fmop4s", "int_aarch64_sme_mop4s">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME_TMOP] in {
|
||||
def FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, "ftmopa">;
|
||||
def FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, "ftmopa">;
|
||||
def BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, "bftmopa">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME2p2] in {
|
||||
defm FMUL_2ZZ : sme2_multi2_fmul_sm<"fmul">;
|
||||
defm FMUL_2Z2Z : sme2_multi2_fmul_mm< "fmul">;
|
||||
@@ -1078,26 +1091,10 @@ let Predicates = [HasSME2p2] in {
|
||||
|
||||
} // [HasSME2p2]
|
||||
|
||||
let Predicates = [HasSME_TMOP, HasSMEB16B16] in {
|
||||
def BFTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b11001, ZZ_h_mul_r, ZPR16, "bftmopa">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME_TMOP, HasSMEF8F32], Uses = [FPMR, FPCR] in {
|
||||
def FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, "ftmopa">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME_TMOP, HasSMEF8F16], Uses = [FPMR, FPCR] in {
|
||||
def FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, "ftmopa">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME_MOP4, HasSMEF8F16], Uses = [FPMR, FPCR] in {
|
||||
defm FMOP4A : sme2_fmop4a_fp8_fp16_2way<"fmop4a">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME_TMOP, HasSMEF16F16] in {
|
||||
def FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, "ftmopa">;
|
||||
}
|
||||
|
||||
let Predicates = [HasSME_MOP4, HasSMEF16F16] in {
|
||||
defm FMOP4A : sme2_fmop4as_fp16_non_widening<0, "fmop4a", "int_aarch64_sme_mop4a">;
|
||||
defm FMOP4S : sme2_fmop4as_fp16_non_widening<1, "fmop4s", "int_aarch64_sme_mop4s">;
|
||||
|
||||
@@ -106,6 +106,16 @@ class sme_outer_product_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
|
||||
let mayStore = 1;
|
||||
}
|
||||
|
||||
class sme_sparse_outer_product_pseudo<RegisterOperand zn_ty, RegisterOperand zm_ty, SMEMatrixTypeEnum za_flag>
|
||||
: Pseudo<(outs), (ins i32imm:$tile, zn_ty:$zn, zm_ty:$zm, ZK:$zk, i32imm:$idx), []>,
|
||||
Sched<[]> {
|
||||
// Translated to the actual instructions in AArch64ISelLowering.cpp
|
||||
let SMEMatrixType = za_flag;
|
||||
let usesCustomInserter = 1;
|
||||
let mayLoad = 1;
|
||||
let mayStore = 1;
|
||||
}
|
||||
|
||||
class sme2_quarter_tile_outer_product_pseudo<RegisterOperand zn_ty, RegisterOperand zm_ty, SMEMatrixTypeEnum za_flag>
|
||||
: Pseudo<(outs), (ins i32imm:$tile,
|
||||
zn_ty:$zn, zm_ty:$zm), []>,
|
||||
@@ -296,6 +306,12 @@ class SME2_ZA_Tile_Vec_Multi_Single_Pat<string name, SDPatternOperator intrinsic
|
||||
class SME2_ZA_Tile_Vec_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand imm_ty, ValueType vt>
|
||||
: Pat<(intrinsic imm_ty:$tile, vt:$Zn1, vt:$Zn2, vt:$Zm1, vt:$Zm2),
|
||||
(!cast<Instruction>(name # _PSEUDO) $tile, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>;
|
||||
|
||||
class SME2_ZA_TMOP_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt>
|
||||
: Pat<(intrinsic tile_imm:$tile, vt:$Zn1, vt:$Zn2, vt:$Zm, nxv16i8:$Zk, timm32_0_3:$idx),
|
||||
(!cast<Instruction>(name # _PSEUDO) $tile, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), $Zm, $Zk, $idx)>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SME pattern match helpers.
|
||||
//===----------------------------------------------------------------------===//
|
||||
@@ -489,35 +505,6 @@ multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
|
||||
def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv8i1, nxv8i16>;
|
||||
}
|
||||
|
||||
class sme_int_sparse_outer_product_i32<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
|
||||
: I<(outs TileOp32:$ZAda),
|
||||
(ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm),
|
||||
mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm",
|
||||
"", []>,
|
||||
Sched<[]> {
|
||||
bits<2> ZAda;
|
||||
bits<4> Zn;
|
||||
bits<5> Zm;
|
||||
bits<3> Zk;
|
||||
bits<2> imm;
|
||||
let Inst{31-25} = 0b1000000;
|
||||
let Inst{24} = opc{4};
|
||||
let Inst{23-22} = 0b01;
|
||||
let Inst{21} = opc{3};
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15} = opc{2};
|
||||
let Inst{14} = 0b0;
|
||||
let Inst{13} = opc{1};
|
||||
let Inst{12-10} = Zk;
|
||||
let Inst{9-6} = Zn;
|
||||
let Inst{5-4} = imm;
|
||||
let Inst{3} = opc{0};
|
||||
let Inst{2} = 0b0;
|
||||
let Inst{1-0} = ZAda;
|
||||
|
||||
let Constraints = "$ZAda = $_ZAda";
|
||||
}
|
||||
|
||||
class sme_outer_product_widening_inst<bits<3> opc, ZPRRegOp zpr_ty, string mnemonic>
|
||||
: I<(outs TileOp32:$ZAda),
|
||||
(ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
|
||||
@@ -3562,7 +3549,7 @@ multiclass sme2_int_bmopx_tile<string mnemonic, bits<3> op, SDPatternOperator i
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SME2 Sparse Outer Product and Accumulate
|
||||
|
||||
class sme_tmopa_16b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
|
||||
class sme_int_sparse_outer_product_i16<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
|
||||
: I<(outs TileOp16:$ZAda),
|
||||
(ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm),
|
||||
mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm",
|
||||
@@ -3591,7 +3578,7 @@ class sme_tmopa_16b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, s
|
||||
let Constraints = "$ZAda = $_ZAda";
|
||||
}
|
||||
|
||||
class sme_tmopa_32b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
|
||||
class sme_int_sparse_outer_product_i32<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
|
||||
: I<(outs TileOp32:$ZAda),
|
||||
(ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm),
|
||||
mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm",
|
||||
@@ -3620,6 +3607,25 @@ class sme_tmopa_32b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, s
|
||||
let Constraints = "$ZAda = $_ZAda";
|
||||
}
|
||||
|
||||
multiclass sme_tmopa_16b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, ValueType vt, string mnemonic, SDPatternOperator intrinsic, list<Register> uses=[]> {
|
||||
def NAME : sme_int_sparse_outer_product_i16<opc, zn_ty, zm_ty, mnemonic>, SMEPseudo2Instr<NAME, 1> {
|
||||
let Uses = uses;
|
||||
}
|
||||
|
||||
def NAME # _PSEUDO : sme_sparse_outer_product_pseudo<zn_ty, zm_ty, SMEMatrixTileH>, SMEPseudo2Instr<NAME, 0>;
|
||||
|
||||
def : SME2_ZA_TMOP_Pat<NAME, intrinsic, timm32_0_1, vt>;
|
||||
}
|
||||
|
||||
multiclass sme_tmopa_32b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, ValueType vt, string mnemonic, SDPatternOperator intrinsic, list<Register> uses=[]> {
|
||||
def NAME : sme_int_sparse_outer_product_i32<opc, zn_ty, zm_ty, mnemonic>, SMEPseudo2Instr<NAME, 1> {
|
||||
let Uses = uses;
|
||||
}
|
||||
|
||||
def NAME # _PSEUDO : sme_sparse_outer_product_pseudo<zn_ty, zm_ty, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
|
||||
|
||||
def : SME2_ZA_TMOP_Pat<NAME, intrinsic, timm32_0_3, vt>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===///
|
||||
// SME2 Zero Lookup Table.
|
||||
|
||||
@@ -57,11 +57,11 @@ tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
; CHECK-LABEL: name: inlineasm_virt_reg_output
|
||||
; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2883594 /* regdef:GPR32common */, def %0
|
||||
; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2818058 /* regdef:GPR32common */, def %0
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0
|
||||
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2883594 /* regdef:GPR32common */, def %0:gpr32common
|
||||
INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2818058 /* regdef:GPR32common */, def %0:gpr32common
|
||||
%1:_(s32) = COPY %0
|
||||
$w0 = COPY %1(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
@@ -75,12 +75,12 @@ tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
; CHECK-LABEL: name: inlineasm_virt_mixed_types
|
||||
; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2883594 /* regdef:GPR32common */, def %0, 3735562 /* regdef:FPR64 */, def %1
|
||||
; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2818058 /* regdef:GPR32common */, def %0, 3670026 /* regdef:FPR64 */, def %1
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(s64) = COPY %1
|
||||
; CHECK-NEXT: $d0 = COPY [[COPY1]](s64)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $d0
|
||||
INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2883594 /* regdef:GPR32common */, def %0:gpr32common, 3735562 /* regdef:FPR64 */, def %1:fpr64
|
||||
INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2818058 /* regdef:GPR32common */, def %0:gpr32common, 3670026 /* regdef:FPR64 */, def %1:fpr64
|
||||
%3:_(s32) = COPY %0
|
||||
%4:_(s64) = COPY %1
|
||||
$d0 = COPY %4(s64)
|
||||
|
||||
@@ -91,10 +91,10 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[LOADgot:%[0-9]+]]:gpr64common = LOADgot target-flags(aarch64-got) @c
|
||||
; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c)
|
||||
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3)
|
||||
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3670026 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3)
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY %2
|
||||
; CHECK-NEXT: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c)
|
||||
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3)
|
||||
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3670026 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3)
|
||||
; CHECK-NEXT: [[FNEGDr:%[0-9]+]]:fpr64 = FNEGDr %2
|
||||
; CHECK-NEXT: nofpexcept FCMPDrr %4, killed [[FNEGDr]], implicit-def $nzcv, implicit $fpcr
|
||||
; CHECK-NEXT: Bcc 1, %bb.2, implicit $nzcv
|
||||
@@ -111,10 +111,10 @@ body: |
|
||||
|
||||
%6:gpr64common = LOADgot target-flags(aarch64-got) @c
|
||||
%3:fpr64 = LDRDui %6, 0 :: (dereferenceable load (s64) from @c)
|
||||
INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, %3(tied-def 3)
|
||||
INLINEASM &"", 1 /* sideeffect attdialect */, 3670026 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, %3(tied-def 3)
|
||||
%0:fpr64 = COPY %2
|
||||
%5:fpr64 = LDRDui %6, 0 :: (dereferenceable load (s64) from @c)
|
||||
INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, %5(tied-def 3)
|
||||
INLINEASM &"", 1 /* sideeffect attdialect */, 3670026 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, %5(tied-def 3)
|
||||
%7:fpr64 = FNEGDr %2
|
||||
nofpexcept FCMPDrr %4, killed %7, implicit-def $nzcv, implicit $fpcr
|
||||
Bcc 1, %bb.2, implicit $nzcv
|
||||
|
||||
@@ -487,7 +487,7 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[DEF]]
|
||||
; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3735562 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed [[COPY1]]
|
||||
; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3670026 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed [[COPY1]]
|
||||
; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub
|
||||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
|
||||
@@ -505,7 +505,7 @@ body: |
|
||||
%0:gpr64common = COPY $x0
|
||||
%2:gpr64all = IMPLICIT_DEF
|
||||
%3:gpr64sp = COPY %2
|
||||
INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3735562 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed %3
|
||||
INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3670026 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed %3
|
||||
%4:fpr128 = MOVIv2d_ns 0
|
||||
%5:fpr64 = COPY %4.dsub
|
||||
%7:fpr128 = IMPLICIT_DEF
|
||||
|
||||
162
llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll
Normal file
162
llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll
Normal file
@@ -0,0 +1,162 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
||||
; RUN: llc -force-streaming -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
target triple = "aarch64-linux"
|
||||
|
||||
define void @stmopa_za32_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
|
||||
; CHECK-LABEL: stmopa_za32_s8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z28.d, z3.d
|
||||
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: stmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sme.stmopa.za32.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @utmopa_za32_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
|
||||
; CHECK-LABEL: utmopa_za32_u8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z28.d, z3.d
|
||||
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: utmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sme.utmopa.za32.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @ustmopa_za32_u8_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
|
||||
; CHECK-LABEL: ustmopa_za32_u8_s8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z28.d, z3.d
|
||||
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: ustmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sme.ustmopa.za32.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sutmopa_za32_s8_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
|
||||
; CHECK-LABEL: sutmopa_za32_s8_u8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z28.d, z3.d
|
||||
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: sutmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sme.sutmopa.za32.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @stmopa_za32_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm, <vscale x 16 x i8> %zk) #0 {
|
||||
; CHECK-LABEL: stmopa_za32_s16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z28.d, z3.d
|
||||
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: stmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sme.stmopa.za32.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm, <vscale x 16 x i8> %zk, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @utmopa_za32_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm, <vscale x 16 x i8> %zk) #0 {
|
||||
; CHECK-LABEL: utmopa_za32_u16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z28.d, z3.d
|
||||
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: utmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sme.utmopa.za32.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm, <vscale x 16 x i8> %zk, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @ftmopa_za32_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm, <vscale x 16 x i8> %zk) #0 {
|
||||
; CHECK-LABEL: ftmopa_za32_f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z28.d, z3.d
|
||||
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: ftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sme.ftmopa.za32.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm, <vscale x 16 x i8> %zk, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @bftmopa_za32_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm, <vscale x 16 x i8> %zk) #0 {
|
||||
; CHECK-LABEL: bftmopa_za32_bf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z28.d, z3.d
|
||||
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: bftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sme.ftmopa.za32.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm, <vscale x 16 x i8> %zk, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @ftmopa_za32_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm, <vscale x 16 x i8> %zk) #0 {
|
||||
; CHECK-LABEL: ftmopa_za32_f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z28.d, z3.d
|
||||
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: ftmopa za0.s, { z0.s, z1.s }, z2.s, z28[0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sme.ftmopa.za32.nxv4f32(i32 0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm, <vscale x 16 x i8> %zk, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @ftmopa_za16_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm, <vscale x 16 x i8> %zk) #0 {
|
||||
; CHECK-LABEL: ftmopa_za16_f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z28.d, z3.d
|
||||
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: ftmopa za0.h, { z0.h, z1.h }, z2.h, z28[0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sme.ftmopa.za16.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm, <vscale x 16 x i8> %zk, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @bftmopa_za16_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm, <vscale x 16 x i8> %zk) #0 {
|
||||
; CHECK-LABEL: bftmopa_za16_bf16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z28.d, z3.d
|
||||
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: bftmopa za0.h, { z0.h, z1.h }, z2.h, z28[0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sme.ftmopa.za16.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm, <vscale x 16 x i8> %zk, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @ftmopa_za16_f8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
|
||||
; CHECK-LABEL: ftmopa_za16_f8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z28.d, z3.d
|
||||
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: ftmopa za0.h, { z0.b, z1.b }, z2.b, z28[0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sme.ftmopa.za16.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @ftmopa_za32_f8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
|
||||
; CHECK-LABEL: ftmopa_za32_f8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z28.d, z3.d
|
||||
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
|
||||
; CHECK-NEXT: ftmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sme.ftmopa.za32.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = {nounwind "target-features" = "+sme2,+sme-tmop,+sme-f16f16,+sme-b16b16,+sme-f8f16,+sme-f8f32,+sme2p1,+bf16" }
|
||||
Reference in New Issue
Block a user