Files
clang-p2996/clang/test/CodeGen/arm-mfp8.c
CarolineConcatto 49940514e2 [CLANG][AArch64] Add the modal 8 bit floating-point scalar type (#97277)
ARM ACLE PR#323[1] adds new modal types for 8-bit floating point
intrinsic.

From the PR#323:
```
ACLE defines the `__mfp8` type, which can be used for the E5M2 and E4M3
8-bit floating-point formats. It is a storage and interchange only type
with no arithmetic operations other than intrinsic calls.
````

The type should be an opaque type and its format in undefined in Clang.
Only defined in the backend by a status/format register, for AArch64 the
FPMR.

This patch is an attempt to the add the mfloat8_t scalar type. It has a
parser and codegen for the new scalar type.

The patch it is lowering to and 8bit unsigned as it has no format. But
maybe we should add another opaque type.

[1]  https://github.com/ARM-software/acle/pull/323
2024-10-25 13:59:46 +01:00

86 lines
4.1 KiB
C

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -emit-llvm -triple aarch64-arm-none-eabi -target-feature -fp8 -target-feature +neon -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-C
// RUN: %clang_cc1 -emit-llvm -triple aarch64-arm-none-eabi -target-feature -fp8 -target-feature +neon -o - -x c++ %s | FileCheck %s --check-prefixes=CHECK,CHECK-CXX
// REQUIRES: aarch64-registered-target
#include <arm_neon.h>
// CHECK-C-LABEL: define dso_local <16 x i8> @test_ret_mfloat8x16_t(
// CHECK-C-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-C-NEXT: [[ENTRY:.*:]]
// CHECK-C-NEXT: [[V_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-C-NEXT: store <16 x i8> [[V]], ptr [[V_ADDR]], align 16
// CHECK-C-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V_ADDR]], align 16
// CHECK-C-NEXT: ret <16 x i8> [[TMP0]]
//
// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z21test_ret_mfloat8x16_tu14__MFloat8x16_t(
// CHECK-CXX-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: [[V_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-CXX-NEXT: store <16 x i8> [[V]], ptr [[V_ADDR]], align 16
// CHECK-CXX-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V_ADDR]], align 16
// CHECK-CXX-NEXT: ret <16 x i8> [[TMP0]]
//
mfloat8x16_t test_ret_mfloat8x16_t(mfloat8x16_t v) {
return v;
}
// CHECK-C-LABEL: define dso_local <8 x i8> @test_ret_mfloat8x8_t(
// CHECK-C-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-C-NEXT: [[ENTRY:.*:]]
// CHECK-C-NEXT: [[V_ADDR:%.*]] = alloca <8 x i8>, align 8
// CHECK-C-NEXT: store <8 x i8> [[V]], ptr [[V_ADDR]], align 8
// CHECK-C-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[V_ADDR]], align 8
// CHECK-C-NEXT: ret <8 x i8> [[TMP0]]
//
// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z20test_ret_mfloat8x8_tu13__MFloat8x8_t(
// CHECK-CXX-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: [[V_ADDR:%.*]] = alloca <8 x i8>, align 8
// CHECK-CXX-NEXT: store <8 x i8> [[V]], ptr [[V_ADDR]], align 8
// CHECK-CXX-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[V_ADDR]], align 8
// CHECK-CXX-NEXT: ret <8 x i8> [[TMP0]]
//
mfloat8x8_t test_ret_mfloat8x8_t(mfloat8x8_t v) {
return v;
}
// CHECK-C-LABEL: define dso_local <1 x i8> @func1n(
// CHECK-C-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] {
// CHECK-C-NEXT: [[ENTRY:.*:]]
// CHECK-C-NEXT: [[MFP8_ADDR:%.*]] = alloca <1 x i8>, align 1
// CHECK-C-NEXT: [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1
// CHECK-C-NEXT: store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1
// CHECK-C-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[MFP8_ADDR]], align 1
// CHECK-C-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2
// CHECK-C-NEXT: store <1 x i8> [[TMP0]], ptr [[ARRAYIDX]], align 1
// CHECK-C-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2
// CHECK-C-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[ARRAYIDX1]], align 1
// CHECK-C-NEXT: ret <1 x i8> [[TMP1]]
//
// CHECK-CXX-LABEL: define dso_local <1 x i8> @_Z6func1nu11__MFloat8_t(
// CHECK-CXX-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: [[MFP8_ADDR:%.*]] = alloca <1 x i8>, align 1
// CHECK-CXX-NEXT: [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1
// CHECK-CXX-NEXT: store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1
// CHECK-CXX-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[MFP8_ADDR]], align 1
// CHECK-CXX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2
// CHECK-CXX-NEXT: store <1 x i8> [[TMP0]], ptr [[ARRAYIDX]], align 1
// CHECK-CXX-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2
// CHECK-CXX-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[ARRAYIDX1]], align 1
// CHECK-CXX-NEXT: ret <1 x i8> [[TMP1]]
//
__mfp8 func1n(__mfp8 mfp8) {
__mfp8 f1n[10];
f1n[2] = mfp8;
return f1n[2];
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
// CHECK: {{.*}}