[MLIR] Add f8E3M4 IEEE 754 type (#101230)

This PR adds the `f8E3M4` type to MLIR.

The `f8E3M4` type follows IEEE 754 conventions:

```c
f8E3M4 (IEEE 754)
- Exponent bias: 3
- Maximum stored exponent value: 6 (binary 110)
- Maximum unbiased exponent value: 6 - 3 = 3
- Minimum stored exponent value: 1 (binary 001)
- Minimum unbiased exponent value: 1 − 3 = −2
- Precision (total number of significand bits, including the implicit leading
    integer bit): 4 + 1 = 5
- Follows IEEE 754 conventions for representation of special values
- Has Positive and Negative zero
- Has Positive and Negative infinity
- Has NaNs

Additional details:
- Max exp (unbiased): 3
- Min exp (unbiased): -2
- Infinities (+/-): S.111.0000
- Zeros (+/-): S.000.0000
- NaNs: S.111.{0,1}⁴ except S.111.0000
- Max normal number: S.110.1111 = +/-2^(6-3) x (1 + 15/16) = +/-2^3 x 31 x 2^(-4) = +/-15.5
- Min normal number: S.001.0000 = +/-2^(1-3) x (1 + 0) = +/-2^(-2)
- Max subnormal number: S.000.1111 = +/-2^(-2) x 15/16 = +/-2^(-2) x 15 x 2^(-4) = +/-15 x 2^(-6)
- Min subnormal number: S.000.0001 = +/-2^(-2) x 1/16 =  +/-2^(-2) x 2^(-4) = +/-2^(-6)
```

Related PRs:
- [PR-99698](https://github.com/llvm/llvm-project/pull/99698) [APFloat] Add support for f8E3M4 IEEE 754 type
- [PR-97118](https://github.com/llvm/llvm-project/pull/97118) [MLIR] Add f8E4M3 IEEE 754 type
This commit is contained in:
Alexander Pivovarov
2024-08-02 00:22:11 -07:00
committed by GitHub
parent e9c20b9132
commit eef1d7e377
24 changed files with 133 additions and 9 deletions

View File

@@ -139,6 +139,16 @@ MLIR_CAPI_EXPORTED bool mlirTypeIsAFloat8E4M3B11FNUZ(MlirType type);
/// context.
MLIR_CAPI_EXPORTED MlirType mlirFloat8E4M3B11FNUZTypeGet(MlirContext ctx);
/// Returns the typeID of a Float8E3M4 type.
MLIR_CAPI_EXPORTED MlirTypeID mlirFloat8E3M4TypeGetTypeID(void);
/// Checks whether the given type is an f8E3M4 type.
MLIR_CAPI_EXPORTED bool mlirTypeIsAFloat8E3M4(MlirType type);
/// Creates an f8E3M4 type in the given context. The type is owned by the
/// context.
MLIR_CAPI_EXPORTED MlirType mlirFloat8E3M4TypeGet(MlirContext ctx);
/// Returns the typeID of an BFloat16 type.
MLIR_CAPI_EXPORTED MlirTypeID mlirBFloat16TypeGetTypeID(void);

View File

@@ -66,6 +66,7 @@ public:
FloatType getFloat8E5M2FNUZType();
FloatType getFloat8E4M3FNUZType();
FloatType getFloat8E4M3B11FNUZType();
FloatType getFloat8E3M4Type();
FloatType getBF16Type();
FloatType getF16Type();
FloatType getTF32Type();

View File

@@ -66,6 +66,7 @@ public:
static FloatType getFloat8E5M2FNUZ(MLIRContext *ctx);
static FloatType getFloat8E4M3FNUZ(MLIRContext *ctx);
static FloatType getFloat8E4M3B11FNUZ(MLIRContext *ctx);
static FloatType getFloat8E3M4(MLIRContext *ctx);
/// Methods for support type inquiry through isa, cast, and dyn_cast.
static bool classof(Type type);
@@ -411,10 +412,11 @@ inline bool BaseMemRefType::isValidElementType(Type type) {
}
inline bool FloatType::classof(Type type) {
return llvm::isa<
Float8E5M2Type, Float8E4M3Type, Float8E4M3FNType, Float8E5M2FNUZType,
Float8E4M3FNUZType, Float8E4M3B11FNUZType, BFloat16Type, Float16Type,
FloatTF32Type, Float32Type, Float64Type, Float80Type, Float128Type>(type);
return llvm::isa<Float8E5M2Type, Float8E4M3Type, Float8E4M3FNType,
Float8E5M2FNUZType, Float8E4M3FNUZType,
Float8E4M3B11FNUZType, Float8E3M4Type, BFloat16Type,
Float16Type, FloatTF32Type, Float32Type, Float64Type,
Float80Type, Float128Type>(type);
}
inline FloatType FloatType::getFloat8E5M2(MLIRContext *ctx) {
@@ -441,6 +443,10 @@ inline FloatType FloatType::getFloat8E4M3B11FNUZ(MLIRContext *ctx) {
return Float8E4M3B11FNUZType::get(ctx);
}
/// Returns the f8E3M4 type for `ctx` as a generic FloatType by forwarding to
/// Float8E3M4Type::get.
inline FloatType FloatType::getFloat8E3M4(MLIRContext *ctx) {
return Float8E3M4Type::get(ctx);
}
inline FloatType FloatType::getBF16(MLIRContext *ctx) {
return BFloat16Type::get(ctx);
}

View File

@@ -213,6 +213,25 @@ def Builtin_Float8E4M3B11FNUZ : Builtin_FloatType<"Float8E4M3B11FNUZ", "f8E4M3B1
}];
}
//===----------------------------------------------------------------------===//
// Float8E3M4Type
// Declares the builtin `f8E3M4` floating-point type. The summary and
// description strings below carry the type's documentation: an S1E3M4 layout
// with exponent bias 3, infinities, NaNs, and denormals.
def Builtin_Float8E3M4 : Builtin_FloatType<"Float8E3M4", "f8E3M4"> {
let summary = "8-bit floating point with 3 bits exponent and 4 bit mantissa";
let description = [{
An 8-bit floating point type with 1 sign bit, 3 bits exponent and 4 bits
mantissa. This is not a standard type as defined by IEEE-754, but it
follows similar conventions with the following characteristics:
* bit encoding: S1E3M4
* exponent bias: 3
* infinities: supported with exponent set to all 1s and mantissa 0s
* NaNs: supported with exponent bits set to all 1s and mantissa values of
{0,1}⁴ except S.111.0000
* denormals when exponent is 0
}];
}
//===----------------------------------------------------------------------===//
// BFloat16Type

View File

@@ -342,6 +342,8 @@ def F8E4M3B11FNUZ : Type<CPred<"$_self.isFloat8E4M3B11FNUZ()">, "f8E4M3B11FNUZ t
BuildableType<"$_builder.getFloat8E4M3B11FNUZType()">;
def F8E5M2FNUZ : Type<CPred<"$_self.isFloat8E5M2FNUZ()">, "f8E5M2FNUZ type">,
BuildableType<"$_builder.getFloat8E5M2FNUZType()">;
// Type constraint satisfied when `$_self.isFloat8E3M4()` holds; instances are
// built through the builder's `getFloat8E3M4Type()`.
def F8E3M4 : Type<CPred<"$_self.isFloat8E3M4()">, "f8E3M4 type">,
BuildableType<"$_builder.getFloat8E3M4Type()">;
def AnyComplex : Type<CPred<"::llvm::isa<::mlir::ComplexType>($_self)">,
"complex-type", "::mlir::ComplexType">;

View File

@@ -131,6 +131,7 @@ public:
bool isFloat8E5M2FNUZ() const;
bool isFloat8E4M3FNUZ() const;
bool isFloat8E4M3B11FNUZ() const;
bool isFloat8E3M4() const;
bool isBF16() const;
bool isF16() const;
bool isTF32() const;

View File

@@ -100,6 +100,7 @@ TOK_KEYWORD(f8E4M3FN)
TOK_KEYWORD(f8E5M2FNUZ)
TOK_KEYWORD(f8E4M3FNUZ)
TOK_KEYWORD(f8E4M3B11FNUZ)
TOK_KEYWORD(f8E3M4)
TOK_KEYWORD(f128)
TOK_KEYWORD(false)
TOK_KEYWORD(floordiv)

View File

@@ -45,6 +45,7 @@ OptionalParseResult Parser::parseOptionalType(Type &type) {
case Token::kw_f8E5M2FNUZ:
case Token::kw_f8E4M3FNUZ:
case Token::kw_f8E4M3B11FNUZ:
case Token::kw_f8E3M4:
case Token::kw_bf16:
case Token::kw_f16:
case Token::kw_tf32:
@@ -320,6 +321,9 @@ Type Parser::parseNonFunctionType() {
case Token::kw_f8E4M3B11FNUZ:
consumeToken(Token::kw_f8E4M3B11FNUZ);
return builder.getFloat8E4M3B11FNUZType();
case Token::kw_f8E3M4:
consumeToken(Token::kw_f8E3M4);
return builder.getFloat8E3M4Type();
case Token::kw_bf16:
consumeToken(Token::kw_bf16);
return builder.getBF16Type();

View File

@@ -246,6 +246,26 @@ public:
}
};
/// Floating Point Type subclass - Float8E3M4Type.
/// Python-side wrapper for the f8E3M4 type, built on the C API entry points
/// mlirTypeIsAFloat8E3M4, mlirFloat8E3M4TypeGetTypeID and
/// mlirFloat8E3M4TypeGet.
class PyFloat8E3M4Type : public PyConcreteType<PyFloat8E3M4Type, PyFloatType> {
public:
// C API predicate that identifies an f8E3M4 type.
static constexpr IsAFunctionTy isaFunction = mlirTypeIsAFloat8E3M4;
// C API accessor for the type's TypeID.
static constexpr GetTypeIDFunctionTy getTypeIdFunction =
mlirFloat8E3M4TypeGetTypeID;
// Name under which the class is exposed to Python.
static constexpr const char *pyClassName = "Float8E3M4Type";
// Inherit the base-class constructors.
using PyConcreteType::PyConcreteType;
/// Adds the static `get` factory to the bound class. `context` defaults to
/// None (presumably resolved to the active context by
/// DefaultingPyMlirContext -- confirm against its definition).
static void bindDerived(ClassTy &c) {
c.def_static(
"get",
[](DefaultingPyMlirContext context) {
// Fetch the type through the C API and wrap it together with a reference
// to its owning context.
MlirType t = mlirFloat8E3M4TypeGet(context->get());
return PyFloat8E3M4Type(context->getRef(), t);
},
py::arg("context") = py::none(), "Create a float8_e3m4 type.");
}
};
/// Floating Point Type subclass - BF16Type.
class PyBF16Type : public PyConcreteType<PyBF16Type, PyFloatType> {
public:
@@ -864,6 +884,7 @@ void mlir::python::populateIRTypes(py::module &m) {
PyFloat8E4M3FNUZType::bind(m);
PyFloat8E4M3B11FNUZType::bind(m);
PyFloat8E5M2FNUZType::bind(m);
PyFloat8E3M4Type::bind(m);
PyBF16Type::bind(m);
PyF16Type::bind(m);
PyTF32Type::bind(m);

View File

@@ -157,6 +157,18 @@ MlirType mlirFloat8E4M3B11FNUZTypeGet(MlirContext ctx) {
return wrap(FloatType::getFloat8E4M3B11FNUZ(unwrap(ctx)));
}
/// Returns the TypeID of Float8E3M4Type, wrapped for the C API.
MlirTypeID mlirFloat8E3M4TypeGetTypeID() {
return wrap(Float8E3M4Type::getTypeID());
}
/// Returns true if the unwrapped `type` is an f8E3M4 type.
bool mlirTypeIsAFloat8E3M4(MlirType type) {
return unwrap(type).isFloat8E3M4();
}
/// Returns the f8E3M4 type of the unwrapped context `ctx`, wrapped for the
/// C API.
MlirType mlirFloat8E3M4TypeGet(MlirContext ctx) {
return wrap(FloatType::getFloat8E3M4(unwrap(ctx)));
}
MlirTypeID mlirBFloat16TypeGetTypeID() {
return wrap(BFloat16Type::getTypeID());
}

View File

@@ -249,7 +249,7 @@ Type LLVMTypeConverter::convertIntegerType(IntegerType type) const {
Type LLVMTypeConverter::convertFloatType(FloatType type) const {
if (type.isFloat8E5M2() || type.isFloat8E4M3() || type.isFloat8E4M3FN() ||
type.isFloat8E5M2FNUZ() || type.isFloat8E4M3FNUZ() ||
type.isFloat8E4M3B11FNUZ())
type.isFloat8E4M3B11FNUZ() || type.isFloat8E3M4())
return IntegerType::get(&getContext(), type.getWidth());
return type;
}

View File

@@ -60,6 +60,7 @@ static std::optional<FloatType> parseFloatType(MLIRContext *ctx,
.Case("f8E4M3FN", b.getFloat8E4M3FNType())
.Case("f8E5M2FNUZ", b.getFloat8E5M2FNUZType())
.Case("f8E4M3FNUZ", b.getFloat8E4M3FNUZType())
.Case("f8E3M4", b.getFloat8E3M4Type())
.Case("bf16", b.getBF16Type())
.Case("f16", b.getF16Type())
.Case("f32", b.getF32Type())

View File

@@ -2581,6 +2581,7 @@ void AsmPrinter::Impl::printTypeImpl(Type type) {
.Case<Float8E5M2FNUZType>([&](Type) { os << "f8E5M2FNUZ"; })
.Case<Float8E4M3FNUZType>([&](Type) { os << "f8E4M3FNUZ"; })
.Case<Float8E4M3B11FNUZType>([&](Type) { os << "f8E4M3B11FNUZ"; })
.Case<Float8E3M4Type>([&](Type) { os << "f8E3M4"; })
.Case<BFloat16Type>([&](Type) { os << "bf16"; })
.Case<Float16Type>([&](Type) { os << "f16"; })
.Case<FloatTF32Type>([&](Type) { os << "tf32"; })

View File

@@ -58,6 +58,10 @@ FloatType Builder::getFloat8E4M3B11FNUZType() {
return FloatType::getFloat8E4M3B11FNUZ(context);
}
/// Returns the f8E3M4 type for this builder's context.
FloatType Builder::getFloat8E3M4Type() {
return FloatType::getFloat8E3M4(context);
}
FloatType Builder::getBF16Type() { return FloatType::getBF16(context); }
FloatType Builder::getF16Type() { return FloatType::getF16(context); }

View File

@@ -88,8 +88,8 @@ IntegerType IntegerType::scaleElementBitwidth(unsigned scale) {
unsigned FloatType::getWidth() {
if (llvm::isa<Float8E5M2Type, Float8E4M3Type, Float8E4M3FNType,
Float8E5M2FNUZType, Float8E4M3FNUZType, Float8E4M3B11FNUZType>(
*this))
Float8E5M2FNUZType, Float8E4M3FNUZType, Float8E4M3B11FNUZType,
Float8E3M4Type>(*this))
return 8;
if (llvm::isa<Float16Type, BFloat16Type>(*this))
return 16;
@@ -118,6 +118,8 @@ const llvm::fltSemantics &FloatType::getFloatSemantics() {
return APFloat::Float8E4M3FNUZ();
if (llvm::isa<Float8E4M3B11FNUZType>(*this))
return APFloat::Float8E4M3B11FNUZ();
if (llvm::isa<Float8E3M4Type>(*this))
return APFloat::Float8E3M4();
if (llvm::isa<BFloat16Type>(*this))
return APFloat::BFloat();
if (llvm::isa<Float16Type>(*this))

View File

@@ -227,6 +227,7 @@ public:
Float8E5M2FNUZType f8E5M2FNUZTy;
Float8E4M3FNUZType f8E4M3FNUZTy;
Float8E4M3B11FNUZType f8E4M3B11FNUZTy;
Float8E3M4Type f8E3M4Ty;
BFloat16Type bf16Ty;
Float16Type f16Ty;
FloatTF32Type tf32Ty;
@@ -318,6 +319,7 @@ MLIRContext::MLIRContext(const DialectRegistry &registry, Threading setting)
impl->f8E5M2FNUZTy = TypeUniquer::get<Float8E5M2FNUZType>(this);
impl->f8E4M3FNUZTy = TypeUniquer::get<Float8E4M3FNUZType>(this);
impl->f8E4M3B11FNUZTy = TypeUniquer::get<Float8E4M3B11FNUZType>(this);
impl->f8E3M4Ty = TypeUniquer::get<Float8E3M4Type>(this);
impl->bf16Ty = TypeUniquer::get<BFloat16Type>(this);
impl->f16Ty = TypeUniquer::get<Float16Type>(this);
impl->tf32Ty = TypeUniquer::get<FloatTF32Type>(this);
@@ -1029,6 +1031,9 @@ Float8E4M3FNUZType Float8E4M3FNUZType::get(MLIRContext *context) {
Float8E4M3B11FNUZType Float8E4M3B11FNUZType::get(MLIRContext *context) {
return context->getImpl().f8E4M3B11FNUZTy;
}
/// Returns the Float8E3M4Type instance stored in the context implementation's
/// `f8E3M4Ty` field.
Float8E3M4Type Float8E3M4Type::get(MLIRContext *context) {
return context->getImpl().f8E3M4Ty;
}
BFloat16Type BFloat16Type::get(MLIRContext *context) {
return context->getImpl().bf16Ty;
}

View File

@@ -46,6 +46,7 @@ bool Type::isFloat8E4M3FNUZ() const {
bool Type::isFloat8E4M3B11FNUZ() const {
return llvm::isa<Float8E4M3B11FNUZType>(*this);
}
/// Returns true if this type is a Float8E3M4Type.
bool Type::isFloat8E3M4() const { return llvm::isa<Float8E3M4Type>(*this); }
bool Type::isBF16() const { return llvm::isa<BFloat16Type>(*this); }
bool Type::isF16() const { return llvm::isa<Float16Type>(*this); }
bool Type::isTF32() const { return llvm::isa<FloatTF32Type>(*this); }

View File

@@ -120,6 +120,7 @@ __all__ = [
"F32Type",
"F64Type",
"FlatSymbolRefAttr",
"Float8E3M4Type",
"Float8E4M3B11FNUZType",
"Float8E4M3FNType",
"Float8E4M3FNUZType",
@@ -1537,6 +1538,19 @@ class FlatSymbolRefAttr(Attribute):
Returns the value of the FlatSymbolRef attribute as a string
"""
class Float8E3M4Type(FloatType):
static_typeid: ClassVar[TypeID]
@staticmethod
def get(context: Optional[Context] = None) -> Float8E3M4Type:
"""
Create a float8_e3m4 type.
"""
@staticmethod
def isinstance(other: Type) -> bool: ...
def __init__(self, cast_from_type: Type) -> None: ...
@property
def typeid(self) -> TypeID: ...
class Float8E4M3B11FNUZType(FloatType):
static_typeid: ClassVar[TypeID]
@staticmethod

View File

@@ -12,6 +12,7 @@ from ..ir import (
F16Type,
F32Type,
F64Type,
Float8E3M4Type,
Float8E4M3B11FNUZType,
Float8E4M3FNType,
Float8E4M3Type,
@@ -72,6 +73,7 @@ f8E5M2 = lambda: Float8E5M2Type.get()
f8E4M3 = lambda: Float8E4M3Type.get()
f8E4M3FN = lambda: Float8E4M3FNType.get()
f8E4M3B11FNUZ = lambda: Float8E4M3B11FNUZType.get()
f8E3M4 = lambda: Float8E3M4Type.get()
none = lambda: NoneType.get()

View File

@@ -60,6 +60,10 @@ func.func @float_attrs_pass() {
// CHECK: float_attr = 2.000000e+00 : f8E4M3B11FNUZ
float_attr = 2. : f8E4M3B11FNUZ
} : () -> ()
"test.float_attrs"() {
// CHECK: float_attr = 2.000000e+00 : f8E3M4
float_attr = 2. : f8E3M4
} : () -> ()
"test.float_attrs"() {
// CHECK: float_attr = 2.000000e+00 : f16
float_attr = 2. : f16

View File

@@ -39,6 +39,9 @@ llvm.mlir.global internal constant @string_const("foobar") : !llvm.array<6 x i8>
// CHECK: @int_global_undef = internal global i64 undef
llvm.mlir.global internal @int_global_undef() : i64
// CHECK: @f8E3M4_global_as_i8 = internal global i8 56
llvm.mlir.global internal @f8E3M4_global_as_i8(1.5 : f8E3M4) : i8
// CHECK: @f8E4M3_global_as_i8 = internal global i8 60
llvm.mlir.global internal @f8E4M3_global_as_i8(1.5 : f8E4M3) : i8

View File

@@ -113,6 +113,8 @@ def testTypeIsInstance():
def testFloatTypeSubclasses():
ctx = Context()
# CHECK: True
print(isinstance(Type.parse("f8E3M4", ctx), FloatType))
# CHECK: True
print(isinstance(Type.parse("f8E4M3", ctx), FloatType))
# CHECK: True
print(isinstance(Type.parse("f8E4M3FN", ctx), FloatType))
@@ -231,6 +233,8 @@ def testIndexType():
@run
def testFloatType():
with Context():
# CHECK: float: f8E3M4
print("float:", Float8E3M4Type.get())
# CHECK: float: f8E4M3
print("float:", Float8E4M3Type.get())
# CHECK: float: f8E4M3FN
@@ -605,6 +609,7 @@ def testTypeIDs():
types = [
(IntegerType, IntegerType.get_signless(16)),
(IndexType, IndexType.get()),
(Float8E3M4Type, Float8E3M4Type.get()),
(Float8E4M3Type, Float8E4M3Type.get()),
(Float8E4M3FNType, Float8E4M3FNType.get()),
(Float8E5M2Type, Float8E5M2Type.get()),
@@ -629,6 +634,7 @@ def testTypeIDs():
# CHECK: IntegerType(i16)
# CHECK: IndexType(index)
# CHECK: Float8E3M4Type(f8E3M4)
# CHECK: Float8E4M3Type(f8E4M3)
# CHECK: Float8E4M3FNType(f8E4M3FN)
# CHECK: Float8E5M2Type(f8E5M2)
@@ -707,6 +713,9 @@ def testConcreteTypesRoundTrip():
# CHECK: F64Type
# CHECK: F64Type(f64)
print_downcasted(F64Type.get())
# CHECK: Float8E3M4Type
# CHECK: Float8E3M4Type(f8E3M4)
print_downcasted(Float8E3M4Type.get())
# CHECK: Float8E4M3B11FNUZType
# CHECK: Float8E4M3B11FNUZType(f8E4M3B11FNUZ)
print_downcasted(Float8E4M3B11FNUZType.get())

View File

@@ -56,6 +56,7 @@ builtin_attr_type_mnemonics = {
"mlir::Float8E5M2FNUZType": '"f8E5M2FNUZ"',
"mlir::Float8E4M3FNUZType": '"f8E4M3FNUZ"',
"mlir::Float8E4M3B11FNUZType": '"f8E4M3B11FNUZ"',
"mlir::Float8E3M4Type": '"f8E3M4"',
"mlir::BFloat16Type": '"bf16"',
"mlir::Float16Type": '"f16"',
"mlir::FloatTF32Type": '"tf32"',

View File

@@ -230,8 +230,8 @@ const common = {
integer_type : $ =>
token(seq(choice('si', 'ui', 'i'), /[1-9]/, repeat(/[0-9]/))),
float_type : $ => token(
choice('f16', 'f32', 'f64', 'f80', 'f128', 'bf16', 'f8E4M3FN', 'f8E4M3',
'f8E5M2')),
choice('f16', 'f32', 'f64', 'f80', 'f128', 'bf16', 'f8E3M4', 'f8E4M3FN',
'f8E4M3', 'f8E5M2')),
index_type : $ => token('index'),
none_type : $ => token('none'),
complex_type : $ => seq(token('complex'), '<', $._prim_type, '>'),