This patch is part of a larger initiative aimed at fixing floating-point `max` and `min` operations in MLIR: https://discourse.llvm.org/t/rfc-fix-floating-point-max-and-min-operations-in-mlir/72671. This commit addresses Task 1.2 of the mentioned RFC. By renaming these operations, we align their names with LLVM intrinsics that have corresponding semantics.
153 lines
5.7 KiB
Python
153 lines
5.7 KiB
Python
# RUN: %PYTHON %s | FileCheck %s
|
|
|
|
from mlir.ir import *
|
|
from mlir.dialects import builtin
|
|
from mlir.dialects import func
|
|
from mlir.dialects import linalg
|
|
|
|
from mlir.dialects.linalg.opdsl.lang import *
|
|
|
|
# Type variables used by pooling_poly below: T1 is the element type of its
# `I` operand and T2 the element type of its `K` operand.
T1 = TV.T1
T2 = TV.T2
|
|
|
|
|
|
# Polymorphic pooling op exercised by the tests in this file.
#
# Operands:
#   I: input tensor of element type T1 with shape symbols (N, H, W, C).
#   K: tensor of element type T2 with shape symbols (KH, KW), declared with
#      index_dims=[D.kh, D.kw]. It is never read in the body; presumably it
#      only supplies the extent of the kh/kw window dims (confirm against the
#      OpDSL documentation).
#   O: output tensor of element type U with shape symbols (N, OH, OW, C).
#      NOTE(review): U is not defined in this file; it is expected to come
#      from the `mlir.dialects.linalg.opdsl.lang` star import.
#
# Attributes:
#   reduce:    binary function used as the reduction, default
#              BinaryFn.max_signed.
#   cast:      type function applied to each input element, default
#              TypeFn.cast_signed.
#   strides:   index attribute bound to symbols (SH, SW), default [1, 1].
#   dilations: index attribute bound to symbols (DH, DW), default [1, 1].
@linalg_structured_op
def pooling_poly(
    I=TensorDef(T1, S.N, S.H, S.W, S.C),
    K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]),
    O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True),
    reduce=BinaryFnAttrDef(default=BinaryFn.max_signed),
    cast=TypeFnAttrDef(default=TypeFn.cast_signed),
    strides=IndexAttrDef(S.SH, S.SW, default=[1, 1]),
    dilations=IndexAttrDef(S.DH, S.DW, default=[1, 1]),
):
    # Fix the order of the iteration dims; kh and kw are the dims that the
    # reduction below runs over.
    domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c)
    # Each output element reduces, over (kh, kw), the input element at
    # (oh * SH + kh * DH, ow * SW + kw * DW) after casting it to U.
    O[D.n, D.oh, D.ow, D.c] = reduce[D.kh, D.kw](
        cast(U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c])
    )
|
|
|
|
|
|
# Build one module holding a func per pooling_poly configuration, then print
# it so FileCheck can match the emitted IR against the directives written next
# to each test function.
with Context() as ctx, Location.unknown():
    module = Module.create()
    f32 = F32Type.get()
    i32 = IntegerType.get_signless(32)
    with InsertionPoint(module.body):

        # Every test below passes strides=[2, 4] and dilations=[1, 2], which is
        # where the constants in the input map (d1 * 2 + d3, d2 * 4 + d4 * 2)
        # come from.

        # Pooling indexing maps.
        # CHECK: #[[$POOL_MAP_I:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 * 2 + d3, d2 * 4 + d4 * 2, d5)>
        # CHECK: #[[$POOL_MAP_K:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d4)>
        # CHECK: #[[$POOL_MAP_O:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d5)>

        # f32 input, i32 output, default reduce/cast (max_signed/cast_signed):
        # expects a signed float-to-int cast followed by a signed integer max.
        # NOTE(review): `%[[IN:.+]]` and `%[[IN_CAST:.+]]` on the NEXT lines
        # re-bind those variables instead of reusing the earlier captures, so
        # the operands are not actually tied to the block argument / cast
        # result. Left unchanged here; tighten only after confirming against
        # the real output.
        # CHECK-LABEL: @test_f32i32_max_pooling
        # CHECK: linalg.generic
        # CHECK-SAME: indexing_maps = [#[[$POOL_MAP_I]], #[[$POOL_MAP_K]], #[[$POOL_MAP_O]]]
        # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"]
        # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[SHAPE:.+]]: f32, %[[OUT:.+]]: i32)
        # CHECK-NEXT: %[[IN_CAST:.+]] = arith.fptosi %[[IN:.+]] : f32 to i32
        # CHECK-NEXT: %[[MAX:.+]] = arith.maxsi %[[OUT]], %[[IN_CAST:.+]] : i32
        # CHECK-NEXT: linalg.yield %[[MAX]] : i32
        # CHECK-NEXT: -> tensor<1x2x4x1xi32>
        @func.FuncOp.from_py_func(
            RankedTensorType.get((1, 4, 16, 1), f32),
            RankedTensorType.get((2, 2), f32),
            RankedTensorType.get((1, 2, 4, 1), i32),
        )
        def test_f32i32_max_pooling(input, shape, init_result):
            return pooling_poly(
                input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]
            )

        # Same shapes with the unsigned max and unsigned cast selected
        # explicitly: expects the unsigned cast and unsigned max ops.
        # CHECK-LABEL: @test_f32i32_max_unsigned_pooling
        # CHECK: = arith.fptoui
        # CHECK: = arith.maxui
        @func.FuncOp.from_py_func(
            RankedTensorType.get((1, 4, 16, 1), f32),
            RankedTensorType.get((2, 2), f32),
            RankedTensorType.get((1, 2, 4, 1), i32),
        )
        def test_f32i32_max_unsigned_pooling(input, shape, init_result):
            return pooling_poly(
                input,
                shape,
                outs=[init_result],
                reduce=BinaryFn.max_unsigned,
                cast=TypeFn.cast_unsigned,
                strides=[2, 4],
                dilations=[1, 2],
            )

        # f32 input and f32 output with the default reduce: the max is expected
        # directly after the block header (no cast op) and as arith.maximumf.
        # CHECK-LABEL: @test_f32f32_max_pooling
        # CHECK: linalg.generic
        # CHECK-SAME: indexing_maps = [#[[$POOL_MAP_I]], #[[$POOL_MAP_K]], #[[$POOL_MAP_O]]]
        # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"]
        # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[SHAPE:.+]]: f32, %[[OUT:.+]]: f32)
        # CHECK-NEXT: %[[MAX:.+]] = arith.maximumf %[[OUT]], %[[IN:.+]] : f32
        # CHECK-NEXT: linalg.yield %[[MAX]] : f32
        # CHECK-NEXT: -> tensor<1x2x4x1xf32>
        @func.FuncOp.from_py_func(
            RankedTensorType.get((1, 4, 16, 1), f32),
            RankedTensorType.get((2, 2), f32),
            RankedTensorType.get((1, 2, 4, 1), f32),
        )
        def test_f32f32_max_pooling(input, shape, init_result):
            return pooling_poly(
                input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]
            )

        # Signed min with the default (signed) cast on an i32 output.
        # CHECK-LABEL: @test_f32i32_min_pooling
        # CHECK: = arith.fptosi
        # CHECK: = arith.minsi
        @func.FuncOp.from_py_func(
            RankedTensorType.get((1, 4, 16, 1), f32),
            RankedTensorType.get((2, 2), f32),
            RankedTensorType.get((1, 2, 4, 1), i32),
        )
        def test_f32i32_min_pooling(input, shape, init_result):
            return pooling_poly(
                input,
                shape,
                outs=[init_result],
                reduce=BinaryFn.min_signed,
                strides=[2, 4],
                dilations=[1, 2],
            )

        # Unsigned min with the unsigned cast on an i32 output.
        # CHECK-LABEL: @test_f32i32_min_unsigned_pooling
        # CHECK: = arith.fptoui
        # CHECK: = arith.minui
        @func.FuncOp.from_py_func(
            RankedTensorType.get((1, 4, 16, 1), f32),
            RankedTensorType.get((2, 2), f32),
            RankedTensorType.get((1, 2, 4, 1), i32),
        )
        def test_f32i32_min_unsigned_pooling(input, shape, init_result):
            return pooling_poly(
                input,
                shape,
                outs=[init_result],
                reduce=BinaryFn.min_unsigned,
                cast=TypeFn.cast_unsigned,
                strides=[2, 4],
                dilations=[1, 2],
            )

        # min_signed on an f32 output is expected to be emitted as the
        # floating-point op arith.minimumf.
        # CHECK-LABEL: @test_f32f32_min_pooling
        # CHECK: = arith.minimumf
        @func.FuncOp.from_py_func(
            RankedTensorType.get((1, 4, 16, 1), f32),
            RankedTensorType.get((2, 2), f32),
            RankedTensorType.get((1, 2, 4, 1), f32),
        )
        def test_f32f32_min_pooling(input, shape, init_result):
            return pooling_poly(
                input,
                shape,
                outs=[init_result],
                reduce=BinaryFn.min_signed,
                strides=[2, 4],
                dilations=[1, 2],
            )


# Emit the textual IR on stdout; this is the text the directives above match.
print(module)
|