clang-p2996/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
Commit 441b672bbd by Giuseppe Rossini, 2024-08-07 09:10:01 +01:00: [mlir] Fix block merging (#102038)
With this PR I am trying to address:
https://github.com/llvm/llvm-project/issues/63230.

What changed:
- While merging identical blocks, don't add a block argument if it is
"identical" to another block argument, i.e., if the two block arguments
refer to the same `Value`. The operations' operands in the block will
point to the argument we already inserted. This needs to happen for all
the arguments we pass to the different successors of the parent block.
- After merging the blocks, get rid of "unnecessary" arguments, i.e., if
all the predecessors pass the same value for a block argument, there is
no need to pass it as an argument (see the sketch after this list).
- This last simplification clashed with
`BufferDeallocationSimplification`. The reason, I think, is that
`BufferDeallocationSimplification` contains an analysis based on the
block structure; if we simplify the block structure (by merging blocks
and/or dropping block arguments), that analysis becomes invalid. The
solution I found is to do a more conservative simplification when
running that pass.
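
Below is a minimal hand-written sketch of the two block-argument
simplifications. It is not part of the patch; `"foo.use"` is a made-up,
unregistered op used only for illustration (so the snippet only verifies
with unregistered dialects allowed):

```mlir
// Input: ^bb1 and ^bb2 are identical except for the operand of "foo.use"
// (%a in ^bb1, %b in ^bb2), and both branch to ^bb3.
func.func @before(%cond: i1, %a: i32, %b: i32) {
  cf.cond_br %cond, ^bb1, ^bb2
^bb1:
  "foo.use"(%a, %a) : (i32, i32) -> ()
  cf.br ^bb3
^bb2:
  "foo.use"(%b, %b) : (i32, i32) -> ()
  cf.br ^bb3
^bb3:
  return
}

// After merging ^bb1 and ^bb2: both differing operand positions receive the
// same Value from each predecessor, so a single block argument is enough
// (first bullet). If %b happened to be the same Value as %a, every
// predecessor would pass the same value and the argument could be dropped
// altogether (second bullet).
func.func @after(%cond: i1, %a: i32, %b: i32) {
  cf.cond_br %cond, ^bb1(%a : i32), ^bb1(%b : i32)
^bb1(%arg0: i32):
  "foo.use"(%arg0, %arg0) : (i32, i32) -> ()
  cf.br ^bb3
^bb3:
  return
}
```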

**Note-1**: I ran all the integration tests
(`-DMLIR_INCLUDE_INTEGRATION_TESTS=ON`) and they passed.
**Note-2**: I fixed a bug found by @Dinistro in #97697. The issue was
that, when looking for redundant arguments, I was not considering that
the block might already have some arguments, so the index (in the block
argument list) of the i-th `newArgument` is `i+numOfOldArguments`.
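
As a hand-written illustration of that indexing (again not from the patch,
names invented): if the merged block already had one argument before
merging, the first new argument is block argument number 1, not 0:

```mlir
// %arg0 is a pre-existing argument of ^bb1; %arg1 and %arg2 are the
// arguments appended by block merging, so newArgument i sits at block
// argument index i + 1, i.e. i + numOfOldArguments.
func.func @index_example(%cond: i1, %x: f32, %a: i32, %b: i32) {
  cf.cond_br %cond, ^bb1(%x, %a, %b : f32, i32, i32), ^bb1(%x, %b, %a : f32, i32, i32)
^bb1(%arg0: f32, %arg1: i32, %arg2: i32):
  "foo.use"(%arg0, %arg1, %arg2) : (f32, i32, i32) -> ()
  return
}
```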


// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(linalg-detensorize{aggressive-mode}))" | FileCheck %s -check-prefix=DET-ALL
// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(linalg-detensorize))" | FileCheck %s -check-prefix=DET-CF
#map0 = affine_map<() -> ()>
#map1 = affine_map<(i) -> ()>
#map2 = affine_map<(i) -> (i)>
#attrs = {
  indexing_maps = [#map0, #map0, #map0],
  iterator_types = []
}
#sum_reduction_attrs = {
  indexing_maps = [#map2, #map1],
  iterator_types = ["reduction"]
}
#broadcast_attrs = {
  indexing_maps = [#map1, #map2],
  iterator_types = ["parallel"]
}
func.func @main(%farg0: tensor<10xi32>, %farg1: tensor<i32>) -> tensor<i32> attributes {} {
  cf.br ^bb1(%farg0 : tensor<10xi32>)

^bb1(%0: tensor<10xi32>):  // 2 preds: ^bb0, ^bb2
  %1 = tensor.empty() : tensor<i32>
  %2 = linalg.generic #sum_reduction_attrs
    ins(%0: tensor<10xi32>)
    outs(%1: tensor<i32>) {
    ^bb(%a: i32, %x: i32):
      %b = arith.addi %x, %a : i32
      linalg.yield %b : i32
  } -> tensor<i32>
  %3 = tensor.empty() : tensor<i1>
  %4 = linalg.generic #attrs
    ins(%2, %farg1 : tensor<i32>, tensor<i32>)
    outs(%3 : tensor<i1>) {
    ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
      %8 = arith.cmpi slt, %arg0, %arg1 : i32
      linalg.yield %8 : i1
  } -> tensor<i1>
  %5 = tensor.extract %4[] : tensor<i1>
  cf.cond_br %5, ^bb2(%2 : tensor<i32>), ^bb3(%2 : tensor<i32>)

^bb2(%6: tensor<i32>):  // pred: ^bb1
  %7 = tensor.empty() : tensor<10xi32>
  %9 = linalg.generic #broadcast_attrs
    ins(%6: tensor<i32>)
    outs(%7: tensor<10xi32>) {
    ^bb(%a: i32, %b: i32) :
      linalg.yield %a : i32
  } -> tensor<10xi32>
  cf.br ^bb1(%9 : tensor<10xi32>)

^bb3(%10: tensor<i32>):  // pred: ^bb1
  return %10 : tensor<i32>
}
// Test aggressively detensoring all detensorable ops.
//
// DET-ALL-LABEL: func @main
// DET-ALL-SAME: (%{{.*}}: tensor<10xi32>, %{{.*}}: tensor<i32>)
// DET-ALL: cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>)
// DET-ALL: ^[[bb1]](%{{.*}}: tensor<10xi32>)
// DET-ALL: tensor.empty() : tensor<i32>
// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor<i32>) {
// DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32):
// DET-ALL: %{{.*}} = arith.addi %{{.*}}, %{{.*}}
// DET-ALL: linalg.yield %{{.*}} : i32
// DET-ALL: } -> tensor<i32>
// DET-ALL: tensor.extract %{{.*}}[] : tensor<i32>
// DET-ALL: cmpi slt, %{{.*}}, %{{.*}} : i32
// DET-ALL: cf.cond_br %{{.*}}, ^[[bb2:.*]], ^[[bb3:.*]]
// DET-ALL: ^[[bb2]]:
// DET-ALL: tensor.from_elements %{{.*}} : tensor<i32>
// DET-ALL: tensor.empty() : tensor<10xi32>
// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<i32>) outs(%{{.*}} : tensor<10xi32>) {
// DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32):
// DET-ALL: linalg.yield %{{.*}} : i32
// DET-ALL: } -> tensor<10xi32>
// DET-ALL: cf.br ^[[bb1]](%{{.*}} : tensor<10xi32>)
// DET-ALL: ^[[bb3]]
// DET-ALL: tensor.from_elements %{{.*}} : tensor<i32>
// DET-ALL: return %{{.*}} : tensor<i32>
// DET-ALL: }
// DET-CF-LABEL: func @main
// DET-CF-SAME: (%{{.*}}: tensor<10xi32>, %{{.*}}: tensor<i32>)
// DET-CF: cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>)
// DET-CF: ^bb1(%{{.*}}: tensor<10xi32>)
// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor<i32>) {
// DET-CF: tensor.extract %{{.*}}[] : tensor<i32>
// DET-CF: cmpi slt, %{{.*}}, %{{.*}} : i32
// DET-CF: cf.cond_br %{{.*}}, ^bb2, ^bb3
// DET-CF: ^bb2:
// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<i32>) outs(%{{.*}} : tensor<10xi32>) {
// DET-CF: cf.br ^bb1(%{{.*}} : tensor<10xi32>)
// DET-CF: ^bb3:
// DET-CF: return %{{.*}} : tensor<i32>
// DET-CF: }