Bufferization already assumes that buffers cross function boundaries in strided form, and it currently expresses that form with affine map layouts. Switch it to the recently introduced strided layout attribute instead, to avoid unnecessary casts when bufferizing further operations to their memref dialect counterparts, which now largely rely on the strided layout attribute.

Depends On D133947

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D133951
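For illustration, a sketch of the same fully dynamic 1-D boundary layout in both spellings (the affine-map form follows MLIR's canonical strided convention, where s0 is the dynamic offset and s1 the dynamic stride):

  // Previously: strided form spelled as an affine map layout.
  memref<5xf32, affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>>
  // Now: the dedicated strided layout attribute, as checked in the test below.
  memref<5xf32, strided<[?], offset: ?>>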
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=fully-dynamic-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -buffer-deallocation -split-input-file | FileCheck %s
|
|
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=identity-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -buffer-deallocation -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT
|
|
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=infer-layout-map" -drop-equivalent-buffer-results -buffer-deallocation -split-input-file | FileCheck %s --check-prefix=CHECK-BASELINE
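// The three configurations above differ in how layouts are assigned at
// function boundaries: CHECK uses fully dynamic layout maps, CHECK-NO-LAYOUT
// uses identity layouts, and CHECK-BASELINE infers layouts and skips the
// out-param promotion.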
// Note: function-boundary-type-conversion=infer-layout-map with
// buffer-results-to-out-params is an unsupported combination.

// Note: This bufferization is not very efficient yet, but it works.
// CHECK-LABEL: func @callee(
// CHECK-SAME: %[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[arg1:.*]]: memref<5xf32, strided<[?], offset: ?>>) {
// This alloc is not needed, but it is inserted due to the out-of-place
// bufferization of the tensor.insert. With a better layering of the out param
// promotion pass, this alloc could be avoided.
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
// CHECK: memref.copy %[[arg0]], %[[alloc]]
// CHECK: memref.store %{{.*}}, %[[alloc]]
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK: memref.copy %[[casted]], %[[arg1]]
// CHECK: memref.dealloc %[[alloc]]
// CHECK: return
// CHECK: }

// CHECK-NO-LAYOUT-LABEL: func @callee(
// CHECK-NO-LAYOUT-SAME: %[[arg0:.*]]: memref<5xf32>,
// CHECK-NO-LAYOUT-SAME: %[[arg1:.*]]: memref<5xf32>) {
// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
// CHECK-NO-LAYOUT: memref.copy %[[arg0]], %[[alloc]]
// CHECK-NO-LAYOUT: memref.store {{.*}}, %[[alloc]]
// CHECK-NO-LAYOUT: memref.copy %[[alloc]], %[[arg1]]
// CHECK-NO-LAYOUT: memref.dealloc %[[alloc]]

// CHECK-BASELINE-LABEL: func @callee(
// CHECK-BASELINE-SAME: %[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32> {
// CHECK-BASELINE: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
// CHECK-BASELINE: memref.copy %[[arg0]], %[[alloc]]
// CHECK-BASELINE: memref.store {{.*}}, %[[alloc]]
// CHECK-BASELINE: return %[[alloc]]
func.func @callee(%t: tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>) {
  %c0 = arith.constant 0 : index
  %cst = arith.constant 8.0 : f32
  // This must bufferize out-of-place because %t is also returned unmodified.
  %1 = tensor.insert %cst into %t[%c0] : tensor<5xf32>
  // Instead of returning %1, copy into a new out param. %t will disappear
  // entirely because the buffer is equivalent to a bbArg.
  return %t, %1 : tensor<5xf32>, tensor<5xf32>
}
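// On the caller side, buffer-results-to-out-params materializes the result
// buffer: as the CHECK lines below show, @main allocates it and, under fully
// dynamic layouts, casts the static memref<5xf32> to the strided type
// expected by @callee's signature.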
// CHECK: func @main(%[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>) -> (f32, f32) {
// CHECK: %[[alloc:.*]] = memref.alloc() : memref<5xf32>
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref<5xf32> to memref<5xf32, strided<[?], offset: ?>>
// CHECK: call @callee(%[[arg0]], %[[casted]])
// CHECK: %[[l1:.*]] = memref.load %[[arg0]]
// CHECK: %[[l2:.*]] = memref.load %[[casted]]
// CHECK: memref.dealloc %[[alloc]]
// CHECK: return %[[l1]], %[[l2]]
// CHECK: }

// CHECK-NO-LAYOUT-LABEL: func @main(%{{.*}}: memref<5xf32>) -> (f32, f32) {
// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() : memref<5xf32>
// CHECK-NO-LAYOUT: call @callee(%{{.*}}, %[[alloc]])
func.func @main(%t: tensor<5xf32>) -> (f32, f32) {
  %c0 = arith.constant 0 : index
  %0, %1 = func.call @callee(%t)
      : (tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>)
  %2 = tensor.extract %0[%c0] : tensor<5xf32>
  %3 = tensor.extract %1[%c0] : tensor<5xf32>
  return %2, %3 : f32, f32
}

// -----
// CHECK-LABEL: func @callee(
// CHECK-SAME: %{{.*}}: index,
// CHECK-SAME: %[[r:.*]]: memref<2x5xf32, strided<[?, ?], offset: ?>>) {
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
// CHECK: %[[subview:.*]] = memref.subview %[[alloc]]{{.*}} : memref<10x20xf32> to memref<2x5xf32, strided<[20, 1], offset: ?>>
// CHECK: %[[casted:.*]] = memref.cast %[[subview]]
// CHECK: memref.copy %[[casted]], %[[r]]
// CHECK: memref.dealloc %[[alloc]]
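// Note: The subview above carries the inferred static layout
// strided<[20, 1], offset: ?>, so a memref.cast to the fully dynamic
// strided<[?, ?], offset: ?> out-param type is needed before the copy.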
// CHECK-NO-LAYOUT-LABEL: func @callee(
// CHECK-NO-LAYOUT-SAME: %{{.*}}: index,
// CHECK-NO-LAYOUT-SAME: %[[r:.*]]: memref<2x5xf32>) {
// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
// CHECK-NO-LAYOUT: %[[subview:.*]] = memref.subview %[[alloc]]
// Note: This alloc is not needed, but it is inserted before the returned buffer
// is promoted to an out param to reconcile mismatching layout maps on return
// value and function signature.
// CHECK-NO-LAYOUT: %[[alloc2:.*]] = memref.alloc() : memref<2x5xf32>
// CHECK-NO-LAYOUT: memref.copy %[[subview]], %[[alloc2]]
// CHECK-NO-LAYOUT: memref.dealloc %[[alloc]]
// CHECK-NO-LAYOUT: memref.copy %[[alloc2]], %[[r]]
// CHECK-NO-LAYOUT: memref.dealloc %[[alloc2]]

// CHECK-BASELINE-LABEL: func @callee(
// CHECK-BASELINE-SAME: %{{.*}}: index) -> memref<2x5xf32, strided<[20, 1], offset: ?>> {
// CHECK-BASELINE: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
// CHECK-BASELINE: %[[subview:.*]] = memref.subview %[[alloc]]
// CHECK-BASELINE: return %[[subview]]
func.func @callee(%idx: index) -> tensor<2x5xf32> {
  %0 = bufferization.alloc_tensor() : tensor<10x20xf32>
  %1 = tensor.extract_slice %0[%idx, %idx][2, 5][1, 1] : tensor<10x20xf32> to tensor<2x5xf32>
  return %1 : tensor<2x5xf32>
}
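// On the caller side, CHECK again allocates the out param and casts it to a
// fully dynamic strided layout, whereas CHECK-NO-LAYOUT passes the statically
// laid out alloc to @callee directly.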
// CHECK: func @main(
// CHECK: %[[alloc:.*]] = memref.alloc() : memref<2x5xf32>
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref<2x5xf32> to memref<2x5xf32, strided<[?, ?], offset: ?>>
// CHECK: call @callee(%{{.*}}, %[[casted]])
// CHECK: memref.load %[[casted]]
// CHECK: memref.dealloc %[[alloc]]

// CHECK-NO-LAYOUT: func @main(
// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() : memref<2x5xf32>
// CHECK-NO-LAYOUT: call @callee(%{{.*}}, %[[alloc]])
// CHECK-NO-LAYOUT: memref.load %[[alloc]]
// CHECK-NO-LAYOUT: memref.dealloc

// CHECK-BASELINE: func @main(
// CHECK-BASELINE: %[[call:.*]] = call @callee
// CHECK-BASELINE: memref.load %[[call]]
func.func @main(%idx: index) -> f32 {
  %c0 = arith.constant 0 : index
  %0 = func.call @callee(%idx) : (index) -> (tensor<2x5xf32>)
  %1 = tensor.extract %0[%c0, %c0] : tensor<2x5xf32>
  return %1 : f32
}