Files
clang-p2996/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-2d.mlir
Andrzej Warzyński 56d6b56739 [mlir][vector] Relax the requirements on broadcast dims (#99341)
NOTE: This is a follow-up for #97049 in which the `in_bounds` attribute
was made mandatory.

This PR updates the semantics of the `in_bounds` attribute so that
broadcast dimensions are no longer required to be "in bounds".
Specifically, these xfer_read/xfer_write Ops become valid after this
change:

```mlir
  %read = vector.transfer_read %A[%base1, %base2], %pad
      {in_bounds = [false], permutation_map = affine_map<(d0, d1) -> (0)>}
      : memref<?x?xf32>, vector<9xf32>

  vector.transfer_write %vec, %A[%base1, %base2]
      {in_bounds = [false], permutation_map = affine_map<(d0, d1) -> (0)>}
      : vector<9xf32>, memref<?x?xf32>
```

Note that the value `false` merely means "may run out-of-bounds", i.e.,
the corresponding access can still be "in bounds". In fact, the folder
for xfer Ops is also updated (*) and will update the attribute value
corresponding to broadcast dims to `true` if all non-broadcast dims
are marked as "in bounds". 

Note that this PR doesn't change any of the lowerings. The changes in
"SuperVectorize.cpp", "Vectorization.cpp" and "AffineMap.cpp" are simple
reverts of recent changes in #97049. Those were only meant to facilitate
making `in_bounds` mandatory and to work around the extra requirements
for broadcast dims (those requirements are removed in this PR). All
changes in tests are also reverts of changes from #97049.

For context, here's a PR in which "broadcast" dims were forced to
always be "in-bounds":
  * https://reviews.llvm.org/D102566

(*) See `foldTransferInBoundsAttribute`.
2024-10-04 07:41:20 +01:00

197 lines
9.5 KiB
MLIR

// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,finalize-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_c_runner_utils | \
// RUN: FileCheck %s
// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,finalize-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_c_runner_utils | \
// RUN: FileCheck %s
// 3x4 f32 test buffer shared by every test case below. Element [i][j] is
// initialised to 10 * i + j, so each printed value identifies the location it
// was loaded from.
memref.global "private" @gv : memref<3x4xf32> =
    dense<[[0. , 1. , 2. , 3. ],
           [10., 11., 12., 13.],
           [20., 21., 22., 23.]]>
// Plain vector load: reads a 4x9 vector from %A starting at
// [%base1, %base2] with an identity permutation map and prints it. Lanes that
// are not loaded from %A take the padding value -42.0.
func.func @transfer_read_2d(%A : memref<?x?xf32>, %base1: index, %base2: index) {
  %pad = arith.constant -42.0 : f32
  %vec = vector.transfer_read %A[%base1, %base2], %pad
      {permutation_map = affine_map<(d0, d1) -> (d0, d1)>}
      : memref<?x?xf32>, vector<4x9xf32>
  vector.print %vec : vector<4x9xf32>
  return
}
// Masked vector load: same access as @transfer_read_2d, but only lanes whose
// entry in the constant 4x9 mask is 1 are loaded; all other lanes take the
// padding value -42.0. The result is printed.
func.func @transfer_read_2d_mask(%A : memref<?x?xf32>, %base1: index, %base2: index) {
  %pad = arith.constant -42.0 : f32
  %lanes = arith.constant dense<[[1, 0, 1, 0, 1, 1, 1, 0, 1],
                                 [0, 0, 1, 1, 1, 1, 1, 0, 1],
                                 [1, 1, 1, 1, 1, 1, 1, 0, 1],
                                 [0, 0, 1, 0, 1, 1, 1, 0, 1]]> : vector<4x9xi1>
  %vec = vector.transfer_read %A[%base1, %base2], %pad, %lanes
      {permutation_map = affine_map<(d0, d1) -> (d0, d1)>}
      : memref<?x?xf32>, vector<4x9xf32>
  vector.print %vec : vector<4x9xf32>
  return
}
// Masked vector load combined with a transpose: the permutation map swaps the
// two dimensions, so the printed result is 9x4. Note that the mask keeps the
// 4x9 shape, i.e. it is expressed in the pre-permutation layout.
func.func @transfer_read_2d_mask_transposed(
    %A : memref<?x?xf32>, %base1: index, %base2: index) {
  %pad = arith.constant -42.0 : f32
  %lanes = arith.constant dense<[[1, 0, 1, 0, 1, 1, 1, 0, 1],
                                 [0, 0, 1, 1, 1, 1, 1, 0, 1],
                                 [1, 1, 1, 1, 1, 1, 1, 0, 1],
                                 [0, 0, 1, 0, 1, 1, 1, 0, 1]]> : vector<4x9xi1>
  %vec = vector.transfer_read %A[%base1, %base2], %pad, %lanes
      {permutation_map = affine_map<(d0, d1) -> (d1, d0)>}
      : memref<?x?xf32>, vector<9x4xf32>
  vector.print %vec : vector<9x4xf32>
  return
}
// Masked vector load with a broadcast along the first result dimension: the
// map (d0, d1) -> (0, d1) loads a single 9-element row (under a 1-D mask) and
// replicates it into all 4 rows of the printed 4x9 result.
func.func @transfer_read_2d_mask_broadcast(
    %A : memref<?x?xf32>, %base1: index, %base2: index) {
  %pad = arith.constant -42.0 : f32
  %lanes = arith.constant dense<[1, 0, 1, 0, 1, 1, 1, 0, 1]> : vector<9xi1>
  %vec = vector.transfer_read %A[%base1, %base2], %pad, %lanes
      {permutation_map = affine_map<(d0, d1) -> (0, d1)>}
      : memref<?x?xf32>, vector<4x9xf32>
  vector.print %vec : vector<4x9xf32>
  return
}
// Masked vector load where the last result dimension is a broadcast: the map
// (d0, d1) -> (d1, 0) loads 4 elements along the memref's second dimension
// (under a 1-D mask) and replicates each across the 9 columns of the printed
// 4x9 result.
func.func @transfer_read_2d_mask_transpose_broadcast_last_dim(
    %A : memref<?x?xf32>, %base1: index, %base2: index) {
  %pad = arith.constant -42.0 : f32
  %lanes = arith.constant dense<[1, 0, 1, 1]> : vector<4xi1>
  %vec = vector.transfer_read %A[%base1, %base2], %pad, %lanes
      {permutation_map = affine_map<(d0, d1) -> (d1, 0)>}
      : memref<?x?xf32>, vector<4x9xf32>
  vector.print %vec : vector<4x9xf32>
  return
}
// Unmasked load combined with a transpose: the permutation map swaps the two
// memref dimensions before the 4x9 result is printed.
func.func @transfer_read_2d_transposed(
    %A : memref<?x?xf32>, %base1: index, %base2: index) {
  %pad = arith.constant -42.0 : f32
  %vec = vector.transfer_read %A[%base1, %base2], %pad
      {permutation_map = affine_map<(d0, d1) -> (d1, d0)>}
      : memref<?x?xf32>, vector<4x9xf32>
  vector.print %vec : vector<4x9xf32>
  return
}
// Unmasked 1-D load broadcast to 2-D: the map (d0, d1) -> (d1, 0) loads 4
// elements along the memref's second dimension and replicates each across the
// 9 columns of the printed 4x9 result.
func.func @transfer_read_2d_broadcast(
    %A : memref<?x?xf32>, %base1: index, %base2: index) {
  %pad = arith.constant -42.0 : f32
  %vec = vector.transfer_read %A[%base1, %base2], %pad
      {permutation_map = affine_map<(d0, d1) -> (d1, 0)>}
      : memref<?x?xf32>, vector<4x9xf32>
  vector.print %vec : vector<4x9xf32>
  return
}
// Plain vector store: writes a 1x4 vector filled with -1.0 into %A starting
// at [%base1, %base2], using an identity permutation map.
func.func @transfer_write_2d(%A : memref<?x?xf32>, %base1: index, %base2: index) {
  %fill = arith.constant -1.0 : f32
  %payload = vector.splat %fill : vector<1x4xf32>
  vector.transfer_write %payload, %A[%base1, %base2]
      {permutation_map = affine_map<(d0, d1) -> (d0, d1)>}
      : vector<1x4xf32>, memref<?x?xf32>
  return
}
// Masked vector store: writes a 1x4 vector filled with -2.0 into %A starting
// at [%base1, %base2]; only lanes whose mask entry is 1 are stored.
func.func @transfer_write_2d_mask(%A : memref<?x?xf32>, %base1: index, %base2: index) {
  %fill = arith.constant -2.0 : f32
  %lanes = arith.constant dense<[[1, 0, 1, 0]]> : vector<1x4xi1>
  %payload = vector.splat %fill : vector<1x4xf32>
  vector.transfer_write %payload, %A[%base1, %base2], %lanes
      {permutation_map = affine_map<(d0, d1) -> (d0, d1)>}
      : vector<1x4xf32>, memref<?x?xf32>
  return
}
// Test driver. Casts the 3x4 global @gv to a dynamically-shaped memref and
// runs every read/write case in sequence. Each read helper prints its result,
// and the expectation line that follows the call pins the printed text. The
// calls are order-sensitive: steps 8 and 10 modify @gv in place, and steps 9
// and 11 read the buffer back to verify those writes.
func.func @entry() {
// Index constants used as base offsets. For the 3x4 buffer, %c3 is past the
// end of the first dimension and %c10 is past the end of the second.
%c0 = arith.constant 0: index
%c1 = arith.constant 1: index
%c2 = arith.constant 2: index
%c3 = arith.constant 3: index
%c10 = arith.constant 10 : index
%0 = memref.get_global @gv : memref<3x4xf32>
// Erase the static shape so the helpers operate on memref<?x?xf32>.
%A = memref.cast %0 : memref<3x4xf32> to memref<?x?xf32>
// 1.a. Read 2D vector from 2D memref.
call @transfer_read_2d(%A, %c1, %c2) : (memref<?x?xf32>, index, index) -> ()
// CHECK: ( ( 12, 13, -42, -42, -42, -42, -42, -42, -42 ), ( 22, 23, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
// 1.b. Read 2D vector from 2D memref. Starting position of first dim is
// out-of-bounds.
call @transfer_read_2d(%A, %c3, %c2) : (memref<?x?xf32>, index, index) -> ()
// CHECK: ( ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
// 1.c. Read 2D vector from 2D memref. Starting position of second dim is
// out-of-bounds.
call @transfer_read_2d(%A, %c1, %c10) : (memref<?x?xf32>, index, index) -> ()
// CHECK: ( ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
// 2. Read 2D vector from 2D memref at specified location and transpose the
// result.
call @transfer_read_2d_transposed(%A, %c1, %c2)
: (memref<?x?xf32>, index, index) -> ()
// CHECK: ( ( 12, 22, -42, -42, -42, -42, -42, -42, -42 ), ( 13, 23, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
// 3. Read 2D vector from 2D memref with a 2D mask. In addition, some
// accesses are out-of-bounds.
call @transfer_read_2d_mask(%A, %c0, %c0)
: (memref<?x?xf32>, index, index) -> ()
// CHECK: ( ( 0, -42, 2, -42, -42, -42, -42, -42, -42 ), ( -42, -42, 12, 13, -42, -42, -42, -42, -42 ), ( 20, 21, 22, 23, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
// 4. Same as 3, but transpose the result.
call @transfer_read_2d_mask_transposed(%A, %c0, %c0)
: (memref<?x?xf32>, index, index) -> ()
// CHECK: ( ( 0, -42, 20, -42 ), ( -42, -42, 21, -42 ), ( 2, 12, 22, -42 ), ( -42, 13, 23, -42 ), ( -42, -42, -42, -42 ), ( -42, -42, -42, -42 ), ( -42, -42, -42, -42 ), ( -42, -42, -42, -42 ), ( -42, -42, -42, -42 ) )
// 5. Read 1D vector from 2D memref at specified location and broadcast the
// result to 2D.
call @transfer_read_2d_broadcast(%A, %c1, %c2)
: (memref<?x?xf32>, index, index) -> ()
// CHECK: ( ( 12, 12, 12, 12, 12, 12, 12, 12, 12 ), ( 13, 13, 13, 13, 13, 13, 13, 13, 13 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
// 6. Read 1D vector from 2D memref at specified location with mask and
// broadcast the result to 2D.
call @transfer_read_2d_mask_broadcast(%A, %c2, %c1)
: (memref<?x?xf32>, index, index) -> ()
// CHECK: ( ( 21, -42, 23, -42, -42, -42, -42, -42, -42 ), ( 21, -42, 23, -42, -42, -42, -42, -42, -42 ), ( 21, -42, 23, -42, -42, -42, -42, -42, -42 ), ( 21, -42, 23, -42, -42, -42, -42, -42, -42 ) )
// 7. Read 1D vector from 2D memref (second dimension) at specified location
// with mask and broadcast the result to 2D. In this test case, mask
// elements must be evaluated before lowering to an (N>1)-D transfer.
call @transfer_read_2d_mask_transpose_broadcast_last_dim(%A, %c0, %c1)
: (memref<?x?xf32>, index, index) -> ()
// CHECK: ( ( 1, 1, 1, 1, 1, 1, 1, 1, 1 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( 3, 3, 3, 3, 3, 3, 3, 3, 3 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
// 8. Write 2D vector into 2D memref at specified location.
// The 1x4 store starts at [1][2], so only elements [1][2] and [1][3] fall
// inside the 3x4 buffer and become -1.
call @transfer_write_2d(%A, %c1, %c2) : (memref<?x?xf32>, index, index) -> ()
// 9. Read memref to verify step 8.
call @transfer_read_2d(%A, %c0, %c0) : (memref<?x?xf32>, index, index) -> ()
// CHECK: ( ( 0, 1, 2, 3, -42, -42, -42, -42, -42 ), ( 10, 11, -1, -1, -42, -42, -42, -42, -42 ), ( 20, 21, 22, 23, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
// 10. Write 2D vector into 2D memref at specified location with mask.
// With mask [1, 0, 1, 0] and the store starting at [0][2], only element
// [0][2] is both enabled and inside the buffer; it becomes -2.
call @transfer_write_2d_mask(%A, %c0, %c2) : (memref<?x?xf32>, index, index) -> ()
// 11. Read memref to verify step 10.
call @transfer_read_2d(%A, %c0, %c0) : (memref<?x?xf32>, index, index) -> ()
// CHECK: ( ( 0, 1, -2, 3, -42, -42, -42, -42, -42 ), ( 10, 11, -1, -1, -42, -42, -42, -42, -42 ), ( 20, 21, 22, 23, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
return
}