Files
clang-p2996/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
Aart Bik c5735fada4 [mlir][sparse] enable a few vectorized runs in integration tests
Recent changes outside sparse compiler exposed the requirement of running a
new pass (lower-affine) but this only became apparent with private testing.
By adding some vectorized runs to integration test, we will detect the need
for such changes earlier and also widen codegen coverage of course.

Reviewed By: gussmith23

Differential Revision: https://reviews.llvm.org/D108667
2021-08-24 16:08:01 -07:00

118 lines
4.0 KiB
MLIR

// RUN: mlir-opt %s \
// RUN: --sparsification --sparse-tensor-conversion \
// RUN: --convert-vector-to-scf --convert-scf-to-std \
// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
// RUN: --std-bufferize --finalizing-bufferize \
// RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm | \
// RUN: TENSOR0="%mlir_integration_test_dir/data/wide.mtx" \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
//
// Do the same run, but now with SIMDization as well. This should not change the outcome.
//
// RUN: mlir-opt %s \
// RUN: --sparsification="vectorization-strategy=2 vl=16 enable-simd-index32" --sparse-tensor-conversion \
// RUN: --convert-vector-to-scf --convert-scf-to-std \
// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
// RUN: --std-bufferize --finalizing-bufferize --lower-affine \
// RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm | \
// RUN: TENSOR0="%mlir_integration_test_dir/data/wide.mtx" \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
// Type alias for an i8 pointer in the LLVM dialect; in this file it is the
// result type of @getTensorFilename and the operand type of sparse_tensor.new.
!Filename = type !llvm.ptr<i8>
// Sparse tensor encoding for a 2-d tensor: the first dimension is annotated
// "dense" and the second "compressed", with 8-bit pointer and index widths
// requested for the underlying storage.
#SparseMatrix = #sparse_tensor.encoding<{
dimLevelType = [ "dense", "compressed" ],
pointerBitWidth = 8,
indexBitWidth = 8
}>
// Trait for the linalg.generic op in @kernel_matvec: one indexing map per
// operand (A, b, x), with i as a parallel iterator and j as a reduction
// iterator.
#matvec = {
indexing_maps = [
affine_map<(i,j) -> (i,j)>, // A
affine_map<(i,j) -> (j)>, // b
affine_map<(i,j) -> (i)> // x (out)
],
iterator_types = ["parallel", "reduction"],
doc = "X(i) += A(i,j) * B(j)"
}
//
// Integration test that lowers a kernel annotated as sparse to
// actual sparse code, initializes a matching sparse storage scheme
// from file, and runs the resulting code with the JIT compiler.
//
module {
//
// A kernel that multiplies a sparse matrix A with a dense vector b
// into a dense vector x.
//
func @kernel_matvec(%arga: tensor<?x?xi32, #SparseMatrix>,
                    %argb: tensor<?xi32>,
                    %argx: tensor<?xi32>) -> tensor<?xi32> {
  // The whole product is a single linalg.generic using the #matvec trait:
  // %arga and %argb are the inputs, %argx is the output operand whose
  // elements seed the accumulation.
  %result = linalg.generic #matvec
    ins(%arga, %argb: tensor<?x?xi32, #SparseMatrix>, tensor<?xi32>)
    outs(%argx: tensor<?xi32>) {
    ^body(%lhs: i32, %rhs: i32, %acc: i32):
      // Scalar update for one (i,j) point: acc + lhs * rhs.
      %prod = muli %lhs, %rhs : i32
      %sum = addi %acc, %prod : i32
      linalg.yield %sum : i32
  } -> tensor<?xi32>
  return %result : tensor<?xi32>
}
// External declaration (no body in this file): takes an index and returns a
// !Filename. @entry calls it with index 0 to obtain the input file name.
// NOTE(review): presumably provided by the runner-utils shared library named
// in the test command lines, with index 0 mapping to the TENSOR0 environment
// variable set there -- confirm against that library.
func private @getTensorFilename(index) -> (!Filename)
//
// Main driver that reads matrix from file and calls the sparse kernel.
//
func @entry() {
  // Constants: the i32 zero used as fill/padding value, plus the index
  // values used for file lookup, loop bounds, and buffer sizes.
  %zero_i32 = constant 0 : i32
  %idx0 = constant 0 : index
  %one = constant 1 : index
  %xlen = constant 4 : index
  %blen = constant 256 : index

  // Look up the input file name and build the sparse matrix from that file.
  %path = call @getTensorFilename(%idx0) : (index) -> (!Filename)
  %mat = sparse_tensor.new %path : !Filename to tensor<?x?xi32, #SparseMatrix>

  // Allocate the dense operands: a 256-element b and a 4-element x.
  %bbuf = memref.alloc(%blen) : memref<?xi32>
  %xbuf = memref.alloc(%xlen) : memref<?xi32>

  // Fill b so that b[i] = i + 1.
  scf.for %bi = %idx0 to %blen step %one {
    %succ = addi %bi, %one : index
    %elt = index_cast %succ : index to i32
    memref.store %elt, %bbuf[%bi] : memref<?xi32>
  }

  // Zero-initialize x, which the kernel accumulates into.
  scf.for %xi = %idx0 to %xlen step %one {
    memref.store %zero_i32, %xbuf[%xi] : memref<?xi32>
  }

  // View both buffers as tensors so they can be passed to the kernel.
  %bvec = memref.tensor_load %bbuf : memref<?xi32>
  %xvec = memref.tensor_load %xbuf : memref<?xi32>

  // Run the sparse matrix-vector kernel.
  %res = call @kernel_matvec(%mat, %bvec, %xvec)
    : (tensor<?x?xi32, #SparseMatrix>, tensor<?xi32>, tensor<?xi32>) -> tensor<?xi32>

  // Read the four result elements back as a vector and print them; the
  // printed line is matched against the expectation directly below.
  //
  // CHECK: ( 889, 1514, -21, -3431 )
  //
  %resbuf = memref.buffer_cast %res : memref<?xi32>
  %vec = vector.transfer_read %resbuf[%idx0], %zero_i32: memref<?xi32>, vector<4xi32>
  vector.print %vec : vector<4xi32>

  // Free the two buffers allocated in this function.
  memref.dealloc %bbuf : memref<?xi32>
  memref.dealloc %xbuf : memref<?xi32>
  return
}
}