The logic enabling the Arm SVE (and now SME) integration tests for various dialects, which may run under emulation, is currently duplicated in several places. This patch moves that configuration to the top-level MLIR integration-test Lit config and renames the '%lli' substitution: contexts that run exclusively on AArch64 (ArmSVE, ArmSME), possibly under emulation, use '%lli_aarch64_cmd', while contexts that may run on AArch64, also possibly under emulation, use '%lli_host_or_aarch64_cmd'. The latter is for integration tests with both target-specific and target-agnostic codepaths, such as SparseTensor, which supports scalable vectors. The two substitutions expand to the same command; the distinct names exist only to convey this difference. '%lli_aarch64_cmd' could also be used in the SparseTensor tests, but the name would be a misnomer when the host is x86 and MLIR_RUN_SVE_TESTS=OFF.

The '%lli' substitution is renamed so that other target-specific integration tests can still run at the same time, since '%lli' is also used for lli in:

* mlir/test/Integration/Dialect/Vector/CPU/X86Vector - AVX emulation via Intel SDE
* mlir/test/Integration/Dialect/Vector/CPU/AMX - AMX emulation via Intel SDE
* mlir/test/Integration/Dialect/LLVMIR/CPU/test-vp-intrinsic.mlir - RISCV emulation via QEMU if supported, native otherwise

Substituting '%lli' at the top level with Arm-specific logic would override these.

Reviewed By: awarzynski

Differential Revision: https://reviews.llvm.org/D148929
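Illustration only: a top-level lit config could define the renamed substitutions roughly as sketched below. The flag and variable names used here (config.mlir_run_arm_sve_tests, config.arm_emulator_cmd) are placeholders for the sketch, not the actual MLIR lit configuration.

    # Sketch (Python lit config); assumed flag/variable names, not the real MLIR config.
    if config.mlir_run_arm_sve_tests or config.mlir_run_arm_sme_tests:
        # Cross-testing AArch64 from another host: run lli under an emulator (e.g. QEMU).
        lli_cmd = config.arm_emulator_cmd + " lli"
    else:
        lli_cmd = "lli"
    # ArmSVE/ArmSME integration tests, which only ever target AArch64:
    config.substitutions.append(("%lli_aarch64_cmd", lli_cmd))
    # Tests with both target-specific and target-agnostic code paths (e.g. SparseTensor):
    config.substitutions.append(("%lli_host_or_aarch64_cmd", lli_cmd))

Both substitutions expand to the same command; only the names differ.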
// DEFINE: %{option} = enable-runtime-library=true
// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
// DEFINE: %{run} = mlir-cpu-runner \
// DEFINE:  -e entry -entry-point-result=void \
// DEFINE:  -shared-libs=%mlir_c_runner_utils | \
// DEFINE: FileCheck %s
//
// RUN: %{compile} | %{run}
//
// Do the same run, but now with direct IR generation.
// REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true"
// RUN: %{compile} | %{run}
//
// Do the same run, but now with direct IR generation and vectorization.
// REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
// RUN: %{compile} | %{run}

// Do the same run, but now with direct IR generation and, if available, VLA
// vectorization.
// REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true vl=4 enable-arm-sve=%ENABLE_VLA"
// REDEFINE: %{run} = %lli_host_or_aarch64_cmd \
// REDEFINE:   --entry-function=entry_lli \
// REDEFINE:   --extra-module=%S/Inputs/main_for_lli.ll \
// REDEFINE:   %VLA_ARCH_ATTR_OPTIONS \
// REDEFINE:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
// REDEFINE: FileCheck %s
// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
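// Note: the VLA run above lowers the module to LLVM IR with mlir-translate and
// executes it with lli via the %lli_host_or_aarch64_cmd substitution, which may
// resolve to an (emulated) AArch64 command when SVE testing is enabled on a
// non-AArch64 host, and to plain lli otherwise.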
#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"]}>
#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>
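// Both encodings above use only "compressed" dimension levels: #SparseVector is
// a sparse 1-D vector, #DCSR is a doubly compressed sparse 2-D matrix.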

//
// Traits for tensor operations.
//
#trait_vec_scale = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a (in)
    affine_map<(i) -> (i)>   // x (out)
  ],
  iterator_types = ["parallel"]
}
#trait_vec_op = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a (in)
    affine_map<(i) -> (i)>,  // b (in)
    affine_map<(i) -> (i)>   // x (out)
  ],
  iterator_types = ["parallel"]
}
#trait_mat_op = {
  indexing_maps = [
    affine_map<(i,j) -> (i,j)>,  // A (in)
    affine_map<(i,j) -> (i,j)>,  // B (in)
    affine_map<(i,j) -> (i,j)>   // X (out)
  ],
  iterator_types = ["parallel", "parallel"],
  doc = "X(i,j) = A(i,j) OP B(i,j)"
}

//
// Test cases for the sparse_tensor.binary operator, covering the different
// cases where the left/right/overlap regions are empty, identity, or custom.
//

module {
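  //
  // A note on the sparse_tensor.binary regions used throughout this file:
  //   * overlap - computes the output where both operands have a stored entry;
  //   * left    - handles entries present only in the left operand
  //               ("identity" passes the value through unchanged, "{}" drops it);
  //   * right   - likewise for entries present only in the right operand.
  //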
  // Creates a new sparse vector using the minimum values from two input sparse vectors.
  // When there is no overlap, include the present value in the output.
  func.func @vector_min(%arga: tensor<?xi32, #SparseVector>,
                        %argb: tensor<?xi32, #SparseVector>) -> tensor<?xi32, #SparseVector> {
    %c = arith.constant 0 : index
    %d = tensor.dim %arga, %c : tensor<?xi32, #SparseVector>
    %xv = bufferization.alloc_tensor(%d) : tensor<?xi32, #SparseVector>
    %0 = linalg.generic #trait_vec_op
      ins(%arga, %argb: tensor<?xi32, #SparseVector>, tensor<?xi32, #SparseVector>)
      outs(%xv: tensor<?xi32, #SparseVector>) {
      ^bb(%a: i32, %b: i32, %x: i32):
        %1 = sparse_tensor.binary %a, %b : i32, i32 to i32
          overlap={
            ^bb0(%a0: i32, %b0: i32):
              %2 = arith.minsi %a0, %b0: i32
              sparse_tensor.yield %2 : i32
          }
          left=identity
          right=identity
        linalg.yield %1 : i32
    } -> tensor<?xi32, #SparseVector>
    return %0 : tensor<?xi32, #SparseVector>
  }

  // Creates a new sparse vector by multiplying a sparse vector with a dense vector.
  // When there is no overlap, leave the result empty.
  func.func @vector_mul(%arga: tensor<?xf64, #SparseVector>,
                        %argb: tensor<?xf64>) -> tensor<?xf64, #SparseVector> {
    %c = arith.constant 0 : index
    %d = tensor.dim %arga, %c : tensor<?xf64, #SparseVector>
    %xv = bufferization.alloc_tensor(%d) : tensor<?xf64, #SparseVector>
    %0 = linalg.generic #trait_vec_op
      ins(%arga, %argb: tensor<?xf64, #SparseVector>, tensor<?xf64>)
      outs(%xv: tensor<?xf64, #SparseVector>) {
      ^bb(%a: f64, %b: f64, %x: f64):
        %1 = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={
            ^bb0(%a0: f64, %b0: f64):
              %ret = arith.mulf %a0, %b0 : f64
              sparse_tensor.yield %ret : f64
          }
          left={}
          right={}
        linalg.yield %1 : f64
    } -> tensor<?xf64, #SparseVector>
    return %0 : tensor<?xf64, #SparseVector>
  }

  // Take a set difference of two sparse vectors. The result will include only those
  // sparse elements present in the first, but not the second vector.
  func.func @vector_setdiff(%arga: tensor<?xf64, #SparseVector>,
                            %argb: tensor<?xf64, #SparseVector>) -> tensor<?xf64, #SparseVector> {
    %c = arith.constant 0 : index
    %d = tensor.dim %arga, %c : tensor<?xf64, #SparseVector>
    %xv = bufferization.alloc_tensor(%d) : tensor<?xf64, #SparseVector>
    %0 = linalg.generic #trait_vec_op
      ins(%arga, %argb: tensor<?xf64, #SparseVector>, tensor<?xf64, #SparseVector>)
      outs(%xv: tensor<?xf64, #SparseVector>) {
      ^bb(%a: f64, %b: f64, %x: f64):
        %1 = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={}
          left=identity
          right={}
        linalg.yield %1 : f64
    } -> tensor<?xf64, #SparseVector>
    return %0 : tensor<?xf64, #SparseVector>
  }

  // Return the index of each entry.
  func.func @vector_index(%arga: tensor<?xf64, #SparseVector>) -> tensor<?xi32, #SparseVector> {
    %c = arith.constant 0 : index
    %d = tensor.dim %arga, %c : tensor<?xf64, #SparseVector>
    %xv = bufferization.alloc_tensor(%d) : tensor<?xi32, #SparseVector>
    %0 = linalg.generic #trait_vec_scale
      ins(%arga: tensor<?xf64, #SparseVector>)
      outs(%xv: tensor<?xi32, #SparseVector>) {
      ^bb(%a: f64, %x: i32):
        %idx = linalg.index 0 : index
        %1 = sparse_tensor.binary %a, %idx : f64, index to i32
          overlap={
            ^bb0(%x0: f64, %i: index):
              %ret = arith.index_cast %i : index to i32
              sparse_tensor.yield %ret : i32
          }
          left={}
          right={}
        linalg.yield %1 : i32
    } -> tensor<?xi32, #SparseVector>
    return %0 : tensor<?xi32, #SparseVector>
  }

  // Adds two sparse matrices where they intersect. Where they don't intersect,
  // negates the 2nd argument's values; values present only in the 1st argument
  // are ignored.
  func.func @matrix_intersect(%arga: tensor<?x?xf64, #DCSR>,
                              %argb: tensor<?x?xf64, #DCSR>) -> tensor<?x?xf64, #DCSR> {
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    %d0 = tensor.dim %arga, %c0 : tensor<?x?xf64, #DCSR>
    %d1 = tensor.dim %arga, %c1 : tensor<?x?xf64, #DCSR>
    %xv = bufferization.alloc_tensor(%d0, %d1) : tensor<?x?xf64, #DCSR>
    %0 = linalg.generic #trait_mat_op
      ins(%arga, %argb: tensor<?x?xf64, #DCSR>, tensor<?x?xf64, #DCSR>)
      outs(%xv: tensor<?x?xf64, #DCSR>) {
      ^bb(%a: f64, %b: f64, %x: f64):
        %1 = sparse_tensor.binary %a, %b: f64, f64 to f64
          overlap={
            ^bb0(%x0: f64, %y0: f64):
              %ret = arith.addf %x0, %y0 : f64
              sparse_tensor.yield %ret : f64
          }
          left={}
          right={
            ^bb0(%x1: f64):
              %lret = arith.negf %x1 : f64
              sparse_tensor.yield %lret : f64
          }
        linalg.yield %1 : f64
    } -> tensor<?x?xf64, #DCSR>
    return %0 : tensor<?x?xf64, #DCSR>
  }

  // Tensor addition (use semi-ring binary operation).
  func.func @add_tensor_1(%A: tensor<4x4xf64, #DCSR>,
                          %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
    %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
    %0 = linalg.generic #trait_mat_op
      ins(%A, %B: tensor<4x4xf64, #DCSR>,
                  tensor<4x4xf64, #DCSR>)
      outs(%C: tensor<4x4xf64, #DCSR>) {
      ^bb0(%a: f64, %b: f64, %c: f64) :
        %result = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={
            ^bb0(%x: f64, %y: f64):
              %ret = arith.addf %x, %y : f64
              sparse_tensor.yield %ret : f64
          }
          left=identity
          right=identity
        linalg.yield %result : f64
    } -> tensor<4x4xf64, #DCSR>
    return %0 : tensor<4x4xf64, #DCSR>
  }

  // Same as @add_tensor_1, but use sparse_tensor.yield instead of identity to yield value.
  func.func @add_tensor_2(%A: tensor<4x4xf64, #DCSR>,
                          %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
    %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
    %0 = linalg.generic #trait_mat_op
      ins(%A, %B: tensor<4x4xf64, #DCSR>,
                  tensor<4x4xf64, #DCSR>)
      outs(%C: tensor<4x4xf64, #DCSR>) {
      ^bb0(%a: f64, %b: f64, %c: f64) :
        %result = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={
            ^bb0(%x: f64, %y: f64):
              %ret = arith.addf %x, %y : f64
              sparse_tensor.yield %ret : f64
          }
          left={
            ^bb0(%x: f64):
              sparse_tensor.yield %x : f64
          }
          right={
            ^bb0(%y: f64):
              sparse_tensor.yield %y : f64
          }
        linalg.yield %result : f64
    } -> tensor<4x4xf64, #DCSR>
    return %0 : tensor<4x4xf64, #DCSR>
  }

  // Performs triangular add/sub operation (using semi-ring binary op).
  func.func @triangular(%A: tensor<4x4xf64, #DCSR>,
                        %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
    %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
    %0 = linalg.generic #trait_mat_op
      ins(%A, %B: tensor<4x4xf64, #DCSR>,
                  tensor<4x4xf64, #DCSR>)
      outs(%C: tensor<4x4xf64, #DCSR>) {
      ^bb0(%a: f64, %b: f64, %c: f64) :
        %row = linalg.index 0 : index
        %col = linalg.index 1 : index
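        // The iteration indices select the behaviour below: entries are added in
        // the upper triangle (col >= row) and subtracted strictly below the diagonal.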
        %result = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={
            ^bb0(%x: f64, %y: f64):
              %cmp = arith.cmpi "uge", %col, %row : index
              %upperTriangleResult = arith.addf %x, %y : f64
              %lowerTriangleResult = arith.subf %x, %y : f64
              %ret = arith.select %cmp, %upperTriangleResult, %lowerTriangleResult : f64
              sparse_tensor.yield %ret : f64
          }
          left=identity
          right={
            ^bb0(%y: f64):
              %cmp = arith.cmpi "uge", %col, %row : index
              %lowerTriangleResult = arith.negf %y : f64
              %ret = arith.select %cmp, %y, %lowerTriangleResult : f64
              sparse_tensor.yield %ret : f64
          }
        linalg.yield %result : f64
    } -> tensor<4x4xf64, #DCSR>
    return %0 : tensor<4x4xf64, #DCSR>
  }

  // Perform sub operation (using semi-ring binary op) with a constant threshold.
  func.func @sub_with_thres(%A: tensor<4x4xf64, #DCSR>,
                            %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
    %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
    // Defines out-block constant bounds.
    %thres_out_up = arith.constant 2.0 : f64
    %thres_out_lo = arith.constant -2.0 : f64
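    // Note: these out-of-block constants are captured and reused by the left and
    // right regions below, while the overlap region defines its own tighter bounds.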

    %0 = linalg.generic #trait_mat_op
      ins(%A, %B: tensor<4x4xf64, #DCSR>,
                  tensor<4x4xf64, #DCSR>)
      outs(%C: tensor<4x4xf64, #DCSR>) {
      ^bb0(%a: f64, %b: f64, %c: f64) :
        %result = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={
            ^bb0(%x: f64, %y: f64):
              // Defines in-block constant bounds.
              %thres_up = arith.constant 1.0 : f64
              %thres_lo = arith.constant -1.0 : f64
              %result = arith.subf %x, %y : f64
              %cmp = arith.cmpf "oge", %result, %thres_up : f64
              %tmp = arith.select %cmp, %thres_up, %result : f64
              %cmp1 = arith.cmpf "ole", %tmp, %thres_lo : f64
              %ret = arith.select %cmp1, %thres_lo, %tmp : f64
              sparse_tensor.yield %ret : f64
          }
          left={
            ^bb0(%x: f64):
              // Uses out-block constant bounds.
              %cmp = arith.cmpf "oge", %x, %thres_out_up : f64
              %tmp = arith.select %cmp, %thres_out_up, %x : f64
              %cmp1 = arith.cmpf "ole", %tmp, %thres_out_lo : f64
              %ret = arith.select %cmp1, %thres_out_lo, %tmp : f64
              sparse_tensor.yield %ret : f64
          }
          right={
            ^bb0(%y: f64):
              %ny = arith.negf %y : f64
              %cmp = arith.cmpf "oge", %ny, %thres_out_up : f64
              %tmp = arith.select %cmp, %thres_out_up, %ny : f64
              %cmp1 = arith.cmpf "ole", %tmp, %thres_out_lo : f64
              %ret = arith.select %cmp1, %thres_out_lo, %tmp : f64
              sparse_tensor.yield %ret : f64
          }
        linalg.yield %result : f64
    } -> tensor<4x4xf64, #DCSR>
    return %0 : tensor<4x4xf64, #DCSR>
  }

  // Performs isEqual only on intersecting elements.
  func.func @intersect_equal(%A: tensor<4x4xf64, #DCSR>,
                             %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xi8, #DCSR> {
    %C = bufferization.alloc_tensor() : tensor<4x4xi8, #DCSR>
    %0 = linalg.generic #trait_mat_op
      ins(%A, %B: tensor<4x4xf64, #DCSR>,
                  tensor<4x4xf64, #DCSR>)
      outs(%C: tensor<4x4xi8, #DCSR>) {
      ^bb0(%a: f64, %b: f64, %c: i8) :
        %result = sparse_tensor.binary %a, %b : f64, f64 to i8
          overlap={
            ^bb0(%x: f64, %y: f64):
              %cmp = arith.cmpf "oeq", %x, %y : f64
              %ret = arith.extui %cmp : i1 to i8
              sparse_tensor.yield %ret : i8
          }
          left={}
          right={}
        linalg.yield %result : i8
    } -> tensor<4x4xi8, #DCSR>
    return %0 : tensor<4x4xi8, #DCSR>
  }

  // Keeps left-only values, negates right-only values, and drops values where
  // the operands overlap.
  func.func @only_left_right(%A: tensor<4x4xf64, #DCSR>,
                             %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
    %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
    %0 = linalg.generic #trait_mat_op
      ins(%A, %B: tensor<4x4xf64, #DCSR>,
                  tensor<4x4xf64, #DCSR>)
      outs(%C: tensor<4x4xf64, #DCSR>) {
      ^bb0(%a: f64, %b: f64, %c: f64) :
        %result = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={}
          left=identity
          right={
            ^bb0(%y: f64):
              %ret = arith.negf %y : f64
              sparse_tensor.yield %ret : f64
          }
        linalg.yield %result : f64
    } -> tensor<4x4xf64, #DCSR>
    return %0 : tensor<4x4xf64, #DCSR>
  }

  //
  // Utility functions to dump the contents of a tensor.
  //

  func.func @dump_vec(%arg0: tensor<?xf64, #SparseVector>) {
    // Dump the values array to verify only sparse contents are stored.
    %c0 = arith.constant 0 : index
    %d0 = arith.constant 0.0 : f64
    %0 = sparse_tensor.values %arg0 : tensor<?xf64, #SparseVector> to memref<?xf64>
    %1 = vector.transfer_read %0[%c0], %d0: memref<?xf64>, vector<16xf64>
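    // The transfer_read pads with %d0 past the end of the values array, so any
    // trailing zeros printed here are padding rather than stored entries.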
    vector.print %1 : vector<16xf64>
    // Dump the dense vector to verify structure is correct.
    %dv = sparse_tensor.convert %arg0 : tensor<?xf64, #SparseVector> to tensor<?xf64>
    %3 = vector.transfer_read %dv[%c0], %d0: tensor<?xf64>, vector<32xf64>
    vector.print %3 : vector<32xf64>
    return
  }

  func.func @dump_vec_i32(%arg0: tensor<?xi32, #SparseVector>) {
    // Dump the values array to verify only sparse contents are stored.
    %c0 = arith.constant 0 : index
    %d0 = arith.constant 0 : i32
    %0 = sparse_tensor.values %arg0 : tensor<?xi32, #SparseVector> to memref<?xi32>
    %1 = vector.transfer_read %0[%c0], %d0: memref<?xi32>, vector<24xi32>
    vector.print %1 : vector<24xi32>
    // Dump the dense vector to verify structure is correct.
    %dv = sparse_tensor.convert %arg0 : tensor<?xi32, #SparseVector> to tensor<?xi32>
    %3 = vector.transfer_read %dv[%c0], %d0: tensor<?xi32>, vector<32xi32>
    vector.print %3 : vector<32xi32>
    return
  }

  func.func @dump_mat(%arg0: tensor<?x?xf64, #DCSR>) {
    %d0 = arith.constant 0.0 : f64
    %c0 = arith.constant 0 : index
    %dm = sparse_tensor.convert %arg0 : tensor<?x?xf64, #DCSR> to tensor<?x?xf64>
    %1 = vector.transfer_read %dm[%c0, %c0], %d0: tensor<?x?xf64>, vector<4x8xf64>
    vector.print %1 : vector<4x8xf64>
    return
  }

  func.func @dump_mat_4x4(%A: tensor<4x4xf64, #DCSR>) {
    %c0 = arith.constant 0 : index
    %du = arith.constant 0.0 : f64

    %c = sparse_tensor.convert %A : tensor<4x4xf64, #DCSR> to tensor<4x4xf64>
    %v = vector.transfer_read %c[%c0, %c0], %du: tensor<4x4xf64>, vector<4x4xf64>
    vector.print %v : vector<4x4xf64>

    %1 = sparse_tensor.values %A : tensor<4x4xf64, #DCSR> to memref<?xf64>
    %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<16xf64>
    vector.print %2 : vector<16xf64>

    return
  }

  func.func @dump_mat_4x4_i8(%A: tensor<4x4xi8, #DCSR>) {
    %c0 = arith.constant 0 : index
    %du = arith.constant 0 : i8

    %c = sparse_tensor.convert %A : tensor<4x4xi8, #DCSR> to tensor<4x4xi8>
    %v = vector.transfer_read %c[%c0, %c0], %du: tensor<4x4xi8>, vector<4x4xi8>
    vector.print %v : vector<4x4xi8>

    %1 = sparse_tensor.values %A : tensor<4x4xi8, #DCSR> to memref<?xi8>
    %2 = vector.transfer_read %1[%c0], %du: memref<?xi8>, vector<16xi8>
    vector.print %2 : vector<16xi8>

    return
  }

  // Driver method to call and verify kernels.
  func.func @entry() {
    %c0 = arith.constant 0 : index

    // Setup sparse vectors.
    %v1 = arith.constant sparse<
       [ [0], [3], [11], [17], [20], [21], [28], [29], [31] ],
         [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 ]
    > : tensor<32xf64>
    %v2 = arith.constant sparse<
       [ [1], [3], [4], [10], [16], [18], [21], [28], [29], [31] ],
         [11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0 ]
    > : tensor<32xf64>
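    // Note: %v1 and %v2 overlap at indices 3, 21, 28, 29, and 31; the vector
    // kernels exercise the overlap, left-only, and right-only cases on them.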
    %v3 = arith.constant dense<
       [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.,
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 0., 1.]
    > : tensor<32xf64>
    %v1_si = arith.fptosi %v1 : tensor<32xf64> to tensor<32xi32>
    %v2_si = arith.fptosi %v2 : tensor<32xf64> to tensor<32xi32>

    %sv1 = sparse_tensor.convert %v1 : tensor<32xf64> to tensor<?xf64, #SparseVector>
    %sv2 = sparse_tensor.convert %v2 : tensor<32xf64> to tensor<?xf64, #SparseVector>
    %sv1_si = sparse_tensor.convert %v1_si : tensor<32xi32> to tensor<?xi32, #SparseVector>
    %sv2_si = sparse_tensor.convert %v2_si : tensor<32xi32> to tensor<?xi32, #SparseVector>
    %dv3 = tensor.cast %v3 : tensor<32xf64> to tensor<?xf64>

    // Setup sparse matrices.
    %m1 = arith.constant sparse<
       [ [0,0], [0,1], [1,7], [2,2], [2,4], [2,7], [3,0], [3,2], [3,3] ],
         [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 ]
    > : tensor<4x8xf64>
    %m2 = arith.constant sparse<
       [ [0,0], [0,7], [1,0], [1,6], [2,1], [2,7] ],
         [6.0, 5.0, 4.0, 3.0, 2.0, 1.0 ]
    > : tensor<4x8xf64>
    %sm1 = sparse_tensor.convert %m1 : tensor<4x8xf64> to tensor<?x?xf64, #DCSR>
    %sm2 = sparse_tensor.convert %m2 : tensor<4x8xf64> to tensor<?x?xf64, #DCSR>

    %m3 = arith.constant dense<
      [ [ 1.0, 0.0, 3.0, 0.0],
        [ 0.0, 2.0, 0.0, 0.0],
        [ 0.0, 0.0, 0.0, 4.0],
        [ 3.0, 4.0, 0.0, 0.0] ]> : tensor<4x4xf64>
    %m4 = arith.constant dense<
      [ [ 1.0, 0.0, 1.0, 1.0],
        [ 0.0, 0.5, 0.0, 0.0],
        [ 1.0, 5.0, 2.0, 0.0],
        [ 2.0, 0.0, 0.0, 0.0] ]> : tensor<4x4xf64>

    %sm3 = sparse_tensor.convert %m3 : tensor<4x4xf64> to tensor<4x4xf64, #DCSR>
    %sm4 = sparse_tensor.convert %m4 : tensor<4x4xf64> to tensor<4x4xf64, #DCSR>

    // Call sparse vector kernels.
    %0 = call @vector_min(%sv1_si, %sv2_si)
           : (tensor<?xi32, #SparseVector>,
              tensor<?xi32, #SparseVector>) -> tensor<?xi32, #SparseVector>
    %1 = call @vector_mul(%sv1, %dv3)
           : (tensor<?xf64, #SparseVector>,
              tensor<?xf64>) -> tensor<?xf64, #SparseVector>
    %2 = call @vector_setdiff(%sv1, %sv2)
           : (tensor<?xf64, #SparseVector>,
              tensor<?xf64, #SparseVector>) -> tensor<?xf64, #SparseVector>
    %3 = call @vector_index(%sv1)
           : (tensor<?xf64, #SparseVector>) -> tensor<?xi32, #SparseVector>

    // Call sparse matrix kernels.
    %5 = call @matrix_intersect(%sm1, %sm2)
           : (tensor<?x?xf64, #DCSR>, tensor<?x?xf64, #DCSR>) -> tensor<?x?xf64, #DCSR>
    %6 = call @add_tensor_1(%sm3, %sm4)
           : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
    %7 = call @add_tensor_2(%sm3, %sm4)
           : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
    %8 = call @triangular(%sm3, %sm4)
           : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
    %9 = call @sub_with_thres(%sm3, %sm4)
           : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
    %10 = call @intersect_equal(%sm3, %sm4)
           : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xi8, #DCSR>
    %11 = call @only_left_right(%sm3, %sm4)
           : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>

    //
    // Verify the results.
    //
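    // The 25 CHECK lines below match, in order, the output of the dump calls at
    // the end of this function: two lines for each @dump_vec, @dump_vec_i32,
    // @dump_mat_4x4, and @dump_mat_4x4_i8 call, and one line for @dump_mat.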
    // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0 )
    // CHECK-NEXT: ( 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 4, 0, 0, 5, 6, 0, 0, 0, 0, 0, 0, 7, 8, 0, 9 )
    // CHECK-NEXT: ( 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0, 0, 0, 0, 0, 0 )
    // CHECK-NEXT: ( 0, 11, 0, 12, 13, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 16, 0, 0, 17, 0, 0, 0, 0, 0, 0, 18, 19, 0, 20 )
    // CHECK-NEXT: ( 1, 11, 2, 13, 14, 3, 15, 4, 16, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
    // CHECK-NEXT: ( 1, 11, 0, 2, 13, 0, 0, 0, 0, 0, 14, 3, 0, 0, 0, 0, 15, 4, 16, 0, 5, 6, 0, 0, 0, 0, 0, 0, 7, 8, 0, 9 )
    // CHECK-NEXT: ( 0, 6, 3, 28, 0, 6, 56, 72, 9, 0, 0, 0, 0, 0, 0, 0 )
    // CHECK-NEXT: ( 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 28, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 56, 72, 0, 9 )
    // CHECK-NEXT: ( 1, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
    // CHECK-NEXT: ( 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 4, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
    // CHECK-NEXT: ( 0, 3, 11, 17, 20, 21, 28, 29, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
    // CHECK-NEXT: ( 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 17, 0, 0, 20, 21, 0, 0, 0, 0, 0, 0, 28, 29, 0, 31 )
    // CHECK-NEXT: ( ( 7, 0, 0, 0, 0, 0, 0, -5 ), ( -4, 0, 0, 0, 0, 0, -3, 0 ), ( 0, -2, 0, 0, 0, 0, 0, 7 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ) )
    // CHECK-NEXT: ( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( 1, 5, 2, 4 ), ( 5, 4, 0, 0 ) )
    // CHECK-NEXT: ( 2, 4, 1, 2.5, 1, 5, 2, 4, 5, 4, 0, 0, 0, 0, 0, 0 )
    // CHECK-NEXT: ( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( 1, 5, 2, 4 ), ( 5, 4, 0, 0 ) )
    // CHECK-NEXT: ( 2, 4, 1, 2.5, 1, 5, 2, 4, 5, 4, 0, 0, 0, 0, 0, 0 )
    // CHECK-NEXT: ( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( -1, -5, 2, 4 ), ( 1, 4, 0, 0 ) )
    // CHECK-NEXT: ( 2, 4, 1, 2.5, -1, -5, 2, 4, 1, 4, 0, 0, 0, 0, 0, 0 )
    // CHECK-NEXT: ( ( 0, 0, 1, -1 ), ( 0, 1, 0, 0 ), ( -1, -2, -2, 2 ), ( 1, 2, 0, 0 ) )
    // CHECK-NEXT: ( 0, 1, -1, 1, -1, -2, -2, 2, 1, 2, 0, 0, 0, 0, 0, 0 )
    // CHECK-NEXT: ( ( 1, 0, 0, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ) )
    // CHECK-NEXT: ( 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
    // CHECK-NEXT: ( ( 0, 0, 0, -1 ), ( 0, 0, 0, 0 ), ( -1, -5, -2, 4 ), ( 0, 4, 0, 0 ) )
    // CHECK-NEXT: ( -1, -1, -5, -2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
    //
    call @dump_vec(%sv1) : (tensor<?xf64, #SparseVector>) -> ()
    call @dump_vec(%sv2) : (tensor<?xf64, #SparseVector>) -> ()
    call @dump_vec_i32(%0) : (tensor<?xi32, #SparseVector>) -> ()
    call @dump_vec(%1) : (tensor<?xf64, #SparseVector>) -> ()
    call @dump_vec(%2) : (tensor<?xf64, #SparseVector>) -> ()
    call @dump_vec_i32(%3) : (tensor<?xi32, #SparseVector>) -> ()
    call @dump_mat(%5) : (tensor<?x?xf64, #DCSR>) -> ()
    call @dump_mat_4x4(%6) : (tensor<4x4xf64, #DCSR>) -> ()
    call @dump_mat_4x4(%7) : (tensor<4x4xf64, #DCSR>) -> ()
    call @dump_mat_4x4(%8) : (tensor<4x4xf64, #DCSR>) -> ()
    call @dump_mat_4x4(%9) : (tensor<4x4xf64, #DCSR>) -> ()
    call @dump_mat_4x4_i8(%10) : (tensor<4x4xi8, #DCSR>) -> ()
    call @dump_mat_4x4(%11) : (tensor<4x4xf64, #DCSR>) -> ()

    // Release the resources.
    bufferization.dealloc_tensor %sv1 : tensor<?xf64, #SparseVector>
    bufferization.dealloc_tensor %sv2 : tensor<?xf64, #SparseVector>
    bufferization.dealloc_tensor %sv1_si : tensor<?xi32, #SparseVector>
    bufferization.dealloc_tensor %sv2_si : tensor<?xi32, #SparseVector>
    bufferization.dealloc_tensor %sm1 : tensor<?x?xf64, #DCSR>
    bufferization.dealloc_tensor %sm2 : tensor<?x?xf64, #DCSR>
    bufferization.dealloc_tensor %sm3 : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %sm4 : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %0 : tensor<?xi32, #SparseVector>
    bufferization.dealloc_tensor %1 : tensor<?xf64, #SparseVector>
    bufferization.dealloc_tensor %2 : tensor<?xf64, #SparseVector>
    bufferization.dealloc_tensor %3 : tensor<?xi32, #SparseVector>
    bufferization.dealloc_tensor %5 : tensor<?x?xf64, #DCSR>
    bufferization.dealloc_tensor %6 : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %7 : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %8 : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %9 : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %10 : tensor<4x4xi8, #DCSR>
    bufferization.dealloc_tensor %11 : tensor<4x4xf64, #DCSR>
    return
  }
}