The changes in this PR mostly center around the tests that use the `sparse_compiler` flag (also spelled `sparse-compiler`).
87 lines
3.0 KiB
MLIR
87 lines
3.0 KiB
MLIR
// NOTE: this test requires gpu-sm80
|
|
//
|
|
// DEFINE: %{compile} = mlir-opt %s \
|
|
// DEFINE: --sparsifier="enable-gpu-libgen gpu-triple=nvptx64-nvidia-cuda gpu-chip=sm_80 gpu-features=+ptx71 gpu-format=%gpu_compilation_format
|
|
// DEFINE: %{run} = mlir-cpu-runner \
|
|
// DEFINE: --shared-libs=%mlir_cuda_runtime \
|
|
// DEFINE: --shared-libs=%mlir_c_runner_utils \
|
|
// DEFINE: --e main --entry-point-result=void \
|
|
// DEFINE: | FileCheck %s
|
|
//
|
|
// with RT lib:
|
|
//
|
|
// RUN: %{compile} enable-runtime-library=true" | %{run}
|
|
//
|
|
// without RT lib:
|
|
//
|
|
// RUN: %{compile} enable-runtime-library=false" | %{run}
|
|
|
|
// Sparse encoding alias used for the operands and result of @matmulCSR.
// The level map keeps dimension d0 dense and dimension d1 compressed; the
// position and coordinate widths (posWidth, crdWidth) are both set to 32.
#CSR = #sparse_tensor.encoding<{
|
|
map = (d0, d1) -> (d0 : dense, d1 : compressed),
|
|
posWidth = 32,
|
|
crdWidth = 32
|
|
}>
|
|
|
|
module {
|
|
// External entry points, declared here without bodies. @main calls the first
// one before doing any tensor work and the second one right before returning.
// NOTE(review): presumably resolved from the CUDA runtime shared library that
// the runner command loads -- confirm.
llvm.func @mgpuCreateSparseEnv()
|
|
llvm.func @mgpuDestroySparseEnv()
|
|
|
|
// Computes C = A x B with A,B,C sparse CSR.
|
|
func.func @matmulCSR(
    %A: tensor<8x8xf32, #CSR>,
    %B: tensor<8x8xf32, #CSR>
) -> tensor<8x8xf32, #CSR> {
  // Fresh 8x8 tensor with the #CSR encoding, used as the matmul destination.
  %dst = tensor.empty() : tensor<8x8xf32, #CSR>

  // Both inputs and the output carry the same sparse encoding.
  %product = linalg.matmul
      ins(%A, %B : tensor<8x8xf32, #CSR>, tensor<8x8xf32, #CSR>)
      outs(%dst : tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR>

  return %product : tensor<8x8xf32, #CSR>
}
|
|
|
|
//
|
|
// Main driver.
|
|
//
|
|
func.func @main() {
  // Runtime hook invoked before any tensor work; paired with the destroy
  // call at the end of this function.
  llvm.call @mgpuCreateSparseEnv(): () -> ()

  // Origin index and padding value for the vector.transfer_read below.
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

  // Dense 8x8 input matrix; row 1 is entirely zero.
  %t = arith.constant dense<[
    [ 1.0,  0.0,  2.0, 0.0, 0.0, 0.0,  0.0,  3.0],
    [ 0.0,  0.0,  0.0, 0.0, 0.0, 0.0,  0.0,  0.0],
    [ 0.0,  0.0,  4.0, 0.0, 0.0, 0.0,  0.0,  0.0],
    [ 0.0,  0.0,  0.0, 5.0, 0.0, 0.0,  0.0,  0.0],
    [ 0.0,  0.0,  0.0, 0.0, 6.0, 0.0,  0.0,  0.0],
    [ 0.0,  7.0,  8.0, 0.0, 0.0, 0.0,  0.0,  9.0],
    [ 0.0,  0.0, 10.0, 0.0, 0.0, 0.0, 11.0, 12.0],
    [ 0.0, 13.0, 14.0, 0.0, 0.0, 0.0, 15.0, 16.0]
  ]> : tensor<8x8xf32>
  %Acsr = sparse_tensor.convert %t : tensor<8x8xf32> to tensor<8x8xf32, #CSR>

  // The same sparse matrix is passed as both operands, i.e. the matrix is
  // multiplied by itself.
  %Ccsr = call @matmulCSR(%Acsr, %Acsr) : (tensor<8x8xf32, #CSR>,
                                           tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR>

  //
  // Verify computed result (expected output, with only 20 nonzeros).
  //
  // CHECK: ( ( 1, 39, 52, 0, 0, 0, 45, 51 ),
  // CHECK-SAME: ( 0, 0, 0, 0, 0, 0, 0, 0 ),
  // CHECK-SAME: ( 0, 0, 16, 0, 0, 0, 0, 0 ),
  // CHECK-SAME: ( 0, 0, 0, 25, 0, 0, 0, 0 ),
  // CHECK-SAME: ( 0, 0, 0, 0, 36, 0, 0, 0 ),
  // CHECK-SAME: ( 0, 117, 158, 0, 0, 0, 135, 144 ),
  // CHECK-SAME: ( 0, 156, 318, 0, 0, 0, 301, 324 ),
  // CHECK-SAME: ( 0, 208, 430, 0, 0, 0, 405, 436 ) )
  // CHECK-NEXT: 20
  // Convert the sparse result to a dense tensor so it can be read into a
  // vector and printed as a whole.
  %d = sparse_tensor.convert %Ccsr : tensor<8x8xf32, #CSR> to tensor<8x8xf32>
  %v = vector.transfer_read %d[%c0, %c0], %f0: tensor<8x8xf32>, vector<8x8xf32>
  vector.print %v : vector<8x8xf32>
  // Entry count of the sparse result, queried once and printed on its own
  // line after the matrix.
  %nnz = sparse_tensor.number_of_entries %Ccsr : tensor<8x8xf32, #CSR>
  vector.print %nnz : index

  llvm.call @mgpuDestroySparseEnv(): () -> ()
  return
}
|
|
}
|