Files
clang-p2996/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
Fabian Mora 5093413a50 [mlir][gpu][NVPTX] Enable NVIDIA GPU JIT compilation path (#66220)
This patch adds an NVPTX compilation path that enables JIT compilation
on NVIDIA targets. The following modifications were performed:
1. Adding a format field to the GPU object attribute, allowing the
translation attribute to use the correct runtime function to load the
module. Likewise, a dictionary attribute was added to add any possible
extra options.

2. Adding the `createObject` method to `GPUTargetAttrInterface`; this
method returns a GPU object from a binary string.

3. Adding the function `mgpuModuleLoadJIT`, which is only available for
NVIDIA GPUs, as there is no equivalent for AMD.

4. Adding the CMake flag `MLIR_GPU_COMPILATION_TEST_FORMAT` to specify
the format to use during testing.
2023-09-14 18:00:27 -04:00

107 lines
4.9 KiB
C++

//===- SparseTensorPipelines.cpp - Pipelines for sparse tensor code -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/SparseTensor/Pipelines/Passes.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/Passes.h"
#include "mlir/Dialect/Arith/Transforms/Passes.h"
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"
//===----------------------------------------------------------------------===//
// Pipeline implementation.
//===----------------------------------------------------------------------===//
void mlir::sparse_tensor::buildSparseCompiler(
OpPassManager &pm, const SparseCompilerOptions &options) {
pm.addNestedPass<func::FuncOp>(createLinalgGeneralizationPass());
pm.addPass(createSparsificationAndBufferizationPass(
getBufferizationOptionsForSparsification(
options.testBufferizationAnalysisOnly),
options.sparsificationOptions(), options.sparseTensorConversionOptions(),
options.createSparseDeallocs, options.enableRuntimeLibrary,
options.enableBufferInitialization, options.vectorLength,
/*enableVLAVectorization=*/options.armSVE,
/*enableSIMDIndex32=*/options.force32BitVectorIndices));
if (options.testBufferizationAnalysisOnly)
return;
pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
pm.addNestedPass<func::FuncOp>(
mlir::bufferization::createFinalizingBufferizePass());
// GPU code generation.
const bool gpuCodegen = options.gpuTriple.hasValue();
if (gpuCodegen) {
pm.addPass(createSparseGPUCodegenPass());
pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
pm.addNestedPass<gpu::GPUModuleOp>(createConvertSCFToCFPass());
pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps());
}
// TODO(springerm): Add sparse support to the BufferDeallocation pass and add
// it to this pipeline.
pm.addNestedPass<func::FuncOp>(createConvertLinalgToLoopsPass());
pm.addNestedPass<func::FuncOp>(createConvertVectorToSCFPass());
pm.addNestedPass<func::FuncOp>(memref::createExpandReallocPass());
pm.addNestedPass<func::FuncOp>(createConvertSCFToCFPass());
pm.addPass(memref::createExpandStridedMetadataPass());
pm.addPass(createLowerAffinePass());
pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions()));
pm.addPass(createFinalizeMemRefToLLVMConversionPass());
pm.addNestedPass<func::FuncOp>(createConvertComplexToStandardPass());
pm.addNestedPass<func::FuncOp>(arith::createArithExpandOpsPass());
pm.addNestedPass<func::FuncOp>(createConvertMathToLLVMPass());
pm.addPass(createConvertMathToLibmPass());
pm.addPass(createConvertComplexToLibmPass());
// Repeat convert-vector-to-llvm.
pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions()));
pm.addPass(createConvertComplexToLLVMPass());
pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions()));
pm.addPass(createConvertFuncToLLVMPass());
// Finalize GPU code generation.
if (gpuCodegen) {
GpuNVVMAttachTargetOptions nvvmTargetOptions;
nvvmTargetOptions.triple = options.gpuTriple;
nvvmTargetOptions.chip = options.gpuChip;
nvvmTargetOptions.features = options.gpuFeatures;
pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
pm.addPass(createGpuToLLVMConversionPass());
GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
gpuModuleToBinaryPassOptions.compilationTarget = options.gpuFormat;
pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
}
pm.addPass(createReconcileUnrealizedCastsPass());
}
//===----------------------------------------------------------------------===//
// Pipeline registration.
//===----------------------------------------------------------------------===//
void mlir::sparse_tensor::registerSparseTensorPipelines() {
PassPipelineRegistration<SparseCompilerOptions>(
"sparse-compiler",
"The standard pipeline for taking sparsity-agnostic IR using the"
" sparse-tensor type, and lowering it to LLVM IR with concrete"
" representations and algorithms for sparse tensors.",
buildSparseCompiler);
}