The `test-lower-to-nvvm` pipeline is the common pipeline for NVVM+host compilation and is used across our CUDA integration tests. This PR renames the pipeline to `gpu-lower-to-nvvm` and registers it in `InitAllPasses.h`. The aim is to make the pipeline callable from Python and to standardize the compilation process for NVVM.
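
As a minimal sketch of what the registration buys downstream tools (the driver below is illustrative, not part of this PR; it assumes `registerAllPasses()` now pulls the pipeline in via `InitAllPasses.h`), any `mlir-opt`-style driver picks up `gpu-lower-to-nvvm` automatically:

```cpp
#include "mlir/InitAllDialects.h"
#include "mlir/InitAllPasses.h"
#include "mlir/Tools/mlir-opt/MlirOptMain.h"

int main(int argc, char **argv) {
  mlir::DialectRegistry registry;
  mlir::registerAllDialects(registry);
  // Registers gpu-lower-to-nvvm together with all other upstream passes.
  mlir::registerAllPasses();
  return mlir::asMainReturnCode(
      mlir::MlirOptMain(argc, argv, "NVVM pipeline driver\n", registry));
}
```

Such a tool can then invoke the pipeline with its registered options, e.g. `-gpu-lower-to-nvvm="cubin-chip=sm_80 cubin-format=fatbin"` (see the options struct below).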
//===- GPUToNVVMPipeline.cpp - Test lowering to NVVM as a sink pass -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass for testing the lowering to NVVM as a generally
// usable sink pass.
//
//===----------------------------------------------------------------------===//

#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h"
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h"
#include "mlir/Conversion/NVVMToLLVM/NVVMToLLVM.h"
#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h"
#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Pipelines/Passes.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassOptions.h"
#include "mlir/Transforms/Passes.h"

using namespace mlir;

#if MLIR_CUDA_CONVERSIONS_ENABLED
namespace {
struct GPUToNVVMPipelineOptions
    : public PassPipelineOptions<GPUToNVVMPipelineOptions> {
  PassOptions::Option<int64_t> indexBitWidth{
      *this, "index-bitwidth",
      llvm::cl::desc("Bitwidth of the index type for the host (warning this "
                     "should be 64 until the GPU layering is fixed)"),
      llvm::cl::init(64)};
  PassOptions::Option<std::string> cubinTriple{
      *this, "cubin-triple",
      llvm::cl::desc("Triple to use to serialize to cubin."),
      llvm::cl::init("nvptx64-nvidia-cuda")};
  PassOptions::Option<std::string> cubinChip{
      *this, "cubin-chip", llvm::cl::desc("Chip to use to serialize to cubin."),
      llvm::cl::init("sm_50")};
  PassOptions::Option<std::string> cubinFeatures{
      *this, "cubin-features",
      llvm::cl::desc("Features to use to serialize to cubin."),
      llvm::cl::init("+ptx60")};
  PassOptions::Option<std::string> cubinFormat{
      *this, "cubin-format",
      llvm::cl::desc("Compilation format to use to serialize to cubin."),
      llvm::cl::init("fatbin")};
  PassOptions::Option<int> optLevel{
      *this, "opt-level",
      llvm::cl::desc("Optimization level for NVVM compilation"),
      llvm::cl::init(2)};
  PassOptions::Option<bool> kernelUseBarePtrCallConv{
      *this, "kernel-bare-ptr-calling-convention",
      llvm::cl::desc(
          "Whether to use the bareptr calling convention on the kernel "
          "(warning this should be false until the GPU layering is fixed)"),
      llvm::cl::init(false)};
  PassOptions::Option<bool> hostUseBarePtrCallConv{
      *this, "host-bare-ptr-calling-convention",
      llvm::cl::desc(
          "Whether to use the bareptr calling convention on the host (warning "
          "this should be false until the GPU layering is fixed)"),
      llvm::cl::init(false)};
};

//===----------------------------------------------------------------------===//
// Common pipeline
//===----------------------------------------------------------------------===//
void buildCommonPassPipeline(OpPassManager &pm,
                             const GPUToNVVMPipelineOptions &options) {
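  // Progressively lower the higher-level dialects (nvgpu, linalg, vector,
  // scf, math, func) toward the LLVM/NVVM level, and outline gpu.launch
  // bodies into gpu.module kernels.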
  pm.addPass(createConvertNVGPUToNVVMPass());
  pm.addPass(createGpuKernelOutliningPass());
  pm.addPass(createConvertLinalgToLoopsPass());
  pm.addPass(createConvertVectorToSCFPass());
  pm.addPass(createConvertSCFToCFPass());
  pm.addPass(createConvertNVVMToLLVMPass());
  pm.addPass(createConvertMathToLLVMPass());
  pm.addPass(createConvertFuncToLLVMPass());
  pm.addPass(memref::createExpandStridedMetadataPass());

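  // Attach an NVVM target attribute to each gpu.module; it carries the
  // triple, chip, features, and opt-level that gpu-module-to-binary consumes
  // later when serializing.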
  GpuNVVMAttachTargetOptions nvvmTargetOptions;
  nvvmTargetOptions.triple = options.cubinTriple;
  nvvmTargetOptions.chip = options.cubinChip;
  nvvmTargetOptions.features = options.cubinFeatures;
  nvvmTargetOptions.optLevel = options.optLevel;
  pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
  pm.addPass(createLowerAffinePass());
  pm.addPass(createArithToLLVMConversionPass());
  ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt;
  convertIndexToLLVMPassOpt.indexBitwidth = options.indexBitWidth;
  pm.addPass(createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt));
  pm.addPass(createCanonicalizerPass());
  pm.addPass(createCSEPass());
}

//===----------------------------------------------------------------------===//
// GPUModule-specific stuff.
//===----------------------------------------------------------------------===//
void buildGpuPassPipeline(OpPassManager &pm,
                          const GPUToNVVMPipelineOptions &options) {
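  // Everything here runs nested on gpu.module ops only; the host module is
  // left untouched.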
  pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
  ConvertGpuOpsToNVVMOpsOptions opt;
  opt.useBarePtrCallConv = options.kernelUseBarePtrCallConv;
  opt.indexBitwidth = options.indexBitWidth;
  pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps(opt));
  pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
  pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
}

//===----------------------------------------------------------------------===//
// Host Post-GPU pipeline
//===----------------------------------------------------------------------===//
void buildHostPostPipeline(OpPassManager &pm,
                           const GPUToNVVMPipelineOptions &options) {
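  // Lower the remaining host-side gpu operations (e.g. gpu.launch_func) to
  // LLVM-level runtime calls.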
  GpuToLLVMConversionPassOptions opt;
  opt.hostBarePtrCallConv = options.hostUseBarePtrCallConv;
  opt.kernelBarePtrCallConv = options.kernelUseBarePtrCallConv;
  pm.addPass(createGpuToLLVMConversionPass(opt));

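  // Serialize each gpu.module to the requested compilation target (default:
  // fatbin), using the NVVM target attribute attached earlier.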
  GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
  gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
  pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
  pm.addPass(createCanonicalizerPass());
  pm.addPass(createCSEPass());
  pm.addPass(createReconcileUnrealizedCastsPass());
}

void buildLowerToNVVMPassPipeline(OpPassManager &pm,
                                  const GPUToNVVMPipelineOptions &options) {
  //===----------------------------------------------------------------------===//
  // Common pipeline
  //===----------------------------------------------------------------------===//
  buildCommonPassPipeline(pm, options);

  //===----------------------------------------------------------------------===//
  // GPUModule-specific stuff.
  //===----------------------------------------------------------------------===//
  buildGpuPassPipeline(pm, options);

  //===----------------------------------------------------------------------===//
  // Host post-GPUModule-specific stuff.
  //===----------------------------------------------------------------------===//
  buildHostPostPipeline(pm, options);
}
} // namespace

namespace mlir {
namespace gpu {
void registerGPUToNVVMPipeline() {
  PassPipelineRegistration<GPUToNVVMPipelineOptions>(
      "gpu-lower-to-nvvm",
      "The default pipeline lowers main dialects (arith, linalg, memref, scf, "
      "vector, gpu, and nvgpu) to NVVM. It starts by lowering GPU code to the "
      "specified compilation target (default is fatbin) then lowers the host "
      "code.",
      buildLowerToNVVMPassPipeline);
}
} // namespace gpu
} // namespace mlir
#endif // MLIR_CUDA_CONVERSIONS_ENABLED