//===- GPUToNVVMPipeline.cpp - Test lowering to NVVM as a sink pass -------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements a pass for testing the lowering to NVVM as a generally // usable sink pass. // //===----------------------------------------------------------------------===// #include "mlir/Conversion/AffineToStandard/AffineToStandard.h" #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h" #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" #include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h" #include "mlir/Conversion/MathToLLVM/MathToLLVM.h" #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" #include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h" #include "mlir/Conversion/NVVMToLLVM/NVVMToLLVM.h" #include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h" #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h" #include "mlir/Conversion/VectorToSCF/VectorToSCF.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/GPU/Pipelines/Passes.h" #include "mlir/Dialect/GPU/Transforms/Passes.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/Linalg/Passes.h" #include "mlir/Dialect/MemRef/Transforms/Passes.h" #include "mlir/Pass/PassManager.h" #include "mlir/Pass/PassOptions.h" #include "mlir/Transforms/Passes.h" using namespace mlir; #if MLIR_CUDA_CONVERSIONS_ENABLED namespace { struct GPUToNVVMPipelineOptions : public PassPipelineOptions { PassOptions::Option indexBitWidth{ *this, "index-bitwidth", llvm::cl::desc("Bitwidth of the index type for the host (warning this " "should be 64 until the GPU layering is fixed)"), llvm::cl::init(64)}; PassOptions::Option cubinTriple{ *this, "cubin-triple", llvm::cl::desc("Triple to use to serialize to cubin."), llvm::cl::init("nvptx64-nvidia-cuda")}; PassOptions::Option cubinChip{ *this, "cubin-chip", llvm::cl::desc("Chip to use to serialize to cubin."), llvm::cl::init("sm_50")}; PassOptions::Option cubinFeatures{ *this, "cubin-features", llvm::cl::desc("Features to use to serialize to cubin."), llvm::cl::init("+ptx60")}; PassOptions::Option cubinFormat{ *this, "cubin-format", llvm::cl::desc("Compilation format to use to serialize to cubin."), llvm::cl::init("fatbin")}; PassOptions::Option optLevel{ *this, "opt-level", llvm::cl::desc("Optimization level for NVVM compilation"), llvm::cl::init(2)}; PassOptions::Option kernelUseBarePtrCallConv{ *this, "kernel-bare-ptr-calling-convention", llvm::cl::desc( "Whether to use the bareptr calling convention on the kernel " "(warning this should be false until the GPU layering is fixed)"), llvm::cl::init(false)}; PassOptions::Option hostUseBarePtrCallConv{ *this, "host-bare-ptr-calling-convention", llvm::cl::desc( "Whether to use the bareptr calling convention on the host (warning " "this should be false until the GPU layering is fixed)"), llvm::cl::init(false)}; }; //===----------------------------------------------------------------------===// // Common pipeline //===----------------------------------------------------------------------===// void buildCommonPassPipeline(OpPassManager &pm, const GPUToNVVMPipelineOptions &options) { pm.addPass(createConvertNVGPUToNVVMPass()); pm.addPass(createGpuKernelOutliningPass()); pm.addPass(createConvertLinalgToLoopsPass()); pm.addPass(createConvertVectorToSCFPass()); pm.addPass(createConvertSCFToCFPass()); pm.addPass(createConvertNVVMToLLVMPass()); pm.addPass(createConvertMathToLLVMPass()); pm.addPass(createConvertFuncToLLVMPass()); pm.addPass(memref::createExpandStridedMetadataPass()); GpuNVVMAttachTargetOptions nvvmTargetOptions; nvvmTargetOptions.triple = options.cubinTriple; nvvmTargetOptions.chip = options.cubinChip; nvvmTargetOptions.features = options.cubinFeatures; nvvmTargetOptions.optLevel = options.optLevel; pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions)); pm.addPass(createLowerAffinePass()); pm.addPass(createArithToLLVMConversionPass()); ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt; convertIndexToLLVMPassOpt.indexBitwidth = options.indexBitWidth; pm.addPass(createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt)); pm.addPass(createCanonicalizerPass()); pm.addPass(createCSEPass()); } //===----------------------------------------------------------------------===// // GPUModule-specific stuff. //===----------------------------------------------------------------------===// void buildGpuPassPipeline(OpPassManager &pm, const GPUToNVVMPipelineOptions &options) { pm.addNestedPass(createStripDebugInfoPass()); ConvertGpuOpsToNVVMOpsOptions opt; opt.useBarePtrCallConv = options.kernelUseBarePtrCallConv; opt.indexBitwidth = options.indexBitWidth; pm.addNestedPass(createConvertGpuOpsToNVVMOps(opt)); pm.addNestedPass(createCanonicalizerPass()); pm.addNestedPass(createCSEPass()); pm.addNestedPass(createReconcileUnrealizedCastsPass()); } //===----------------------------------------------------------------------===// // Host Post-GPU pipeline //===----------------------------------------------------------------------===// void buildHostPostPipeline(OpPassManager &pm, const GPUToNVVMPipelineOptions &options) { GpuToLLVMConversionPassOptions opt; opt.hostBarePtrCallConv = options.hostUseBarePtrCallConv; opt.kernelBarePtrCallConv = options.kernelUseBarePtrCallConv; pm.addPass(createGpuToLLVMConversionPass(opt)); GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions; gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat; pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions)); pm.addPass(createCanonicalizerPass()); pm.addPass(createCSEPass()); pm.addPass(createReconcileUnrealizedCastsPass()); } void buildLowerToNVVMPassPipeline(OpPassManager &pm, const GPUToNVVMPipelineOptions &options) { //===----------------------------------------------------------------------===// // Common pipeline //===----------------------------------------------------------------------===// buildCommonPassPipeline(pm, options); //===----------------------------------------------------------------------===// // GPUModule-specific stuff. //===----------------------------------------------------------------------===// buildGpuPassPipeline(pm, options); //===----------------------------------------------------------------------===// // Host post-GPUModule-specific stuff. //===----------------------------------------------------------------------===// buildHostPostPipeline(pm, options); } } // namespace namespace mlir { namespace gpu { void registerGPUToNVVMPipeline() { PassPipelineRegistration( "gpu-lower-to-nvvm", "The default pipeline lowers main dialects (arith, linalg, memref, scf, " "vector, gpu, and nvgpu) to NVVM. It starts by lowering GPU code to the " "specified compilation target (default is fatbin) then lowers the host " "code.", buildLowerToNVVMPassPipeline); } } // namespace gpu } // namespace mlir #endif // MLIR_CUDA_CONVERSIONS_ENABLED