I have a tutorial at EuroLLVM 2024 ([Zero to Hero: Programming Nvidia Hopper Tensor Core with MLIR's NVGPU Dialect](https://llvm.swoogo.com/2024eurollvm/session/2086997/zero-to-hero-programming-nvidia-hopper-tensor-core-with-mlir's-nvgpu-dialect)). For it, I implemented the tutorial code in Python. The focus is the nvgpu dialect and how to use its advanced features. I thought it might be useful to upstream this. The tutorial files are as follows:
- **Ch0.py:** Hello World
- **Ch1.py:** 2D Saxpy
- **Ch2.py:** 2D Saxpy using TMA
- **Ch3.py:** GEMM 128x128x64 using Tensor Core and TMA
- **Ch4.py:** Multistage performant GEMM using Tensor Core and TMA
- **Ch5.py:** Warp Specialized GEMM using Tensor Core and TMA

I might implement one more chapter:
- **Ch6.py:** Warp Specialized Persistent ping-pong GEMM

This PR also introduces the nvdsl class, which makes IR building in the tutorial easier.
46 lines
1.6 KiB
Python
46 lines
1.6 KiB
Python
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
# See https://llvm.org/LICENSE.txt for license information.
|
|
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
# This file contains the NvgpuCompiler class.
|
|
|
|
from mlir import execution_engine
|
|
from mlir import ir
|
|
from mlir import passmanager
|
|
from typing import Sequence
|
|
import errno
|
|
import os
|
|
import sys
|
|
|
|
# Absolute path of the directory that contains this file. It is appended to
# sys.path (at the end, so existing entries keep priority) so that modules
# located next to this file can be imported by name.
_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
|
|
sys.path.append(_SCRIPT_PATH)
|
|
|
|
|
|
class NvgpuCompiler:
    """Helper that lowers MLIR modules with the NVGPU pipeline and JITs them.

    An instance stores three things: the textual pass pipeline
    (``builtin.module(gpu-lower-to-nvvm-pipeline{<options>})``), the
    optimization level, and the shared libraries that are forwarded to the
    execution engine.
    """

    def __init__(self, options: str, opt_level: int, shared_libs: Sequence[str]):
        """Record the pipeline description and the JIT settings.

        Args:
            options: Text placed verbatim between the braces of
                ``gpu-lower-to-nvvm-pipeline{...}``.
            opt_level: Value passed as ``opt_level`` to the execution engine.
            shared_libs: Value passed as ``shared_libs`` to the execution
                engine.
        """
        self.opt_level = opt_level
        self.shared_libs = shared_libs
        # The doubled braces in the f-string produce the literal `{` and `}`
        # that surround the pipeline options.
        self.pipeline = f"builtin.module(gpu-lower-to-nvvm-pipeline{{{options}}})"

    def __call__(self, module: ir.Module):
        """Shorthand for ``compile(module)``; nothing is returned."""
        self.compile(module)

    def compile(self, module: ir.Module):
        """Parse the stored pipeline and run it on ``module.operation``."""
        pass_manager = passmanager.PassManager.parse(self.pipeline)
        pass_manager.run(module.operation)

    def jit(self, module: ir.Module) -> execution_engine.ExecutionEngine:
        """Create an execution engine for ``module``.

        The engine is built with the optimization level and shared libraries
        given at construction time. This method does not run the pass
        pipeline itself.
        """
        engine = execution_engine.ExecutionEngine(
            module,
            opt_level=self.opt_level,
            shared_libs=self.shared_libs,
        )
        return engine

    def compile_and_jit(self, module: ir.Module) -> execution_engine.ExecutionEngine:
        """Run ``compile`` on ``module``, then return the result of ``jit``."""
        self.compile(module)
        return self.jit(module)
|