Files
clang-p2996/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
Fabian Mora 8e12f31be5 [mlir][gpu] Update LaunchFuncOp lowering in GPU to LLVM (#94991)
This patch updates the lowering of `LaunchFuncOp` in GPU to LLVM to only
legalize the operation with the converted operands, effectively removing
the lowering used by the old serialization pipeline.
It also removes all remaining uses of the old gpu serialization
infrastructure in `gpu-to-llvm`.

See [Compilation overview | 'gpu' Dialect - MLIR
docs](https://mlir.llvm.org/docs/Dialects/GPU/#compilation-overview) for
additional information on the target attributes compilation pipeline
that replaced the old serialization pipeline.
2024-06-10 20:22:22 -05:00

102 lines
3.7 KiB
MLIR

// RUN: mlir-opt %s --gpu-to-llvm -split-input-file | FileCheck %s
// Case 1: launch of a kernel that lives in a gpu.module annotated with an NVVM
// target. The directives in this case expect gpu.launch_func to still be
// present in the output, with its operands replaced by converted values.
module attributes {gpu.container_module} {
// CHECK: gpu.module
gpu.module @kernel_module [#nvvm.target] {
// Empty kernel. Its signature (one i32, two pointers, three i64 values) has the
// same shape as the argument list expected on the launch op further down.
llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
%arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
%arg5: i64) attributes {gpu.kernel} {
llvm.return
}
}
// Host function that performs the launch; takes a dynamically sized f32 memref.
func.func @foo(%buffer: memref<?xf32>) {
// The three arith constants below are expected to appear as llvm.mlir.constant
// ops; the index-typed one is expected to have type i64.
// CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
// CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
%c8 = arith.constant 8 : index
%c32 = arith.constant 32 : i32
%c256 = arith.constant 256 : i32
// Expected launch: same kernel symbol, 8x8x8 blocks and 8x8x8 threads typed as
// i64, the 256 constant as dynamic shared memory size, and an argument list
// in which the single memref input shows up as two !llvm.ptr values followed by
// three i64 values.
// NOTE(review): presumably these five values are the unpacked memref
// descriptor (two pointers, offset, size, stride) -- confirm.
// CHECK: gpu.launch_func @kernel_module::@kernel
// CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
// CHECK: dynamic_shared_memory_size [[C256]]
// CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
gpu.launch_func @kernel_module::@kernel
blocks in (%c8, %c8, %c8)
threads in (%c8, %c8, %c8)
dynamic_shared_memory_size %c256
args(%c32 : i32, %buffer : memref<?xf32>)
return
}
}
// -----
// Case 2: same setup as the NVVM gpu.module launch, but the launch op also
// carries a `clusters in` clause. The directives expect the cluster sizes to be
// kept on the launch op and to refer to the converted constant.
module attributes {gpu.container_module} {
// CHECK: gpu.module
gpu.module @kernel_module [#nvvm.target] {
// Empty kernel taking one i32, two pointers and three i64 values.
llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
%arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
%arg5: i64) attributes {gpu.kernel} {
llvm.return
}
}
// Host function that performs the launch; takes a dynamically sized f32 memref.
func.func @foo(%buffer: memref<?xf32>) {
// All four arith constants are expected to appear as llvm.mlir.constant ops,
// in the same order as they are defined here; index-typed ones become i64.
// CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
// CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
// CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : index) : i64
%c8 = arith.constant 8 : index
%c32 = arith.constant 32 : i32
%c256 = arith.constant 256 : i32
%c2 = arith.constant 2 : index
// Expected launch: 2x2x2 clusters, 8x8x8 blocks and 8x8x8 threads, the 256
// constant as dynamic shared memory size, and the memref input listed as two
// !llvm.ptr values followed by three i64 values.
// CHECK: gpu.launch_func @kernel_module::@kernel
// CHECK: clusters in ([[C2]], [[C2]], [[C2]])
// CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
// CHECK: dynamic_shared_memory_size [[C256]]
// CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
gpu.launch_func @kernel_module::@kernel
clusters in (%c2, %c2, %c2)
blocks in (%c8, %c8, %c8)
threads in (%c8, %c8, %c8)
dynamic_shared_memory_size %c256
args(%c32 : i32, %buffer : memref<?xf32>)
return
}
}
// -----
// Case 3: the launched symbol's container is a gpu.binary instead of a
// gpu.module. No kernel body is present in this module; the binary holds a
// single object with a ROCDL target and the placeholder payload "blob". The
// expectations on the launch op are the same as in the cluster launch case.
module attributes {gpu.container_module} {
// CHECK: gpu.binary
gpu.binary @kernel_module [#gpu.object<#rocdl.target, "blob">]
// Host function that performs the launch; takes a dynamically sized f32 memref.
func.func @foo(%buffer: memref<?xf32>) {
// All four arith constants are expected to appear as llvm.mlir.constant ops,
// in the same order as they are defined here; index-typed ones become i64.
// CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
// CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
// CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
// CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : index) : i64
%c8 = arith.constant 8 : index
%c32 = arith.constant 32 : i32
%c256 = arith.constant 256 : i32
%c2 = arith.constant 2 : index
// Expected launch: the symbol reference into the binary is kept, with 2x2x2
// clusters, 8x8x8 blocks and 8x8x8 threads, the 256 constant as dynamic shared
// memory size, and the memref input listed as two !llvm.ptr values followed by
// three i64 values.
// NOTE(review): a `clusters in` clause is combined with a ROCDL object here;
// this looks intentional for exercising the lowering only -- confirm.
// CHECK: gpu.launch_func @kernel_module::@kernel
// CHECK: clusters in ([[C2]], [[C2]], [[C2]])
// CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
// CHECK: dynamic_shared_memory_size [[C256]]
// CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
gpu.launch_func @kernel_module::@kernel
clusters in (%c2, %c2, %c2)
blocks in (%c8, %c8, %c8)
threads in (%c8, %c8, %c8)
dynamic_shared_memory_size %c256
args(%c32 : i32, %buffer : memref<?xf32>)
return
}
}