This patch updates the lowering of `LaunchFuncOp` in the GPU-to-LLVM conversion so that it only legalizes the operation with the converted operands, effectively removing the lowering used by the old serialization pipeline. It also removes all remaining uses of the old GPU serialization infrastructure in `gpu-to-llvm`. See [Compilation overview | 'gpu' Dialect - MLIR docs](https://mlir.llvm.org/docs/Dialects/GPU/#compilation-overview) for additional information on the target-attribute compilation pipeline that replaced the old serialization pipeline.
102 lines
3.7 KiB
MLIR
102 lines
3.7 KiB
MLIR
// RUN: mlir-opt %s --gpu-to-llvm -split-input-file | FileCheck %s

// Case 1: `gpu.launch_func` on a kernel that lives in a `gpu.module` carrying
// an `#nvvm.target` attribute, launched without a cluster size.
module attributes {gpu.container_module} {
  // The device module must still be present in the output.
  // CHECK: gpu.module
  gpu.module @kernel_module [#nvvm.target] {
    // Kernel whose signature takes one i32, two pointers and three i64 values;
    // this is the argument list the launch below is expected to end up with.
    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
        %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
        %arg5: i64) attributes {gpu.kernel} {
      llvm.return
    }
  }

  func.func @foo(%buffer: memref<?xf32>) {
    // The host constants are expected as `llvm.mlir.constant` ops; their SSA
    // names are captured so the launch operands can be matched against them.
    // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
    // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
    // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
    %c8 = arith.constant 8 : index
    %c32 = arith.constant 32 : i32
    %c256 = arith.constant 256 : i32

    // The launch is expected to stay a `gpu.launch_func` whose operands are
    // the converted values: i64 grid/block sizes, and the single
    // `memref<?xf32>` argument matched as two `!llvm.ptr` plus three i64
    // values (presumably the expanded memref descriptor).
    // CHECK: gpu.launch_func @kernel_module::@kernel
    // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
    // CHECK: dynamic_shared_memory_size [[C256]]
    // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
    gpu.launch_func @kernel_module::@kernel
        blocks in (%c8, %c8, %c8)
        threads in (%c8, %c8, %c8)
        dynamic_shared_memory_size %c256
        args(%c32 : i32, %buffer : memref<?xf32>)
    return
  }
}

// -----

// Case 2: same kernel and `gpu.module` setup as a plain launch, but the
// `gpu.launch_func` additionally specifies a cluster size.
module attributes {gpu.container_module} {
  // The device module must still be present in the output.
  // CHECK: gpu.module
  gpu.module @kernel_module [#nvvm.target] {
    // Kernel whose signature takes one i32, two pointers and three i64 values;
    // this is the argument list the launch below is expected to end up with.
    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
        %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
        %arg5: i64) attributes {gpu.kernel} {
      llvm.return
    }
  }

  func.func @foo(%buffer: memref<?xf32>) {
    // The host constants are expected as `llvm.mlir.constant` ops; their SSA
    // names are captured so the launch operands can be matched against them.
    // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
    // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
    // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
    // CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : index) : i64
    %c8 = arith.constant 8 : index
    %c32 = arith.constant 32 : i32
    %c256 = arith.constant 256 : i32
    %c2 = arith.constant 2 : index

    // The launch is expected to stay a `gpu.launch_func` that keeps its
    // `clusters in` clause and uses the converted values: the cluster
    // constant, i64 grid/block sizes, and the `memref<?xf32>` argument
    // matched as two `!llvm.ptr` plus three i64 values (presumably the
    // expanded memref descriptor).
    // CHECK: gpu.launch_func @kernel_module::@kernel
    // CHECK: clusters in ([[C2]], [[C2]], [[C2]])
    // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
    // CHECK: dynamic_shared_memory_size [[C256]]
    // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
    gpu.launch_func @kernel_module::@kernel
        clusters in (%c2, %c2, %c2)
        blocks in (%c8, %c8, %c8)
        threads in (%c8, %c8, %c8)
        dynamic_shared_memory_size %c256
        args(%c32 : i32, %buffer : memref<?xf32>)
    return
  }
}

// -----

// Case 3: the launch refers to `@kernel_module::@kernel`, but here
// `@kernel_module` is a `gpu.binary` holding a `#rocdl.target` object rather
// than a `gpu.module`; no kernel body is present in this input.
module attributes {gpu.container_module} {
  // The binary op must still be present in the output.
  // CHECK: gpu.binary
  gpu.binary @kernel_module [#gpu.object<#rocdl.target, "blob">]

  func.func @foo(%buffer: memref<?xf32>) {
    // The host constants are expected as `llvm.mlir.constant` ops; their SSA
    // names are captured so the launch operands can be matched against them.
    // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
    // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
    // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
    // CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : index) : i64
    %c8 = arith.constant 8 : index
    %c32 = arith.constant 32 : i32
    %c256 = arith.constant 256 : i32
    %c2 = arith.constant 2 : index

    // The launch is expected to stay a `gpu.launch_func` that keeps its
    // `clusters in` clause and uses the converted values: the cluster
    // constant, i64 grid/block sizes, and the `memref<?xf32>` argument
    // matched as two `!llvm.ptr` plus three i64 values (presumably the
    // expanded memref descriptor).
    // CHECK: gpu.launch_func @kernel_module::@kernel
    // CHECK: clusters in ([[C2]], [[C2]], [[C2]])
    // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
    // CHECK: dynamic_shared_memory_size [[C256]]
    // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
    gpu.launch_func @kernel_module::@kernel
        clusters in (%c2, %c2, %c2)
        blocks in (%c8, %c8, %c8)
        threads in (%c8, %c8, %c8)
        dynamic_shared_memory_size %c256
        args(%c32 : i32, %buffer : memref<?xf32>)
    return
  }
}