This patch adds the `#gpu.kernel_metadata` and `#gpu.kernel_table`
attributes. The `#gpu.kernel_metadata` attribute allows storing metadata
related to a compiled kernel, for example, the number of scalar
registers used by the kernel. The attribute only has 2 required
parameters, the name and function type. It also has 2 optional
parameters, the arguments attributes and generic dictionary for storing
all other metadata.
The `#gpu.kernel_table` stores a table of `#gpu.kernel_metadata`,
mapping the name of the kernel to the metadata.
Finally, the function `ROCDL::getAMDHSAKernelsELFMetadata` was added to
collect ELF metadata from a binary, and to test the class methods in
both attributes.
Example:
```mlir
gpu.binary @binary [#gpu.object<#rocdl.target<chip = "gfx900">, kernels = #gpu.kernel_table<[
#gpu.kernel_metadata<"kernel0", (i32) -> (), metadata = {sgpr_count = 255}>,
#gpu.kernel_metadata<"kernel1", (i32, f32) -> (), arg_attrs = [{llvm.read_only}, {}]>
]> , bin = "BLOB">]
```
The motivation behind these attributes is to provide useful information
for things like tunning.
---------
Co-authored-by: Mehdi Amini <joker.eph@gmail.com>
88 lines
3.9 KiB
C++
88 lines
3.9 KiB
C++
//===- Utils.cpp - MLIR ROCDL target utils ----------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This files defines ROCDL target related utility classes and functions.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Target/LLVM/ROCDL/Utils.h"
|
|
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
|
|
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
|
|
|
|
#include "llvm/ADT/StringMap.h"
|
|
#include "llvm/Frontend/Offloading/Utility.h"
|
|
|
|
using namespace mlir;
|
|
using namespace mlir::ROCDL;
|
|
|
|
std::optional<DenseMap<StringAttr, NamedAttrList>>
|
|
mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder,
|
|
ArrayRef<char> elfData) {
|
|
uint16_t elfABIVersion;
|
|
llvm::StringMap<llvm::offloading::amdgpu::AMDGPUKernelMetaData> kernels;
|
|
llvm::MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()),
|
|
"buffer");
|
|
// Get the metadata.
|
|
llvm::Error error = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
|
|
buffer, kernels, elfABIVersion);
|
|
// Return `nullopt` if the metadata couldn't be retrieved.
|
|
if (error) {
|
|
llvm::consumeError(std::move(error));
|
|
return std::nullopt;
|
|
}
|
|
// Helper lambda for converting values.
|
|
auto getI32Array = [&builder](const uint32_t *array) {
|
|
return builder.getDenseI32ArrayAttr({static_cast<int32_t>(array[0]),
|
|
static_cast<int32_t>(array[1]),
|
|
static_cast<int32_t>(array[2])});
|
|
};
|
|
DenseMap<StringAttr, NamedAttrList> kernelMD;
|
|
for (const auto &[name, kernel] : kernels) {
|
|
NamedAttrList attrs;
|
|
// Add kernel metadata.
|
|
attrs.append("agpr_count", builder.getI64IntegerAttr(kernel.AGPRCount));
|
|
attrs.append("sgpr_count", builder.getI64IntegerAttr(kernel.SGPRCount));
|
|
attrs.append("vgpr_count", builder.getI64IntegerAttr(kernel.VGPRCount));
|
|
attrs.append("sgpr_spill_count",
|
|
builder.getI64IntegerAttr(kernel.SGPRSpillCount));
|
|
attrs.append("vgpr_spill_count",
|
|
builder.getI64IntegerAttr(kernel.VGPRSpillCount));
|
|
attrs.append("wavefront_size",
|
|
builder.getI64IntegerAttr(kernel.WavefrontSize));
|
|
attrs.append("max_flat_workgroup_size",
|
|
builder.getI64IntegerAttr(kernel.MaxFlatWorkgroupSize));
|
|
attrs.append("group_segment_fixed_size",
|
|
builder.getI64IntegerAttr(kernel.GroupSegmentList));
|
|
attrs.append("private_segment_fixed_size",
|
|
builder.getI64IntegerAttr(kernel.PrivateSegmentSize));
|
|
attrs.append("reqd_workgroup_size",
|
|
getI32Array(kernel.RequestedWorkgroupSize));
|
|
attrs.append("workgroup_size_hint", getI32Array(kernel.WorkgroupSizeHint));
|
|
kernelMD[builder.getStringAttr(name)] = std::move(attrs);
|
|
}
|
|
return std::move(kernelMD);
|
|
}
|
|
|
|
gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule,
|
|
ArrayRef<char> elfData) {
|
|
auto module = cast<gpu::GPUModuleOp>(gpuModule);
|
|
Builder builder(module.getContext());
|
|
SmallVector<gpu::KernelMetadataAttr> kernels;
|
|
std::optional<DenseMap<StringAttr, NamedAttrList>> mdMapOrNull =
|
|
getAMDHSAKernelsELFMetadata(builder, elfData);
|
|
for (auto funcOp : module.getBody()->getOps<LLVM::LLVMFuncOp>()) {
|
|
if (!funcOp->getDiscardableAttr("rocdl.kernel"))
|
|
continue;
|
|
kernels.push_back(gpu::KernelMetadataAttr::get(
|
|
funcOp, mdMapOrNull ? builder.getDictionaryAttr(
|
|
mdMapOrNull->lookup(funcOp.getNameAttr()))
|
|
: nullptr));
|
|
}
|
|
return gpu::KernelTableAttr::get(gpuModule->getContext(), kernels);
|
|
}
|