Files
clang-p2996/mlir/lib/Target/LLVM/ROCDL/Utils.cpp
Fabian Mora 016e1eb9c8 [mlir][gpu] Add metadata attributes for storing kernel metadata in GPU objects (#95292)
This patch adds the `#gpu.kernel_metadata` and `#gpu.kernel_table`
attributes. The `#gpu.kernel_metadata` attribute allows storing metadata
related to a compiled kernel, for example, the number of scalar
registers used by the kernel. The attribute only has 2 required
parameters, the name and function type. It also has 2 optional
parameters, the argument attributes and a generic dictionary for storing
all other metadata.

The `#gpu.kernel_table` stores a table of `#gpu.kernel_metadata`,
mapping the name of the kernel to the metadata.

Finally, the function `ROCDL::getAMDHSAKernelsELFMetadata` was added to
collect ELF metadata from a binary, and to test the class methods in
both attributes.

Example:
```mlir
gpu.binary @binary [#gpu.object<#rocdl.target<chip = "gfx900">, kernels = #gpu.kernel_table<[
    #gpu.kernel_metadata<"kernel0", (i32) -> (), metadata = {sgpr_count = 255}>,
    #gpu.kernel_metadata<"kernel1", (i32, f32) -> (), arg_attrs = [{llvm.read_only}, {}]>
  ]> , bin = "BLOB">]

```
The motivation behind these attributes is to provide useful information
for things like tuning.

---------

Co-authored-by: Mehdi Amini <joker.eph@gmail.com>
2024-08-27 18:44:50 -04:00

88 lines
3.9 KiB
C++

//===- Utils.cpp - MLIR ROCDL target utils ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines ROCDL target related utility classes and functions.
//
//===----------------------------------------------------------------------===//
#include "mlir/Target/LLVM/ROCDL/Utils.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Frontend/Offloading/Utility.h"
using namespace mlir;
using namespace mlir::ROCDL;
/// Reads the AMDGPU kernel metadata out of the ELF image in `elfData` and
/// returns it as a map from kernel name to a list of named attributes built
/// with `builder`. Returns `std::nullopt` when the metadata cannot be
/// retrieved from the image.
std::optional<DenseMap<StringAttr, NamedAttrList>>
mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder,
                                         ArrayRef<char> elfData) {
  // Wrap the raw bytes so the offloading utility can read them as an image.
  llvm::MemoryBufferRef image(StringRef(elfData.data(), elfData.size()),
                              "buffer");

  // Outputs filled in by the metadata reader.
  llvm::StringMap<llvm::offloading::amdgpu::AMDGPUKernelMetaData> rawKernels;
  uint16_t elfABIVersion;

  // A failed read is not reported to the caller; the error is consumed and an
  // empty optional is returned instead.
  if (llvm::Error readError =
          llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
              image, rawKernels, elfABIVersion)) {
    llvm::consumeError(std::move(readError));
    return std::nullopt;
  }

  // Converts a 3-element unsigned array into a dense i32 array attribute.
  auto makeI32Triple = [&builder](const uint32_t *values) {
    const int32_t converted[3] = {static_cast<int32_t>(values[0]),
                                  static_cast<int32_t>(values[1]),
                                  static_cast<int32_t>(values[2])};
    return builder.getDenseI32ArrayAttr(converted);
  };

  DenseMap<StringAttr, NamedAttrList> kernelMD;
  for (const auto &entry : rawKernels) {
    const auto &info = entry.getValue();
    NamedAttrList kernelAttrs;

    // Appends a single integer-valued entry as an i64 attribute.
    auto addI64 = [&builder, &kernelAttrs](StringRef key, int64_t value) {
      kernelAttrs.append(key, builder.getI64IntegerAttr(value));
    };

    // Register usage and spill counts.
    addI64("agpr_count", info.AGPRCount);
    addI64("sgpr_count", info.SGPRCount);
    addI64("vgpr_count", info.VGPRCount);
    addI64("sgpr_spill_count", info.SGPRSpillCount);
    addI64("vgpr_spill_count", info.VGPRSpillCount);

    // Wavefront, workgroup and segment sizes.
    addI64("wavefront_size", info.WavefrontSize);
    addI64("max_flat_workgroup_size", info.MaxFlatWorkgroupSize);
    addI64("group_segment_fixed_size", info.GroupSegmentList);
    addI64("private_segment_fixed_size", info.PrivateSegmentSize);

    // Three-component workgroup size entries.
    kernelAttrs.append("reqd_workgroup_size",
                       makeI32Triple(info.RequestedWorkgroupSize));
    kernelAttrs.append("workgroup_size_hint",
                       makeI32Triple(info.WorkgroupSizeHint));

    kernelMD[builder.getStringAttr(entry.getKey())] = std::move(kernelAttrs);
  }
  return std::move(kernelMD);
}
/// Builds a `gpu::KernelTableAttr` for `gpuModule`, which must be a
/// `gpu::GPUModuleOp`. One entry is created for every `LLVM::LLVMFuncOp` in
/// the module body that carries the `rocdl.kernel` discardable attribute. When
/// ELF metadata can be read from `elfData`, each entry also receives the
/// dictionary looked up under the function's name; otherwise the entry is
/// created with a null metadata dictionary.
gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule,
                                                    ArrayRef<char> elfData) {
  auto moduleOp = cast<gpu::GPUModuleOp>(gpuModule);
  Builder attrBuilder(moduleOp.getContext());

  // Empty optional when the ELF metadata could not be retrieved.
  std::optional<DenseMap<StringAttr, NamedAttrList>> elfMetadata =
      getAMDHSAKernelsELFMetadata(attrBuilder, elfData);

  SmallVector<gpu::KernelMetadataAttr> tableEntries;
  for (auto kernelFn : moduleOp.getBody()->getOps<LLVM::LLVMFuncOp>()) {
    // Only functions marked as ROCDL kernels are added to the table.
    if (!kernelFn->getDiscardableAttr("rocdl.kernel"))
      continue;

    // Stays null unless ELF metadata is available.
    DictionaryAttr metadata = nullptr;
    if (elfMetadata)
      metadata = attrBuilder.getDictionaryAttr(
          elfMetadata->lookup(kernelFn.getNameAttr()));

    tableEntries.push_back(gpu::KernelMetadataAttr::get(kernelFn, metadata));
  }
  return gpu::KernelTableAttr::get(gpuModule->getContext(), tableEntries);
}