This PR adds ability to pass non-default value to `.amdhsa_code_object_version` metadata when serializing ROCDL GPU modules. It also fixes typos in two places. --------- Co-authored-by: Fabian Mora <fmora.dev@gmail.com>
522 lines
19 KiB
C++
522 lines
19 KiB
C++
//===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This files defines ROCDL target related functions including registration
|
|
// calls for the `#rocdl.target` compilation attribute.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Target/LLVM/ROCDL/Target.h"
|
|
|
|
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
|
|
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
|
|
#include "mlir/Support/FileUtilities.h"
|
|
#include "mlir/Target/LLVM/ROCDL/Utils.h"
|
|
#include "mlir/Target/LLVMIR/Export.h"
|
|
|
|
#include "llvm/IR/Constants.h"
|
|
#include "llvm/MC/MCAsmBackend.h"
|
|
#include "llvm/MC/MCAsmInfo.h"
|
|
#include "llvm/MC/MCCodeEmitter.h"
|
|
#include "llvm/MC/MCContext.h"
|
|
#include "llvm/MC/MCInstrInfo.h"
|
|
#include "llvm/MC/MCObjectFileInfo.h"
|
|
#include "llvm/MC/MCObjectWriter.h"
|
|
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
|
|
#include "llvm/MC/MCRegisterInfo.h"
|
|
#include "llvm/MC/MCStreamer.h"
|
|
#include "llvm/MC/MCSubtargetInfo.h"
|
|
#include "llvm/MC/TargetRegistry.h"
|
|
#include "llvm/Support/FileSystem.h"
|
|
#include "llvm/Support/FileUtilities.h"
|
|
#include "llvm/Support/Path.h"
|
|
#include "llvm/Support/Program.h"
|
|
#include "llvm/Support/SourceMgr.h"
|
|
#include "llvm/Support/TargetSelect.h"
|
|
#include "llvm/TargetParser/TargetParser.h"
|
|
|
|
#include <cstdlib>
|
|
#include <optional>
|
|
|
|
using namespace mlir;
|
|
using namespace mlir::ROCDL;
|
|
|
|
#ifndef __DEFAULT_ROCM_PATH__
|
|
#define __DEFAULT_ROCM_PATH__ ""
|
|
#endif
|
|
|
|
namespace {
|
|
// Implementation of the `TargetAttrInterface` model.
|
|
class ROCDLTargetAttrImpl
|
|
: public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
|
|
public:
|
|
std::optional<SmallVector<char, 0>>
|
|
serializeToObject(Attribute attribute, Operation *module,
|
|
const gpu::TargetOptions &options) const;
|
|
|
|
Attribute createObject(Attribute attribute, Operation *module,
|
|
const SmallVector<char, 0> &object,
|
|
const gpu::TargetOptions &options) const;
|
|
};
|
|
} // namespace
|
|
|
|
// Register the ROCDL dialect, the ROCDL translation and the target interface.
|
|
void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
|
|
DialectRegistry ®istry) {
|
|
registry.addExtension(+[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) {
|
|
ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx);
|
|
});
|
|
}
|
|
|
|
void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
|
|
MLIRContext &context) {
|
|
DialectRegistry registry;
|
|
registerROCDLTargetInterfaceExternalModels(registry);
|
|
context.appendDialectRegistry(registry);
|
|
}
|
|
|
|
// Search for the ROCM path.
|
|
StringRef mlir::ROCDL::getROCMPath() {
|
|
if (const char *var = std::getenv("ROCM_PATH"))
|
|
return var;
|
|
if (const char *var = std::getenv("ROCM_ROOT"))
|
|
return var;
|
|
if (const char *var = std::getenv("ROCM_HOME"))
|
|
return var;
|
|
return __DEFAULT_ROCM_PATH__;
|
|
}
|
|
|
|
SerializeGPUModuleBase::SerializeGPUModuleBase(
|
|
Operation &module, ROCDLTargetAttr target,
|
|
const gpu::TargetOptions &targetOptions)
|
|
: ModuleToObject(module, target.getTriple(), target.getChip(),
|
|
target.getFeatures(), target.getO()),
|
|
target(target), toolkitPath(targetOptions.getToolkitPath()),
|
|
fileList(targetOptions.getLinkFiles()) {
|
|
|
|
// If `targetOptions` has an empty toolkitPath use `getROCMPath`
|
|
if (toolkitPath.empty())
|
|
toolkitPath = getROCMPath();
|
|
|
|
// Append the files in the target attribute.
|
|
if (ArrayAttr files = target.getLink())
|
|
for (Attribute attr : files.getValue())
|
|
if (auto file = dyn_cast<StringAttr>(attr))
|
|
fileList.push_back(file.str());
|
|
}
|
|
|
|
void SerializeGPUModuleBase::init() {
|
|
static llvm::once_flag initializeBackendOnce;
|
|
llvm::call_once(initializeBackendOnce, []() {
|
|
// If the `AMDGPU` LLVM target was built, initialize it.
|
|
#if MLIR_ENABLE_ROCM_CONVERSIONS
|
|
LLVMInitializeAMDGPUTarget();
|
|
LLVMInitializeAMDGPUTargetInfo();
|
|
LLVMInitializeAMDGPUTargetMC();
|
|
LLVMInitializeAMDGPUAsmParser();
|
|
LLVMInitializeAMDGPUAsmPrinter();
|
|
#endif
|
|
});
|
|
}
|
|
|
|
ROCDLTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
|
|
|
|
StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; }
|
|
|
|
ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const {
|
|
return fileList;
|
|
}
|
|
|
|
LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) {
|
|
if (libs == AMDGCNLibraries::None)
|
|
return success();
|
|
StringRef pathRef = getToolkitPath();
|
|
|
|
// Get the path for the device libraries
|
|
SmallString<256> path;
|
|
path.insert(path.begin(), pathRef.begin(), pathRef.end());
|
|
llvm::sys::path::append(path, "amdgcn", "bitcode");
|
|
pathRef = StringRef(path.data(), path.size());
|
|
|
|
// Fail if the path is invalid.
|
|
if (!llvm::sys::fs::is_directory(pathRef)) {
|
|
getOperation().emitError() << "ROCm amdgcn bitcode path: " << pathRef
|
|
<< " does not exist or is not a directory";
|
|
return failure();
|
|
}
|
|
|
|
// Helper function for adding a library.
|
|
auto addLib = [&](const Twine &lib) -> bool {
|
|
auto baseSize = path.size();
|
|
llvm::sys::path::append(path, lib);
|
|
StringRef pathRef(path.data(), path.size());
|
|
if (!llvm::sys::fs::is_regular_file(pathRef)) {
|
|
getOperation().emitRemark() << "bitcode library path: " << pathRef
|
|
<< " does not exist or is not a file";
|
|
return true;
|
|
}
|
|
fileList.push_back(pathRef.str());
|
|
path.truncate(baseSize);
|
|
return false;
|
|
};
|
|
|
|
// Add ROCm device libraries. Fail if any of the libraries is not found, ie.
|
|
// if any of the `addLib` failed.
|
|
if ((any(libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) ||
|
|
(any(libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) ||
|
|
(any(libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) ||
|
|
(any(libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc")))
|
|
return failure();
|
|
return success();
|
|
}
|
|
|
|
std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
|
|
SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
|
|
// Return if there are no libs to load.
|
|
if (deviceLibs == AMDGCNLibraries::None && fileList.empty())
|
|
return SmallVector<std::unique_ptr<llvm::Module>>();
|
|
if (failed(appendStandardLibs(deviceLibs)))
|
|
return std::nullopt;
|
|
SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
|
|
if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles,
|
|
true)))
|
|
return std::nullopt;
|
|
return std::move(bcFiles);
|
|
}
|
|
|
|
LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) {
|
|
// Some ROCM builds don't strip this like they should
|
|
if (auto *openclVersion = module.getNamedMetadata("opencl.ocl.version"))
|
|
module.eraseNamedMetadata(openclVersion);
|
|
// Stop spamming us with clang version numbers
|
|
if (auto *ident = module.getNamedMetadata("llvm.ident"))
|
|
module.eraseNamedMetadata(ident);
|
|
// Override the libModules datalayout and target triple with the compiler's
|
|
// data layout should there be a discrepency.
|
|
setDataLayoutAndTriple(module);
|
|
return success();
|
|
}
|
|
|
|
void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) {
|
|
// If all libraries are not set, traverse the module to determine which
|
|
// libraries are required.
|
|
if (deviceLibs != AMDGCNLibraries::All) {
|
|
for (llvm::Function &f : module.functions()) {
|
|
if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) {
|
|
StringRef funcName = f.getName();
|
|
if ("printf" == funcName)
|
|
deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl |
|
|
AMDGCNLibraries::Ocml;
|
|
if (funcName.starts_with("__ockl_"))
|
|
deviceLibs |= AMDGCNLibraries::Ockl;
|
|
if (funcName.starts_with("__ocml_"))
|
|
deviceLibs |= AMDGCNLibraries::Ocml;
|
|
if (funcName == "__atomic_work_item_fence")
|
|
deviceLibs |= AMDGCNLibraries::Hip;
|
|
}
|
|
}
|
|
}
|
|
addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(),
|
|
target.hasFiniteOnly(), target.hasUnsafeMath(),
|
|
target.hasFastMath(), target.hasCorrectSqrt(),
|
|
target.getAbi());
|
|
}
|
|
|
|
void SerializeGPUModuleBase::addControlVariables(
|
|
llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz,
|
|
bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt,
|
|
StringRef abiVer) {
|
|
// Helper function for adding control variables.
|
|
auto addControlVariable = [&module](StringRef name, uint32_t value,
|
|
uint32_t bitwidth) {
|
|
if (module.getNamedGlobal(name))
|
|
return;
|
|
llvm::IntegerType *type =
|
|
llvm::IntegerType::getIntNTy(module.getContext(), bitwidth);
|
|
llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable(
|
|
module, /*isConstant=*/type, true,
|
|
llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
|
|
llvm::ConstantInt::get(type, value), name, /*before=*/nullptr,
|
|
/*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
|
|
/*addressSpace=*/4);
|
|
controlVariable->setVisibility(
|
|
llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
|
|
controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8));
|
|
controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
|
|
};
|
|
|
|
int abi = 500;
|
|
abiVer.getAsInteger(0, abi);
|
|
module.addModuleFlag(llvm::Module::Error, "amdhsa_code_object_version", abi);
|
|
// Return if no device libraries are required.
|
|
if (libs == AMDGCNLibraries::None)
|
|
return;
|
|
// Add ocml related control variables.
|
|
if (any(libs & AMDGCNLibraries::Ocml)) {
|
|
addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8);
|
|
addControlVariable("__oclc_daz_opt", daz || fastMath, 8);
|
|
addControlVariable("__oclc_correctly_rounded_sqrt32",
|
|
correctSqrt && !fastMath, 8);
|
|
addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8);
|
|
}
|
|
// Add ocml or ockl related control variables.
|
|
if (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) {
|
|
addControlVariable("__oclc_wavefrontsize64", wave64, 8);
|
|
// Get the ISA version.
|
|
llvm::AMDGPU::IsaVersion isaVersion = llvm::AMDGPU::getIsaVersion(chip);
|
|
// Add the ISA control variable.
|
|
addControlVariable("__oclc_ISA_version",
|
|
isaVersion.Minor + 100 * isaVersion.Stepping +
|
|
1000 * isaVersion.Major,
|
|
32);
|
|
addControlVariable("__oclc_ABI_version", abi, 32);
|
|
}
|
|
}
|
|
|
|
std::optional<SmallVector<char, 0>>
|
|
SerializeGPUModuleBase::assembleIsa(StringRef isa) {
|
|
auto loc = getOperation().getLoc();
|
|
|
|
StringRef targetTriple = this->triple;
|
|
|
|
SmallVector<char, 0> result;
|
|
llvm::raw_svector_ostream os(result);
|
|
|
|
llvm::Triple triple(llvm::Triple::normalize(targetTriple));
|
|
std::string error;
|
|
const llvm::Target *target =
|
|
llvm::TargetRegistry::lookupTarget(triple.normalize(), error);
|
|
if (!target) {
|
|
emitError(loc, Twine("failed to lookup target: ") + error);
|
|
return std::nullopt;
|
|
}
|
|
|
|
llvm::SourceMgr srcMgr;
|
|
srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc());
|
|
|
|
const llvm::MCTargetOptions mcOptions;
|
|
std::unique_ptr<llvm::MCRegisterInfo> mri(
|
|
target->createMCRegInfo(targetTriple));
|
|
std::unique_ptr<llvm::MCAsmInfo> mai(
|
|
target->createMCAsmInfo(*mri, targetTriple, mcOptions));
|
|
std::unique_ptr<llvm::MCSubtargetInfo> sti(
|
|
target->createMCSubtargetInfo(targetTriple, chip, features));
|
|
|
|
llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr,
|
|
&mcOptions);
|
|
std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo(
|
|
ctx, /*PIC=*/false, /*LargeCodeModel=*/false));
|
|
ctx.setObjectFileInfo(mofi.get());
|
|
|
|
SmallString<128> cwd;
|
|
if (!llvm::sys::fs::current_path(cwd))
|
|
ctx.setCompilationDir(cwd);
|
|
|
|
std::unique_ptr<llvm::MCStreamer> mcStreamer;
|
|
std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
|
|
|
|
llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx);
|
|
llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions);
|
|
mcStreamer.reset(target->createMCObjectStreamer(
|
|
triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
|
|
mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
|
|
*sti));
|
|
|
|
std::unique_ptr<llvm::MCAsmParser> parser(
|
|
createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
|
|
std::unique_ptr<llvm::MCTargetAsmParser> tap(
|
|
target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
|
|
|
|
if (!tap) {
|
|
emitError(loc, "assembler initialization error");
|
|
return std::nullopt;
|
|
}
|
|
|
|
parser->setTargetParser(*tap);
|
|
parser->Run(false);
|
|
return std::move(result);
|
|
}
|
|
|
|
std::optional<SmallVector<char, 0>>
|
|
SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) {
|
|
// Assemble the ISA.
|
|
std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(serializedISA);
|
|
|
|
if (!isaBinary) {
|
|
getOperation().emitError() << "failed during ISA assembling";
|
|
return std::nullopt;
|
|
}
|
|
|
|
// Save the ISA binary to a temp file.
|
|
int tempIsaBinaryFd = -1;
|
|
SmallString<128> tempIsaBinaryFilename;
|
|
if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd,
|
|
tempIsaBinaryFilename)) {
|
|
getOperation().emitError()
|
|
<< "failed to create a temporary file for dumping the ISA binary";
|
|
return std::nullopt;
|
|
}
|
|
llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
|
|
{
|
|
llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
|
|
tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size());
|
|
tempIsaBinaryOs.flush();
|
|
}
|
|
|
|
// Create a temp file for HSA code object.
|
|
SmallString<128> tempHsacoFilename;
|
|
if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco",
|
|
tempHsacoFilename)) {
|
|
getOperation().emitError()
|
|
<< "failed to create a temporary file for the HSA code object";
|
|
return std::nullopt;
|
|
}
|
|
llvm::FileRemover cleanupHsaco(tempHsacoFilename);
|
|
|
|
llvm::SmallString<128> lldPath(toolkitPath);
|
|
llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld");
|
|
int lldResult = llvm::sys::ExecuteAndWait(
|
|
lldPath,
|
|
{"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename});
|
|
if (lldResult != 0) {
|
|
getOperation().emitError() << "lld invocation failed";
|
|
return std::nullopt;
|
|
}
|
|
|
|
// Load the HSA code object.
|
|
auto hsacoFile =
|
|
llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false);
|
|
if (!hsacoFile) {
|
|
getOperation().emitError()
|
|
<< "failed to read the HSA code object from the temp file";
|
|
return std::nullopt;
|
|
}
|
|
|
|
StringRef buffer = (*hsacoFile)->getBuffer();
|
|
|
|
return SmallVector<char, 0>(buffer.begin(), buffer.end());
|
|
}
|
|
|
|
std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl(
|
|
const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) {
|
|
// Return LLVM IR if the compilation target is offload.
|
|
#define DEBUG_TYPE "serialize-to-llvm"
|
|
LLVM_DEBUG({
|
|
llvm::dbgs() << "LLVM IR for module: "
|
|
<< cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n"
|
|
<< llvmModule << "\n";
|
|
});
|
|
#undef DEBUG_TYPE
|
|
if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
|
|
return SerializeGPUModuleBase::moduleToObject(llvmModule);
|
|
|
|
std::optional<llvm::TargetMachine *> targetMachine =
|
|
getOrCreateTargetMachine();
|
|
if (!targetMachine) {
|
|
getOperation().emitError() << "target Machine unavailable for triple "
|
|
<< triple << ", can't compile with LLVM";
|
|
return std::nullopt;
|
|
}
|
|
|
|
// Translate the Module to ISA.
|
|
std::optional<std::string> serializedISA =
|
|
translateToISA(llvmModule, **targetMachine);
|
|
if (!serializedISA) {
|
|
getOperation().emitError() << "failed translating the module to ISA";
|
|
return std::nullopt;
|
|
}
|
|
#define DEBUG_TYPE "serialize-to-isa"
|
|
LLVM_DEBUG({
|
|
llvm::dbgs() << "ISA for module: "
|
|
<< cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n"
|
|
<< *serializedISA << "\n";
|
|
});
|
|
#undef DEBUG_TYPE
|
|
// Return ISA assembly code if the compilation target is assembly.
|
|
if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly)
|
|
return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());
|
|
|
|
// Compiling to binary requires a valid ROCm path, fail if it's not found.
|
|
if (getToolkitPath().empty()) {
|
|
getOperation().emitError() << "invalid ROCm path, please set a valid path";
|
|
return std::nullopt;
|
|
}
|
|
|
|
// Compile to binary.
|
|
return compileToBinary(*serializedISA);
|
|
}
|
|
|
|
#if MLIR_ENABLE_ROCM_CONVERSIONS
|
|
namespace {
|
|
class AMDGPUSerializer : public SerializeGPUModuleBase {
|
|
public:
|
|
AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
|
|
const gpu::TargetOptions &targetOptions);
|
|
|
|
std::optional<SmallVector<char, 0>>
|
|
moduleToObject(llvm::Module &llvmModule) override;
|
|
|
|
private:
|
|
// Target options.
|
|
gpu::TargetOptions targetOptions;
|
|
};
|
|
} // namespace
|
|
|
|
AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
|
|
const gpu::TargetOptions &targetOptions)
|
|
: SerializeGPUModuleBase(module, target, targetOptions),
|
|
targetOptions(targetOptions) {}
|
|
|
|
std::optional<SmallVector<char, 0>>
|
|
AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
|
|
return moduleToObjectImpl(targetOptions, llvmModule);
|
|
}
|
|
#endif // MLIR_ENABLE_ROCM_CONVERSIONS
|
|
|
|
std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
|
|
Attribute attribute, Operation *module,
|
|
const gpu::TargetOptions &options) const {
|
|
assert(module && "The module must be non null.");
|
|
if (!module)
|
|
return std::nullopt;
|
|
if (!mlir::isa<gpu::GPUModuleOp>(module)) {
|
|
module->emitError("module must be a GPU module");
|
|
return std::nullopt;
|
|
}
|
|
#if MLIR_ENABLE_ROCM_CONVERSIONS
|
|
AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute),
|
|
options);
|
|
serializer.init();
|
|
return serializer.run();
|
|
#else
|
|
module->emitError("the `AMDGPU` target was not built. Please enable it when "
|
|
"building LLVM");
|
|
return std::nullopt;
|
|
#endif // MLIR_ENABLE_ROCM_CONVERSIONS
|
|
}
|
|
|
|
Attribute
|
|
ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module,
|
|
const SmallVector<char, 0> &object,
|
|
const gpu::TargetOptions &options) const {
|
|
gpu::CompilationTarget format = options.getCompilationTarget();
|
|
// If format is `fatbin` transform it to binary as `fatbin` is not yet
|
|
// supported.
|
|
gpu::KernelTableAttr kernels;
|
|
if (format > gpu::CompilationTarget::Binary) {
|
|
format = gpu::CompilationTarget::Binary;
|
|
kernels = ROCDL::getKernelMetadata(module, object);
|
|
}
|
|
DictionaryAttr properties{};
|
|
Builder builder(attribute.getContext());
|
|
StringAttr objectStr =
|
|
builder.getStringAttr(StringRef(object.data(), object.size()));
|
|
return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr,
|
|
properties, kernels);
|
|
}
|