Files
clang-p2996/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
Nicolas Vasilache 7c4e8c6a27 [mlir] Disentangle dialect and extension registrations.
This revision avoids the registration of dialect extensions in Pass::getDependentDialects.

Such registration of extensions can be dangerous because `DialectRegistry::isSubsetOf` is
always guaranteed to return false for extensions (i.e. there is no mechanism to track
whether a lambda is already in the list of registered extensions).
When the context is already in a multi-threaded mode, this is guaranteed to assert.

Arguably a more structured registration mechanism for extensions with a unique ExtensionID
could be envisioned in the future.

In the process of cleaning this up, multiple usage inconsistencies surfaced around the
registration of translation extensions that this revision also cleans up.

Reviewed By: springerm

Differential Revision: https://reviews.llvm.org/D157703
2023-08-22 00:40:09 +00:00

154 lines
5.2 KiB
C++

//===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a base class for a pass to serialize a gpu module
// into a binary blob that can be executed on a GPU. The binary blob is added
// as a string attribute to the gpu module.
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/ExecutionEngine/OptUtils.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include <optional>
#include <string>
#define DEBUG_TYPE "serialize-to-blob"
using namespace mlir;
/// Returns the name of the module attribute that holds the serialized binary
/// blob ("gpu.binary").
std::string gpu::getDefaultGpuBinaryAnnotation() {
  return std::string("gpu.binary");
}
/// Constructs the pass, forwarding the derived pass's TypeID to the
/// OperationPass base so each concrete serialization pass keeps its own
/// identity.
gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID)
: OperationPass<gpu::GPUModuleOp>(passID) {}
/// Copy constructor; forwards to the base OperationPass copy constructor.
gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other)
: OperationPass<gpu::GPUModuleOp>(other) {}
/// Lowers `llvmModule` to target assembly via `targetMachine`.
/// The module is first run through the configured LLVM optimization pipeline.
/// Returns std::nullopt if optimization or code emission fails.
std::optional<std::string>
gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule,
                                         llvm::TargetMachine &targetMachine) {
  llvmModule.setDataLayout(targetMachine.createDataLayout());

  if (failed(optimizeLlvm(llvmModule, targetMachine)))
    return std::nullopt;

  std::string targetISA;
  llvm::raw_string_ostream isaStream(targetISA);
  {
    // Scope the buffered stream so its destructor flushes everything into
    // `isaStream` before the result is read back below.
    llvm::buffer_ostream bufferedStream(isaStream);
    llvm::legacy::PassManager codegenPasses;
    if (targetMachine.addPassesToEmitFile(codegenPasses, bufferedStream,
                                          nullptr, llvm::CGFT_AssemblyFile))
      return std::nullopt;
    codegenPasses.run(llvmModule);
  }
  return isaStream.str();
}
void gpu::SerializeToBlobPass::runOnOperation() {
// Lower the module to an LLVM IR module using a separate context to enable
// multi-threaded processing.
llvm::LLVMContext llvmContext;
std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext);
if (!llvmModule)
return signalPassFailure();
// Lower the LLVM IR module to target ISA.
std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine();
if (!targetMachine)
return signalPassFailure();
std::optional<std::string> maybeTargetISA =
translateToISA(*llvmModule, *targetMachine);
if (!maybeTargetISA.has_value())
return signalPassFailure();
std::string targetISA = std::move(*maybeTargetISA);
LLVM_DEBUG({
llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n";
llvm::dbgs() << targetISA << "\n";
llvm::dbgs().flush();
});
// Serialize the target ISA.
std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA);
if (!blob)
return signalPassFailure();
// Add the blob as module attribute.
auto attr =
StringAttr::get(&getContext(), StringRef(blob->data(), blob->size()));
getOperation()->setAttr(gpuBinaryAnnotation, attr);
}
/// Runs the standard -O<level> LLVM optimization pipeline on `llvmModule`,
/// where the level comes from the pass's `optLevel` option. Emits an error on
/// the operation and fails if the level is out of range or a transform fails.
LogicalResult
gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule,
                                       llvm::TargetMachine &targetMachine) {
  // Validate the pass option here so a bad value is reported against the
  // operation currently being processed.
  int level = this->optLevel.getValue();
  if (level < 0 || level > 3)
    return getOperation().emitError()
           << "invalid optimization level " << level;

  targetMachine.setOptLevel(static_cast<llvm::CodeGenOpt::Level>(level));

  auto transformer =
      makeOptimizingTransformer(level, /*sizeLevel=*/0, &targetMachine);
  if (llvm::Error err = transformer(&llvmModule)) {
    InFlightDiagnostic diag = getOperation()->emitError();
    llvm::handleAllErrors(std::move(err),
                          [&diag](const llvm::ErrorInfoBase &info) {
                            diag << "could not optimize LLVM IR: "
                                 << info.message();
                          });
    return diag;
  }
  return success();
}
/// Creates an llvm::TargetMachine from the pass's `triple`, `chip`, and
/// `features` options. Emits an error at the module's location and returns
/// null if the target is not registered or machine creation fails.
std::unique_ptr<llvm::TargetMachine>
gpu::SerializeToBlobPass::createTargetMachine() {
  Location loc = getOperation().getLoc();

  // Look up the target in LLVM's registry; it must already be registered.
  std::string lookupError;
  const llvm::Target *target =
      llvm::TargetRegistry::lookupTarget(triple, lookupError);
  if (!target) {
    emitError(loc, Twine("failed to lookup target: ") + lookupError);
    return {};
  }

  // Wrap the raw result immediately so ownership is never ambiguous.
  std::unique_ptr<llvm::TargetMachine> machine(
      target->createTargetMachine(triple, chip, features, {}, {}));
  if (!machine)
    emitError(loc, "failed to create target machine");
  return machine;
}
// Translates the current gpu.module operation into an LLVM IR module owned by
// `llvmContext`, named "LLVMDialectModule". Callers (runOnOperation) check the
// result for null, so translation failure is presumably signaled that way —
// propagated from translateModuleToLLVMIR.
std::unique_ptr<llvm::Module>
gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
return translateModuleToLLVMIR(getOperation(), llvmContext,
"LLVMDialectModule");
}