[mlir][NVVM] Adds the NVVM target attribute.
**For an explanation of these patches see D154153.** Commit message: This patch adds the NVVM target attribute for serializing GPU modules into strings containing cubin. Depends on D154113, D154100, and D154097. Reviewed By: mehdi_amini. Differential Revision: https://reviews.llvm.org/D154117
This commit is contained in:
@@ -97,7 +97,7 @@ endif()
|
||||
|
||||
# Build the CUDA conversions and run according tests if the NVPTX backend
|
||||
# is available
|
||||
if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD AND MLIR_ENABLE_EXECUTION_ENGINE)
|
||||
if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
|
||||
set(MLIR_ENABLE_CUDA_CONVERSIONS 1)
|
||||
else()
|
||||
set(MLIR_ENABLE_CUDA_CONVERSIONS 0)
|
||||
@@ -118,6 +118,9 @@ set(MLIR_ENABLE_CUDA_RUNNER 0 CACHE BOOL "Enable building the mlir CUDA runner")
|
||||
set(MLIR_ENABLE_ROCM_RUNNER 0 CACHE BOOL "Enable building the mlir ROCm runner")
|
||||
set(MLIR_ENABLE_SPIRV_CPU_RUNNER 0 CACHE BOOL "Enable building the mlir SPIR-V cpu runner")
|
||||
set(MLIR_ENABLE_VULKAN_RUNNER 0 CACHE BOOL "Enable building the mlir Vulkan runner")
|
||||
set(MLIR_ENABLE_NVPTXCOMPILER 0 CACHE BOOL
|
||||
"Statically link the nvptxlibrary instead of calling ptxas as a subprocess \
|
||||
for compiling PTX to cubin")
|
||||
|
||||
option(MLIR_INCLUDE_TESTS
|
||||
"Generate build targets for the MLIR unit tests."
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#define MLIR_DIALECT_LLVMIR_NVVMDIALECT_H_
|
||||
|
||||
#include "mlir/Bytecode/BytecodeOpInterface.h"
|
||||
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
|
||||
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
|
||||
#include "mlir/IR/Dialect.h"
|
||||
#include "mlir/IR/OpDefinition.h"
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#define NVVMIR_OPS
|
||||
|
||||
include "mlir/IR/EnumAttr.td"
|
||||
include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
|
||||
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
|
||||
include "mlir/Interfaces/SideEffectInterfaces.td"
|
||||
|
||||
@@ -1472,4 +1473,72 @@ def NVVM_WgmmaWaitGroupSyncOp : NVVM_Op<"wgmma.wait.group.sync.aligned",
|
||||
}];
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// NVVM target attribute.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Fix: the record was named `NVVM_TargettAttr` (double `t`), a typo. The
// generated C++ class (`NVVMTargetAttr`) and the mnemonic (`target`) come from
// the `NVVM_Attr<"NVVMTarget", "target">` arguments, so renaming the TableGen
// record does not change the generated API.
def NVVM_TargetAttr : NVVM_Attr<"NVVMTarget", "target"> {
  let description = [{
    GPU target attribute for controlling compilation of NVIDIA targets. All
    parameters decay into default values if not present.

    Examples:

    1. Target with default values.
    ```
      gpu.module @mymodule [#nvvm.target] attributes {...} {
        ...
      }
    ```

    2. Target with `sm_90` chip and fast math.
    ```
      gpu.module @mymodule [#nvvm.target<chip = "sm_90", flags = {fast}>] {
        ...
      }
    ```
  }];
  let parameters = (ins
    DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O,
    StringRefParameter<"Target triple.", "\"nvptx64-nvidia-cuda\"">:$triple,
    StringRefParameter<"Target chip.", "\"sm_50\"">:$chip,
    StringRefParameter<"Target chip features.", "\"+ptx60\"">:$features,
    OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags,
    OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link
  );
  let assemblyFormat = [{
    (`<` struct($O, $triple, $chip, $features, $flags, $link)^ `>`)?
  }];
  let builders = [
    AttrBuilder<(ins CArg<"int", "2">:$optLevel,
                     CArg<"StringRef", "\"nvptx64-nvidia-cuda\"">:$triple,
                     CArg<"StringRef", "\"sm_50\"">:$chip,
                     CArg<"StringRef", "\"+ptx60\"">:$features,
                     CArg<"DictionaryAttr", "nullptr">:$targetFlags,
                     CArg<"ArrayAttr", "nullptr">:$linkFiles), [{
      return Base::get($_ctxt, optLevel, triple, chip, features, targetFlags, linkFiles);
    }]>
  ];
  // All construction goes through the builder above so the defaults are
  // applied consistently; verification is implemented in NVVMDialect.cpp.
  let skipDefaultBuilders = 1;
  let genVerifyDecl = 1;
  let extraClassDeclaration = [{
    bool hasFlag(StringRef flag) const;
    bool hasFastMath() const;
    bool hasFtz() const;
  }];
  let extraClassDefinition = [{
    bool $cppClass::hasFlag(StringRef flag) const {
      if (DictionaryAttr flags = getFlags())
        return flags.get(flag) != nullptr;
      return false;
    }
    bool $cppClass::hasFastMath() const {
      return hasFlag("fast");
    }
    bool $cppClass::hasFtz() const {
      return hasFlag("ftz");
    }
  }];
}
|
||||
|
||||
#endif // NVVMIR_OPS
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
#include "mlir/Conversion/NVVMToLLVM/NVVMToLLVM.h"
|
||||
#include "mlir/Dialect/Func/Extensions/AllExtensions.h"
|
||||
#include "mlir/Target/LLVM/NVVM/Target.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
@@ -29,6 +30,7 @@ namespace mlir {
|
||||
inline void registerAllExtensions(DialectRegistry ®istry) {
|
||||
func::registerAllExtensions(registry);
|
||||
registerConvertNVVMToLLVMInterface(registry);
|
||||
registerNVVMTarget(registry);
|
||||
}
|
||||
|
||||
} // namespace mlir
|
||||
|
||||
28
mlir/include/mlir/Target/LLVM/NVVM/Target.h
Normal file
28
mlir/include/mlir/Target/LLVM/NVVM/Target.h
Normal file
@@ -0,0 +1,28 @@
|
||||
//===- Target.h - MLIR NVVM target registration -----------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This provides registration calls for attaching the NVVM target interface.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef MLIR_TARGET_LLVM_NVVM_TARGET_H
#define MLIR_TARGET_LLVM_NVVM_TARGET_H

namespace mlir {
class DialectRegistry;
class MLIRContext;

/// Registers the `TargetAttrInterface` for the `#nvvm.target` attribute in the
/// given registry.
void registerNVVMTarget(DialectRegistry &registry);

/// Registers the `TargetAttrInterface` for the `#nvvm.target` attribute in the
/// registry associated with the given context.
void registerNVVMTarget(MLIRContext &context);
} // namespace mlir

#endif // MLIR_TARGET_LLVM_NVVM_TARGET_H
|
||||
74
mlir/include/mlir/Target/LLVM/NVVM/Utils.h
Normal file
74
mlir/include/mlir/Target/LLVM/NVVM/Utils.h
Normal file
@@ -0,0 +1,74 @@
|
||||
//===- Utils.h - MLIR NVVM target utils -------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This files declares NVVM target related utility classes and functions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef MLIR_TARGET_LLVM_NVVM_UTILS_H
|
||||
#define MLIR_TARGET_LLVM_NVVM_UTILS_H
|
||||
|
||||
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
|
||||
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
|
||||
#include "mlir/Target/LLVM/ModuleToObject.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace NVVM {
|
||||
/// Searches for & returns the CUDA toolkit path; the search order is:
/// 1. The `CUDA_ROOT` environment variable.
/// 2. The `CUDA_HOME` environment variable.
/// 3. The `CUDA_PATH` environment variable.
/// 4. The CUDA toolkit path detected by CMake.
/// 5. Returns an empty string.
StringRef getCUDAToolkitPath();

/// Base class for all NVVM serializations from GPU modules into binary strings.
/// By default this class serializes into LLVM bitcode.
class SerializeGPUModuleBase : public LLVM::ModuleToObject {
public:
  /// Initializes the `toolkitPath` with the path in `targetOptions` or if empty
  /// with the path in `getCUDAToolkitPath`.
  SerializeGPUModuleBase(Operation &module, NVVMTargetAttr target,
                         const gpu::TargetOptions &targetOptions = {});

  /// Initializes the LLVM NVPTX target by safely calling `LLVMInitializeNVPTX*`
  /// methods if available.
  static void init();

  /// Returns the target attribute.
  NVVMTargetAttr getTarget() const;

  /// Returns the CUDA toolkit path.
  StringRef getToolkitPath() const;

  /// Returns the bitcode files to be loaded.
  ArrayRef<std::string> getFileList() const;

  /// Appends `nvvm/libdevice.bc` into `fileList`. Returns failure if the
  /// library couldn't be found.
  LogicalResult appendStandardLibs();

  /// Loads the bitcode files in `fileList`.
  virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
  loadBitcodeFiles(llvm::Module &module,
                   llvm::TargetMachine &targetMachine) override;

protected:
  /// NVVM target attribute controlling the compilation.
  NVVMTargetAttr target;

  /// CUDA toolkit path (from target options or auto-detected).
  std::string toolkitPath;

  /// List of LLVM bitcode files to link to, populated from the target options,
  /// the target attribute's `link` array, and `appendStandardLibs`.
  SmallVector<std::string> fileList;
};
|
||||
} // namespace NVVM
|
||||
} // namespace mlir
|
||||
|
||||
#endif // MLIR_TARGET_LLVM_NVVM_UTILS_H
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
|
||||
|
||||
#include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h"
|
||||
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
|
||||
#include "mlir/Dialect/Utils/StaticValueUtils.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
#include "mlir/IR/BuiltinAttributes.h"
|
||||
@@ -723,6 +724,7 @@ void NVVMDialect::initialize() {
|
||||
// registered.
|
||||
allowUnknownOperations();
|
||||
declarePromisedInterface<ConvertToLLVMPatternInterface>();
|
||||
declarePromisedInterface<gpu::TargetAttrInterface>();
|
||||
}
|
||||
|
||||
LogicalResult NVVMDialect::verifyOperationAttribute(Operation *op,
|
||||
@@ -761,6 +763,35 @@ LogicalResult NVVMDialect::verifyOperationAttribute(Operation *op,
|
||||
return success();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// NVVM target attribute.
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// Verifies the structural invariants of `#nvvm.target`: the optimization
/// level is in [0, 3], the triple and chip are non-empty, and every element of
/// the optional `link` array is a string attribute.
/// NOTE(review): these messages are capitalized and end with a period — MLIR
/// diagnostic convention prefers lowercase without trailing punctuation;
/// confirm before changing, since tests may match the exact text.
LogicalResult
NVVMTargetAttr::verify(function_ref<InFlightDiagnostic()> emitError,
                       int optLevel, StringRef triple, StringRef chip,
                       StringRef features, DictionaryAttr flags,
                       ArrayAttr files) {
  if (optLevel < 0 || optLevel > 3) {
    emitError() << "The optimization level must be a number between 0 and 3.";
    return failure();
  }
  if (triple.empty()) {
    emitError() << "The target triple cannot be empty.";
    return failure();
  }
  if (chip.empty()) {
    emitError() << "The target chip cannot be empty.";
    return failure();
  }
  // `files` is optional; when present every element must be a StringAttr.
  if (files && !llvm::all_of(files, [](::mlir::Attribute attr) {
        return attr && mlir::isa<StringAttr>(attr);
      })) {
    emitError() << "All the elements in the `link` array must be strings.";
    return failure();
  }
  return success();
}
|
||||
|
||||
#define GET_OP_CLASSES
|
||||
#include "mlir/Dialect/LLVMIR/NVVMOps.cpp.inc"
|
||||
|
||||
|
||||
@@ -20,3 +20,80 @@ add_mlir_library(MLIRTargetLLVM
|
||||
MLIRExecutionEngineUtils
|
||||
MLIRTargetLLVMIRExport
|
||||
)
|
||||
|
||||
# Only pull in the NVPTX backend libraries when the NVPTX target was built
# (MLIR_ENABLE_CUDA_CONVERSIONS is set from LLVM_TARGETS_TO_BUILD).
if (MLIR_ENABLE_CUDA_CONVERSIONS)
  set(NVPTX_LIBS
    NVPTXCodeGen
    NVPTXDesc
    NVPTXInfo
  )
endif()

# Library implementing the `#nvvm.target` serialization (GPU module -> cubin).
add_mlir_dialect_library(MLIRNVVMTarget
  NVVM/Target.cpp

  ADDITIONAL_HEADER_DIRS
  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/LLVMIR

  LINK_COMPONENTS
  ${NVPTX_LIBS}

  LINK_LIBS PUBLIC
  MLIRIR
  MLIRExecutionEngineUtils
  MLIRSupport
  MLIRGPUDialect
  MLIRTargetLLVM
  MLIRNVVMToLLVMIRTranslation
  )
|
||||
|
||||
if(MLIR_ENABLE_CUDA_CONVERSIONS)
  # Find the CUDA toolkit.
  find_package(CUDAToolkit)

  if(CUDAToolkit_FOUND)
    # Get the CUDA toolkit path. The path is needed for detecting `libdevice.bc`.
    # These extra steps are needed because of a bug on CMake.
    # See: https://gitlab.kitware.com/cmake/cmake/-/issues/24858
    # TODO: Bump the MLIR CMake version to 3.26.4 and switch to
    # ${CUDAToolkit_LIBRARY_ROOT}
    # Fix: `if(NOT DEFINED <name>)` takes the variable NAME; the previous
    # `if(NOT DEFINED ${CUDAToolkit_LIBRARY_ROOT})` dereferenced the variable
    # and tested whether a variable named by its VALUE was defined.
    if(NOT DEFINED CUDAToolkit_LIBRARY_ROOT)
      get_filename_component(MLIR_CUDAToolkit_ROOT ${CUDAToolkit_BIN_DIR}
                             DIRECTORY ABSOLUTE)
    else()
      set(MLIR_CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_ROOT})
    endif()

    # Add the `nvptxcompiler` library.
    if(MLIR_ENABLE_NVPTXCOMPILER)
      # Find the `nvptxcompiler` library.
      # TODO: Bump the MLIR CMake version to 3.25 and use `CUDA::nvptxcompiler_static`.
      find_library(MLIR_NVPTXCOMPILER_LIB nvptxcompiler_static
                   PATHS ${CUDAToolkit_LIBRARY_DIR} NO_DEFAULT_PATH)

      # Fail if `nvptxcompiler_static` couldn't be found.
      if(MLIR_NVPTXCOMPILER_LIB STREQUAL "MLIR_NVPTXCOMPILER_LIB-NOTFOUND")
        message(FATAL_ERROR
                "Requested using the `nvptxcompiler` library backend but it couldn't be found.")
      endif()

      # Link against `nvptxcompiler_static`. TODO: use `CUDA::nvptxcompiler_static`.
      target_link_libraries(MLIRNVVMTarget PRIVATE ${MLIR_NVPTXCOMPILER_LIB})
      target_include_directories(obj.MLIRNVVMTarget PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
    endif()
  else()
    # Fail if `MLIR_ENABLE_NVPTXCOMPILER` is enabled and the toolkit couldn't be found.
    if(MLIR_ENABLE_NVPTXCOMPILER)
      message(FATAL_ERROR
              "Requested using the `nvptxcompiler` library backend but it couldn't be found.")
    endif()
  endif()
  message(VERBOSE "MLIR default CUDA toolkit path: ${MLIR_CUDAToolkit_ROOT}")

  # Define the `CUDAToolkit` path.
  target_compile_definitions(obj.MLIRNVVMTarget
    PRIVATE
    MLIR_NVPTXCOMPILER_ENABLED=${MLIR_ENABLE_NVPTXCOMPILER}
    __DEFAULT_CUDATOOLKIT_PATH__="${MLIR_CUDAToolkit_ROOT}"
  )
endif()
|
||||
|
||||
508
mlir/lib/Target/LLVM/NVVM/Target.cpp
Normal file
508
mlir/lib/Target/LLVM/NVVM/Target.cpp
Normal file
@@ -0,0 +1,508 @@
|
||||
//===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This files defines NVVM target related functions including registration
|
||||
// calls for the `#nvvm.target` compilation attribute.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Target/LLVM/NVVM/Target.h"
|
||||
|
||||
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
|
||||
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
|
||||
#include "mlir/Target/LLVM/NVVM/Utils.h"
|
||||
#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
|
||||
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
|
||||
#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
|
||||
#include "mlir/Target/LLVMIR/Export.h"
|
||||
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Support/FileUtilities.h"
|
||||
#include "llvm/Support/FormatVariadic.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/Path.h"
|
||||
#include "llvm/Support/Process.h"
|
||||
#include "llvm/Support/Program.h"
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
using namespace mlir;
|
||||
using namespace mlir::NVVM;
|
||||
|
||||
#ifndef __DEFAULT_CUDATOOLKIT_PATH__
|
||||
#define __DEFAULT_CUDATOOLKIT_PATH__ ""
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
// Implementation of the `TargetAttrInterface` model.
|
||||
class NVVMTargetAttrImpl
|
||||
: public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
|
||||
public:
|
||||
std::optional<SmallVector<char, 0>>
|
||||
serializeToObject(Attribute attribute, Operation *module,
|
||||
const gpu::TargetOptions &options) const;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Register the NVVM dialect, the NVVM translation & the target interface.
// The interface is attached lazily via a registry extension so it only runs
// once the NVVM dialect is actually loaded into a context.
void mlir::registerNVVMTarget(DialectRegistry &registry) {
  registerNVVMDialectTranslation(registry);
  registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
    NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
  });
}
|
||||
|
||||
// Convenience overload: builds a temporary registry, registers the target
// interface in it, and appends it to the context's registry.
void mlir::registerNVVMTarget(MLIRContext &context) {
  DialectRegistry registry;
  registerNVVMTarget(registry);
  context.appendDialectRegistry(registry);
}
|
||||
|
||||
// Search for the CUDA toolkit path.
|
||||
StringRef mlir::NVVM::getCUDAToolkitPath() {
|
||||
if (const char *var = std::getenv("CUDA_ROOT"))
|
||||
return var;
|
||||
if (const char *var = std::getenv("CUDA_HOME"))
|
||||
return var;
|
||||
if (const char *var = std::getenv("CUDA_PATH"))
|
||||
return var;
|
||||
return __DEFAULT_CUDATOOLKIT_PATH__;
|
||||
}
|
||||
|
||||
// Constructor: forwards triple/chip/features/opt-level from the target
// attribute to ModuleToObject, then assembles the list of bitcode files to
// link (target options first, then the attribute's `link` array, then
// libdevice).
SerializeGPUModuleBase::SerializeGPUModuleBase(
    Operation &module, NVVMTargetAttr target,
    const gpu::TargetOptions &targetOptions)
    : ModuleToObject(module, target.getTriple(), target.getChip(),
                     target.getFeatures(), target.getO()),
      target(target), toolkitPath(targetOptions.getToolkitPath()),
      fileList(targetOptions.getLinkFiles()) {

  // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath`
  if (toolkitPath.empty())
    toolkitPath = getCUDAToolkitPath();

  // Append the files in the target attribute. Non-string entries are skipped
  // here; `NVVMTargetAttr::verify` rejects them at attribute creation.
  if (ArrayAttr files = target.getLink())
    for (Attribute attr : files.getValue())
      if (auto file = dyn_cast<StringAttr>(attr))
        fileList.push_back(file.str());

  // Append libdevice to the files to be loaded. Failure is deliberately
  // ignored here; `appendStandardLibs` already emits a diagnostic.
  (void)appendStandardLibs();
}
|
||||
|
||||
// One-time, thread-safe initialization of the NVPTX LLVM backend. When the
// NVPTX target was not built (MLIR_CUDA_CONVERSIONS_ENABLED != 1) this is a
// no-op, which keeps the call safe on any build configuration.
void SerializeGPUModuleBase::init() {
  static llvm::once_flag initializeBackendOnce;
  llvm::call_once(initializeBackendOnce, []() {
  // If the `NVPTX` LLVM target was built, initialize it.
#if MLIR_CUDA_CONVERSIONS_ENABLED == 1
    LLVMInitializeNVPTXTarget();
    LLVMInitializeNVPTXTargetInfo();
    LLVMInitializeNVPTXTargetMC();
    LLVMInitializeNVPTXAsmPrinter();
#endif
  });
}
|
||||
|
||||
/// Returns the target attribute this serializer was constructed with.
NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }

/// Returns the resolved CUDA toolkit path (options, env vars, or CMake).
StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; }

/// Returns the accumulated list of bitcode files to be linked in.
ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const {
  return fileList;
}
|
||||
|
||||
// Try to append `libdevice` from a CUDA toolkit installation.
|
||||
LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
|
||||
StringRef pathRef = getToolkitPath();
|
||||
if (pathRef.size()) {
|
||||
SmallVector<char, 256> path;
|
||||
path.insert(path.begin(), pathRef.begin(), pathRef.end());
|
||||
pathRef = StringRef(path.data(), path.size());
|
||||
if (!llvm::sys::fs::is_directory(pathRef)) {
|
||||
getOperation().emitError() << "CUDA path: " << pathRef
|
||||
<< " does not exist or is not a directory.\n";
|
||||
return failure();
|
||||
}
|
||||
llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
|
||||
pathRef = StringRef(path.data(), path.size());
|
||||
if (!llvm::sys::fs::is_regular_file(pathRef)) {
|
||||
getOperation().emitError() << "LibDevice path: " << pathRef
|
||||
<< " does not exist or is not a file.\n";
|
||||
return failure();
|
||||
}
|
||||
fileList.push_back(pathRef.str());
|
||||
}
|
||||
return success();
|
||||
}
|
||||
|
||||
// Loads every bitcode file accumulated in `fileList` into LLVM modules,
// returning std::nullopt (after a diagnostic from the helper) on failure.
// NOTE(review): the trailing `true` is a positional flag to
// `loadBitcodeFilesFromList` — confirm its meaning against ModuleToObject.
std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module,
                                         llvm::TargetMachine &targetMachine) {
  SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
  if (failed(loadBitcodeFilesFromList(module.getContext(), targetMachine,
                                      fileList, bcFiles, true)))
    return std::nullopt;
  return bcFiles;
}
|
||||
|
||||
#if MLIR_CUDA_CONVERSIONS_ENABLED == 1
|
||||
namespace {
// Serializer that lowers a GPU module to cubin (or PTX / LLVM bitcode,
// depending on the requested compilation target), using either the external
// `ptxas` tool or the `nvptxcompiler` library.
class NVPTXSerializer : public SerializeGPUModuleBase {
public:
  NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                  const gpu::TargetOptions &targetOptions);

  /// Returns the GPU module being serialized.
  gpu::GPUModuleOp getOperation();

  // Compile PTX to cubin using `ptxas`.
  std::optional<SmallVector<char, 0>>
  compileToBinary(const std::string &ptxCode);

  // Compile PTX to cubin using the `nvptxcompiler` library.
  std::optional<SmallVector<char, 0>>
  compileToBinaryNVPTX(const std::string &ptxCode);

  /// Dispatches on the compilation target: offload -> bitcode, assembly ->
  /// PTX, binary -> cubin via one of the two compile paths above.
  std::optional<SmallVector<char, 0>>
  moduleToObject(llvm::Module &llvmModule,
                 llvm::TargetMachine &targetMachine) override;

private:
  // Path + remover pair; the file is deleted when the FileRemover dies.
  using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;

  // Create a temp file.
  std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);

  // Find the PTXAS compiler. The search order is:
  // 1. The toolkit path in `targetOptions`.
  // 2. In the system PATH.
  // 3. The path from `getCUDAToolkitPath()`.
  std::optional<std::string> findPtxas() const;

  // Target options.
  gpu::TargetOptions targetOptions;
};
} // namespace
|
||||
|
||||
// Constructor: forwards to the base class and keeps a copy of the target
// options for the compile-time decisions in `moduleToObject`.
NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
                                 const gpu::TargetOptions &targetOptions)
    : SerializeGPUModuleBase(module, target, targetOptions),
      targetOptions(targetOptions) {}
|
||||
|
||||
std::optional<NVPTXSerializer::TmpFile>
|
||||
NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
|
||||
llvm::SmallString<128> filename;
|
||||
std::error_code ec =
|
||||
llvm::sys::fs::createTemporaryFile(name, suffix, filename);
|
||||
if (ec) {
|
||||
getOperation().emitError() << "Couldn't create the temp file: `" << filename
|
||||
<< "`, error message: " << ec.message();
|
||||
return std::nullopt;
|
||||
}
|
||||
return TmpFile(filename, llvm::FileRemover(filename.c_str()));
|
||||
}
|
||||
|
||||
// Returns the serialized operation as a gpu.module op. NOTE(review): the
// dyn_cast yields a null op if the operation is not a GPUModuleOp — callers
// appear to rely on it always being one; confirm.
gpu::GPUModuleOp NVPTXSerializer::getOperation() {
  return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
}
|
||||
|
||||
std::optional<std::string> NVPTXSerializer::findPtxas() const {
|
||||
// Find the `ptxas` compiler.
|
||||
// 1. Check the toolkit path given in the command line.
|
||||
StringRef pathRef = targetOptions.getToolkitPath();
|
||||
SmallVector<char, 256> path;
|
||||
if (pathRef.size()) {
|
||||
path.insert(path.begin(), pathRef.begin(), pathRef.end());
|
||||
llvm::sys::path::append(path, "bin", "ptxas");
|
||||
if (llvm::sys::fs::can_execute(path))
|
||||
return StringRef(path.data(), path.size()).str();
|
||||
}
|
||||
|
||||
// 2. Check PATH.
|
||||
if (std::optional<std::string> ptxasCompiler =
|
||||
llvm::sys::Process::FindInEnvPath("PATH", "ptxas"))
|
||||
return *ptxasCompiler;
|
||||
|
||||
// 3. Check `getCUDAToolkitPath()`.
|
||||
pathRef = getCUDAToolkitPath();
|
||||
path.clear();
|
||||
if (pathRef.size()) {
|
||||
path.insert(path.begin(), pathRef.begin(), pathRef.end());
|
||||
llvm::sys::path::append(path, "bin", "ptxas");
|
||||
if (llvm::sys::fs::can_execute(path))
|
||||
return StringRef(path.data(), path.size()).str();
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// TODO: clean this method & have a generic tool driver or never emit binaries
// with this mechanism and let another stage take care of it.
// Compiles `ptxCode` to cubin by writing it to a temp file and invoking the
// external `ptxas` tool; returns the cubin bytes, or std::nullopt (after a
// diagnostic) on any failure.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
  // Find the PTXAS compiler.
  std::optional<std::string> ptxasCompiler = findPtxas();
  if (!ptxasCompiler) {
    getOperation().emitError()
        << "Couldn't find the `ptxas` compiler. Please specify the toolkit "
           "path, add the compiler to $PATH, or set one of the environment "
           "variables in `NVVM::getCUDAToolkitPath()`.";
    return std::nullopt;
  }

  // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.
  std::string basename =
      llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
                    getTarget().getTriple(), getTarget().getChip());

  // Create temp files: PTX input, ptxas log, and cubin output. Each TmpFile
  // owns a FileRemover, so all files are cleaned up when this scope exits.
  std::optional<TmpFile> ptxFile = createTemp(basename, "ptx");
  if (!ptxFile)
    return std::nullopt;
  std::optional<TmpFile> logFile = createTemp(basename, "log");
  if (!logFile)
    return std::nullopt;
  std::optional<TmpFile> cubinFile = createTemp(basename, "cubin");
  if (!cubinFile)
    return std::nullopt;

  std::error_code ec;
  // Dump the PTX to a temp file. The nested scope closes the stream before
  // ptxas reads the file.
  {
    llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
    if (ec) {
      getOperation().emitError()
          << "Couldn't open the file: `" << ptxFile->first
          << "`, error message: " << ec.message();
      return std::nullopt;
    }
    ptxStream << ptxCode;
    if (ptxStream.has_error()) {
      getOperation().emitError()
          << "An error occurred while writing the PTX to: `" << ptxFile->first
          << "`.";
      return std::nullopt;
    }
    ptxStream.flush();
  }

  // Create PTX args.
  std::string optLevel = std::to_string(this->optLevel);
  SmallVector<StringRef, 12> ptxasArgs(
      {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
       StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile->first),
       "--opt-level", optLevel});

  // Append user-provided options. The BumpPtrAllocator in `cmdOpts.first`
  // owns the tokenized strings, so it must outlive the ptxas invocation.
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();
  for (auto arg : cmdOpts.second)
    ptxasArgs.push_back(arg);

  // Redirects for ExecuteAndWait: stdin untouched, stdout and stderr both
  // sent to the log file.
  std::optional<StringRef> redirects[] = {
      std::nullopt,
      logFile->first,
      logFile->first,
  };

  // Invoke PTXAS. A nonzero return code indicates failure.
  std::string message;
  if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
                                /*Env=*/std::nullopt,
                                /*Redirects=*/redirects,
                                /*SecondsToWait=*/0,
                                /*MemoryLimit=*/0,
                                /*ErrMsg=*/&message)) {
    if (message.empty()) {
      // No OS-level error message: surface ptxas's own log instead.
      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ptxasStderr =
          llvm::MemoryBuffer::getFile(logFile->first);
      if (ptxasStderr)
        getOperation().emitError() << "PTXAS invocation failed. PTXAS log:\n"
                                   << ptxasStderr->get()->getBuffer();
      else
        getOperation().emitError() << "PTXAS invocation failed.";
      return std::nullopt;
    }
    getOperation().emitError()
        << "PTXAS invocation failed, error message: " << message;
    return std::nullopt;
  }

  // Dump the output of PTXAS, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    llvm::dbgs() << "PTXAS invocation for module: "
                 << getOperation().getNameAttr() << "\n";
    llvm::dbgs() << "Command: ";
    llvm::interleave(ptxasArgs, llvm::dbgs(), " ");
    llvm::dbgs() << "\n";
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ptxasLog =
        llvm::MemoryBuffer::getFile(logFile->first);
    if (ptxasLog && (*ptxasLog)->getBuffer().size()) {
      llvm::dbgs() << "Output:\n" << (*ptxasLog)->getBuffer() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE

  // Read the cubin file back into memory; these bytes are the result.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> cubinBuffer =
      llvm::MemoryBuffer::getFile(cubinFile->first);
  if (!cubinBuffer) {
    getOperation().emitError()
        << "Couldn't open the file: `" << cubinFile->first
        << "`, error message: " << cubinBuffer.getError().message();
    return std::nullopt;
  }
  StringRef cubinStr = (*cubinBuffer)->getBuffer();
  return SmallVector<char, 0>(cubinStr.begin(), cubinStr.end());
}
|
||||
|
||||
#if MLIR_NVPTXCOMPILER_ENABLED == 1
#include "nvPTXCompiler.h"

// Bail out of the enclosing function with std::nullopt when the nvPTXCompiler
// call `expr` returns a nonzero status, reporting the failing expression and
// status at `loc` (a Location expected to be in scope at the use site).
// NOTE(review): an early return here skips `nvPTXCompilerDestroy`, so an
// already-created compiler handle leaks on these error paths — confirm.
#define RETURN_ON_NVPTXCOMPILER_ERROR(expr)                                    \
  do {                                                                         \
    if (auto status = (expr)) {                                                \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ")  \
                     << status;                                                \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)
|
||||
|
||||
// Compiles `ptxCode` to cubin in-process with the `nvptxcompiler` static
// library; returns the binary, or std::nullopt (after a diagnostic) on
// failure. NOTE(review): the RETURN_ON_NVPTXCOMPILER_ERROR early returns do
// not destroy `compiler`, leaking the handle on error paths — confirm.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
  Location loc = getOperation().getLoc();
  nvPTXCompilerHandle compiler = nullptr;
  nvPTXCompileResult status;
  size_t logSize;

  // Create the options: user command-line options plus the chip and the
  // optimization level from the target attribute.
  std::string optLevel = std::to_string(this->optLevel);
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();
  cmdOpts.second.append(
      {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});

  // Create the compiler handle.
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));

  // Try to compile the binary.
  status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
                                cmdOpts.second.data());

  // Check if compilation failed; prefer the compiler's error log when one is
  // available, otherwise report the raw status code.
  if (status != NVPTXCOMPILE_SUCCESS) {
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetErrorLogSize(compiler, &logSize));
    if (logSize != 0) {
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetErrorLog(compiler, log.data()));
      emitError(loc) << "NVPTX compiler invocation failed, error log: "
                     << log.data();
    } else
      emitError(loc) << "NVPTX compiler invocation failed with error code: "
                     << status;
    return std::nullopt;
  }

  // Retrieve the binary.
  size_t elfSize;
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
  SmallVector<char, 0> binary(elfSize, 0);
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));

  // Dump the log of the compiler, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetInfoLogSize(compiler, &logSize));
    if (logSize != 0) {
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetInfoLog(compiler, log.data()));
      llvm::dbgs() << "NVPTX compiler invocation for module: "
                   << getOperation().getNameAttr() << "\n";
      llvm::dbgs() << "Arguments: ";
      llvm::interleave(cmdOpts.second, llvm::dbgs(), " ");
      llvm::dbgs() << "\nOutput\n" << log.data() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE
  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
  return binary;
}
|
||||
#endif // MLIR_NVPTXCOMPILER_ENABLED == 1
|
||||
|
||||
/// Serializes `llvmModule` according to the compilation target requested in
/// `targetOptions`:
///  - `offload`:  return the module as-is via the base class (LLVM bitcode).
///  - `assembly`: return the PTX produced by `translateToISA`.
///  - otherwise:  compile the PTX down to a binary, either with the statically
///    linked nvPTXCompiler library or by invoking `ptxas` as a subprocess,
///    depending on how MLIR was built (MLIR_NVPTXCOMPILER_ENABLED).
/// Returns std::nullopt (after emitting an error) if the PTX translation
/// fails; binary-compilation failures are reported by the callees.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::moduleToObject(llvm::Module &llvmModule,
                                llvm::TargetMachine &targetMachine) {
  // Return LLVM IR if the compilation target is offload.
  // DEBUG_TYPE is defined locally (and undefined right after) so each stage of
  // the pipeline can be enabled independently with --debug-only=<type>.
#define DEBUG_TYPE "serialize-to-llvm"
  LLVM_DEBUG({
    llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
                 << "\n";
    llvm::dbgs() << llvmModule << "\n";
    llvm::dbgs().flush();
  });
#undef DEBUG_TYPE
  if (targetOptions.getCompilationTarget() == gpu::TargetOptions::offload)
    return SerializeGPUModuleBase::moduleToObject(llvmModule, targetMachine);

  // Emit PTX code.
  std::optional<std::string> serializedISA =
      translateToISA(llvmModule, targetMachine);
  if (!serializedISA) {
    getOperation().emitError() << "Failed translating the module to ISA.";
    return std::nullopt;
  }
#define DEBUG_TYPE "serialize-to-isa"
  LLVM_DEBUG({
    llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n";
    llvm::dbgs() << *serializedISA << "\n";
    llvm::dbgs().flush();
  });
#undef DEBUG_TYPE

  // Return PTX if the compilation target is assembly.
  if (targetOptions.getCompilationTarget() == gpu::TargetOptions::assembly)
    return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());

  // Compile to binary.
  // NOTE(review): any compilation target other than `offload`/`assembly`
  // falls through to binary compilation here — presumably `binary` is the only
  // remaining value; confirm against gpu::TargetOptions.
#if MLIR_NVPTXCOMPILER_ENABLED == 1
  return compileToBinaryNVPTX(*serializedISA);
#else
  return compileToBinary(*serializedISA);
#endif // MLIR_NVPTXCOMPILER_ENABLED == 1
}
|
||||
#endif // MLIR_CUDA_CONVERSIONS_ENABLED == 1
|
||||
|
||||
std::optional<SmallVector<char, 0>>
|
||||
NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
|
||||
const gpu::TargetOptions &options) const {
|
||||
assert(module && "The module must be non null.");
|
||||
if (!module)
|
||||
return std::nullopt;
|
||||
if (!mlir::isa<gpu::GPUModuleOp>(module)) {
|
||||
module->emitError("Module must be a GPU module.");
|
||||
return std::nullopt;
|
||||
}
|
||||
#if MLIR_CUDA_CONVERSIONS_ENABLED == 1
|
||||
NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
|
||||
serializer.init();
|
||||
return serializer.run();
|
||||
#else
|
||||
module->emitError(
|
||||
"The `NVPTX` target was not built. Please enable it when building LLVM.");
|
||||
return std::nullopt;
|
||||
#endif // MLIR_CUDA_CONVERSIONS_ENABLED == 1
|
||||
}
|
||||
@@ -364,3 +364,10 @@ gpu.module @module {
|
||||
gpu.return
|
||||
}) {function_type = () -> (), sym_name = "func"} : () -> ()
|
||||
}
|
||||
|
||||
// Check that this doesn't crash.
|
||||
gpu.module @module_with_one_target [#nvvm.target] {
|
||||
gpu.func @kernel(%arg0 : f32) kernel {
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -429,3 +429,12 @@ func.func @wgmma_wait_group_sync_aligned() {
|
||||
nvvm.wgmma.wait.group.sync.aligned 0
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// Just check these don't emit errors.
|
||||
gpu.module @module_1 [#nvvm.target<chip = "sm_90", features = "+ptx70", link = ["my_device_lib.bc"], flags = {fast, ftz}>] {
|
||||
}
|
||||
|
||||
gpu.module @module_2 [#nvvm.target<chip = "sm_90">, #nvvm.target<chip = "sm_80">, #nvvm.target<chip = "sm_70">] {
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
add_mlir_unittest(MLIRTargetLLVMTests
|
||||
SerializeNVVMTarget.cpp
|
||||
SerializeToLLVMBitcode.cpp
|
||||
)
|
||||
|
||||
@@ -7,9 +8,14 @@ llvm_map_components_to_libnames(llvm_libs nativecodegen)
|
||||
target_link_libraries(MLIRTargetLLVMTests
|
||||
PRIVATE
|
||||
MLIRTargetLLVM
|
||||
MLIRNVVMTarget
|
||||
MLIRGPUDialect
|
||||
MLIRNVVMDialect
|
||||
MLIRLLVMDialect
|
||||
MLIRLLVMToLLVMIRTranslation
|
||||
MLIRBuiltinToLLVMIRTranslation
|
||||
MLIRNVVMToLLVMIRTranslation
|
||||
MLIRGPUToLLVMIRTranslation
|
||||
${llvm_libs}
|
||||
)
|
||||
|
||||
|
||||
154
mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
Normal file
154
mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
Normal file
@@ -0,0 +1,154 @@
|
||||
//===- SerializeNVVMTarget.cpp ----------------------------------*- C++ -*-===//
|
||||
//
|
||||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
|
||||
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
|
||||
#include "mlir/IR/MLIRContext.h"
|
||||
#include "mlir/InitAllDialects.h"
|
||||
#include "mlir/Parser/Parser.h"
|
||||
#include "mlir/Target/LLVM/NVVM/Target.h"
|
||||
#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h"
|
||||
#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
|
||||
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
|
||||
|
||||
#include "llvm/IRReader/IRReader.h"
|
||||
#include "llvm/Support/MemoryBufferRef.h"
|
||||
#include "llvm/Support/Process.h"
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/TargetParser/Host.h"
|
||||
|
||||
#include "gmock/gmock.h"
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
// Skip the test if the NVPTX target was not built.
|
||||
#if MLIR_CUDA_CONVERSIONS_ENABLED == 0
|
||||
#define SKIP_WITHOUT_NVPTX(x) DISABLED_##x
|
||||
#else
|
||||
#define SKIP_WITHOUT_NVPTX(x) x
|
||||
#endif
|
||||
|
||||
class MLIRTargetLLVMNVVM : public ::testing::Test {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
registerBuiltinDialectTranslation(registry);
|
||||
registerLLVMDialectTranslation(registry);
|
||||
registerGPUDialectTranslation(registry);
|
||||
registerNVVMTarget(registry);
|
||||
}
|
||||
|
||||
// Checks if PTXAS is in PATH.
|
||||
bool hasPtxas() {
|
||||
// Find the `ptxas` compiler.
|
||||
std::optional<std::string> ptxasCompiler =
|
||||
llvm::sys::Process::FindInEnvPath("PATH", "ptxas");
|
||||
return ptxasCompiler.has_value();
|
||||
}
|
||||
|
||||
// Dialect registry.
|
||||
DialectRegistry registry;
|
||||
|
||||
// MLIR module used for the tests.
|
||||
const std::string moduleStr = R"mlir(
|
||||
gpu.module @nvvm_test {
|
||||
llvm.func @nvvm_kernel(%arg0: f32) attributes {gpu.kernel, nvvm.kernel} {
|
||||
llvm.return
|
||||
}
|
||||
})mlir";
|
||||
};
|
||||
|
||||
// Test NVVM serialization to LLVM bitcode: with the `offload` target the
// serializer should return the module's LLVM IR, which we parse back and
// inspect for the kernel symbol.
TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMMToLLVM)) {
  MLIRContext context(registry);

  OwningOpRef<ModuleOp> module =
      parseSourceString<ModuleOp>(moduleStr, &context);
  ASSERT_TRUE(!!module);

  // Create an NVVM target.
  NVVM::NVVMTargetAttr target = NVVM::NVVMTargetAttr::get(&context);

  // Serialize the module.
  auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
  ASSERT_TRUE(!!serializer);
  gpu::TargetOptions options("", {}, "", gpu::TargetOptions::offload);
  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
    std::optional<SmallVector<char, 0>> object =
        serializer.serializeToObject(gpuModule, options);
    // Check that the serializer was successful.
    ASSERT_TRUE(object != std::nullopt);
    ASSERT_TRUE(object->size() > 0);

    // Read the serialized module back as LLVM bitcode.
    llvm::MemoryBufferRef buffer(StringRef(object->data(), object->size()),
                                 "module");
    llvm::LLVMContext llvmContext;
    llvm::Expected<std::unique_ptr<llvm::Module>> llvmModule =
        llvm::getLazyBitcodeModule(buffer, llvmContext);
    ASSERT_TRUE(!!llvmModule);
    ASSERT_TRUE(!!*llvmModule);

    // Check that it has a function named `nvvm_kernel`.
    ASSERT_TRUE((*llvmModule)->getFunction("nvvm_kernel") != nullptr);
  }
}
|
||||
|
||||
// Test NVVM serialization to PTX.
|
||||
TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToPTX)) {
|
||||
MLIRContext context(registry);
|
||||
|
||||
OwningOpRef<ModuleOp> module =
|
||||
parseSourceString<ModuleOp>(moduleStr, &context);
|
||||
ASSERT_TRUE(!!module);
|
||||
|
||||
// Create an NVVM target.
|
||||
NVVM::NVVMTargetAttr target = NVVM::NVVMTargetAttr::get(&context);
|
||||
|
||||
// Serialize the module.
|
||||
auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
|
||||
ASSERT_TRUE(!!serializer);
|
||||
gpu::TargetOptions options("", {}, "", gpu::TargetOptions::assembly);
|
||||
for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
|
||||
std::optional<SmallVector<char, 0>> object =
|
||||
serializer.serializeToObject(gpuModule, options);
|
||||
// Check that the serializer was successful.
|
||||
ASSERT_TRUE(object != std::nullopt);
|
||||
ASSERT_TRUE(object->size() > 0);
|
||||
|
||||
ASSERT_TRUE(
|
||||
StringRef(object->data(), object->size()).contains("nvvm_kernel"));
|
||||
}
|
||||
}
|
||||
|
||||
// Test NVVM serialization to Binary (cubin). Requires `ptxas` on PATH, so the
// test is skipped (not failed) on machines without the CUDA toolkit.
TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToBinary)) {
  if (!hasPtxas())
    GTEST_SKIP() << "PTXAS compiler not found, skipping test.";

  MLIRContext context(registry);

  OwningOpRef<ModuleOp> module =
      parseSourceString<ModuleOp>(moduleStr, &context);
  ASSERT_TRUE(!!module);

  // Create an NVVM target.
  NVVM::NVVMTargetAttr target = NVVM::NVVMTargetAttr::get(&context);

  // Serialize the module.
  auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
  ASSERT_TRUE(!!serializer);
  gpu::TargetOptions options("", {}, "", gpu::TargetOptions::binary);
  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
    std::optional<SmallVector<char, 0>> object =
        serializer.serializeToObject(gpuModule, options);
    // Check that the serializer was successful; the binary contents are not
    // inspected further since cubin is an opaque format.
    ASSERT_TRUE(object != std::nullopt);
    ASSERT_TRUE(object->size() > 0);
  }
}
|
||||
Reference in New Issue
Block a user