There are now areas where we can encounter either a ModuleOp or a GPUModuleOp, both of which can carry a DataLayout, and where we may need to use the DataLayout utilities. I've therefore taken the liberty of extending those utilities to work with both operation types.

Those with more knowledge of how they wish GPUModuleOps to interact with their parent ModuleOp's DataLayout may have further alterations to make in the future; for the moment, the patch simply uses the basic data layout construction, which I believe combines the parent ModuleOp's layout with the child GPUModuleOp's. If the GPUModuleOp has no DataLayout of its own, it falls back to the parent ModuleOp's.

One caveat: if two module operations both define builtin dialect DataLayout entries, the combination machinery does not appear to support merging them.

This behaviour is useful in areas like https://github.com/llvm/llvm-project/pull/119585/files#diff-19fc4bcb38829d085e25d601d344bbd85bf7ef749ca359e348f4a7c750eae89dR1412, where we sit at a crossroads between the two different module operations.
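For illustration, here is a minimal sketch of the intended lookup behaviour using the core MLIR API; the `getEffectiveLayout` helper is hypothetical and not part of this patch. As I understand it, `mlir::DataLayout::closest` builds a layout from the nearest data-layout-bearing scope and combines it with the specs of its ancestors, which matches the fallback described above:

```cpp
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/DataLayoutInterfaces.h"

// Hypothetical helper: returns the layout in effect for `op`, whether the
// enclosing scope is a builtin.module, a gpu.module, or a gpu.module nested
// inside a builtin.module. If the gpu.module carries no spec of its own, the
// result should reflect the parent builtin.module's spec.
static mlir::DataLayout getEffectiveLayout(mlir::Operation *op) {
  return mlir::DataLayout::closest(op);
}
```

CUFAddConstructor.cpp, reproduced below, is one such crossroads: the pass runs on the ModuleOp but also looks up the device-side GPUModuleOp.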
//===-- CUFAddConstructor.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/CUFCommon.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/CodeGen/Target.h"
#include "flang/Optimizer/CodeGen/TypeConverter.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Support/DataLayout.h"
#include "flang/Runtime/CUDA/registration.h"
#include "flang/Runtime/entry-names.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Value.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/SmallVector.h"

namespace fir {
#define GEN_PASS_DEF_CUFADDCONSTRUCTOR
#include "flang/Optimizer/Transforms/Passes.h.inc"
} // namespace fir

using namespace Fortran::runtime::cuda;

namespace {

static constexpr llvm::StringRef cudaFortranCtorName{
    "__cudaFortranConstructor"};

struct CUFAddConstructor
    : public fir::impl::CUFAddConstructorBase<CUFAddConstructor> {

  void runOnOperation() override {
    mlir::ModuleOp mod = getOperation();
    mlir::SymbolTable symTab(mod);
    mlir::OpBuilder opBuilder{mod.getBodyRegion()};
    fir::FirOpBuilder builder(opBuilder, mod);
    fir::KindMapping kindMap{fir::getKindMapping(mod)};
    builder.setInsertionPointToEnd(mod.getBody());
    mlir::Location loc = mod.getLoc();
    auto *ctx = mod.getContext();
    auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx);
    auto idxTy = builder.getIndexType();
    auto funcTy =
        mlir::LLVM::LLVMFunctionType::get(voidTy, {}, /*isVarArg=*/false);
    std::optional<mlir::DataLayout> dl =
        fir::support::getOrSetMLIRDataLayout(mod, /*allowDefaultLayout=*/false);
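    // Note: this queries the top-level builtin.module's layout, which is all
    // this pass needs; with the DataLayout utilities extended to handle
    // gpu.module, a similar query could also be made on the gpu.module looked
    // up further below.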
    if (!dl) {
      mlir::emitError(mod.getLoc(),
                      "data layout attribute is required to perform " +
                          getName() + " pass");
      // Bail out rather than dereference the empty optional below.
      signalPassFailure();
      return;
    }

    // Symbol reference to CUFRegisterAllocator.
    builder.setInsertionPointToEnd(mod.getBody());
    auto registerFuncOp = builder.create<mlir::LLVM::LLVMFuncOp>(
        loc, RTNAME_STRING(CUFRegisterAllocator), funcTy);
    registerFuncOp.setVisibility(mlir::SymbolTable::Visibility::Private);
    auto cufRegisterAllocatorRef = mlir::SymbolRefAttr::get(
        mod.getContext(), RTNAME_STRING(CUFRegisterAllocator));
    builder.setInsertionPointToEnd(mod.getBody());

    // Create the constructor function that calls CUFRegisterAllocator.
    auto func = builder.create<mlir::LLVM::LLVMFuncOp>(loc, cudaFortranCtorName,
                                                       funcTy);
    func.setLinkage(mlir::LLVM::Linkage::Internal);
    builder.setInsertionPointToStart(func.addEntryBlock(builder));
    builder.create<mlir::LLVM::CallOp>(loc, funcTy, cufRegisterAllocatorRef);

    auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName);
    if (gpuMod) {
      auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(ctx);
      auto registeredMod = builder.create<cuf::RegisterModuleOp>(
          loc, llvmPtrTy, mlir::SymbolRefAttr::get(ctx, gpuMod.getName()));

      fir::LLVMTypeConverter typeConverter(mod, /*applyTBAA=*/false,
                                           /*forceUnifiedTBAATree=*/false, *dl);
      // Register kernels
      for (auto func : gpuMod.getOps<mlir::gpu::GPUFuncOp>()) {
        if (func.isKernel()) {
          auto kernelName = mlir::SymbolRefAttr::get(
              builder.getStringAttr(cudaDeviceModuleName),
              {mlir::SymbolRefAttr::get(builder.getContext(), func.getName())});
          builder.create<cuf::RegisterKernelOp>(loc, kernelName, registeredMod);
        }
      }

      // Register variables
      for (fir::GlobalOp globalOp : mod.getOps<fir::GlobalOp>()) {
        auto attr = globalOp.getDataAttrAttr();
        if (!attr)
          continue;

        mlir::func::FuncOp func;
        switch (attr.getValue()) {
        case cuf::DataAttribute::Device:
        case cuf::DataAttribute::Constant: {
          func = fir::runtime::getRuntimeFunc<mkRTKey(CUFRegisterVariable)>(
              loc, builder);
          auto fTy = func.getFunctionType();

          // Global variable name
          std::string gblNameStr = globalOp.getSymbol().getValue().str();
          gblNameStr += '\0';
          mlir::Value gblName = fir::getBase(
              fir::factory::createStringLiteral(builder, loc, gblNameStr));

          // Global variable size
          std::optional<uint64_t> size;
          if (auto boxTy =
                  mlir::dyn_cast<fir::BaseBoxType>(globalOp.getType())) {
            mlir::Type structTy = typeConverter.convertBoxTypeAsStruct(boxTy);
            size = dl->getTypeSizeInBits(structTy) / 8;
          }
          if (!size) {
            size = fir::getTypeSizeAndAlignmentOrCrash(loc, globalOp.getType(),
                                                       *dl, kindMap)
                       .first;
          }
          auto sizeVal = builder.createIntegerConstant(loc, idxTy, *size);

          // Global variable address
          mlir::Value addr = builder.create<fir::AddrOfOp>(
              loc, globalOp.resultType(), globalOp.getSymbol());

          llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
              builder, loc, fTy, registeredMod, addr, gblName, sizeVal)};
          builder.create<fir::CallOp>(loc, func, args);
        } break;
        case cuf::DataAttribute::Managed:
          TODO(loc, "registration of managed variables");
        default:
          break;
        }
      }
    }
    builder.create<mlir::LLVM::ReturnOp>(loc, mlir::ValueRange{});

    // Create the llvm.mlir.global_ctors op with the constructor function.
    // TODO: We might want to have a utility that retrieves the op if it was
    // already created and appends new functions to it.
    builder.setInsertionPointToEnd(mod.getBody());
    llvm::SmallVector<mlir::Attribute> funcs;
    funcs.push_back(
        mlir::FlatSymbolRefAttr::get(mod.getContext(), func.getSymName()));
    llvm::SmallVector<int> priorities;
    priorities.push_back(0);
    builder.create<mlir::LLVM::GlobalCtorsOp>(
        mod.getLoc(), builder.getArrayAttr(funcs),
        builder.getI32ArrayAttr(priorities));
  }
};

} // end anonymous namespace