clang-p2996/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
agozillon 4186805060 [Flang][MLIR] Extend DataLayout utilities to have basic GPU Module support (#123149)
There are now areas where we can encounter either a ModuleOp or a
GPUModuleOp, both of which can carry a DataLayout, and where we may need
the DataLayout utilities. I've therefore extended those utilities to
work with both module types.

Those with more knowledge of how GPUModuleOps should interact with their
parent ModuleOp's DataLayout may wish to make further alterations in the
future, but for the moment this uses the basic data layout construction,
which I believe combines the parent and child data layouts from the
ModuleOp and GPUModuleOp. If the GPUModuleOp has no DataLayout of its
own, it should default to the parent ModuleOp's.
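
A minimal sketch of the intended usage, inside a pass like the one below
(hypothetical call site; it assumes the extended
fir::support::getOrSetMLIRDataLayout accepts a gpu::GPUModuleOp in addition
to a ModuleOp, as described above):

    // Query a layout through the extended utility; with no layout entries
    // on the gpu.module itself, queries should resolve against the parent
    // builtin.module's layout.
    mlir::ModuleOp mod = getOperation();
    mlir::SymbolTable symTab(mod);
    if (auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName)) {
      std::optional<mlir::DataLayout> dl = fir::support::getOrSetMLIRDataLayout(
          gpuMod, /*allowDefaultLayout=*/false);
      if (dl)
        (void)dl->getTypeSizeInBits(mlir::IndexType::get(mod.getContext()));
    }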

It's worth noting some weirdness when two module operations both define
builtin dialect DataLayout entries: the combination logic for
DataLayouts does not appear to support merging them.

This behaviour is useful for areas like:
https://github.com/llvm/llvm-project/pull/119585/files#diff-19fc4bcb38829d085e25d601d344bbd85bf7ef749ca359e348f4a7c750eae89dR1412
where we have a crossroads between the two different module operations.
2025-01-30 17:31:50 +01:00

//===-- CUFAddConstructor.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/CUFCommon.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/CodeGen/Target.h"
#include "flang/Optimizer/CodeGen/TypeConverter.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Support/DataLayout.h"
#include "flang/Runtime/CUDA/registration.h"
#include "flang/Runtime/entry-names.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Value.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/SmallVector.h"
namespace fir {
#define GEN_PASS_DEF_CUFADDCONSTRUCTOR
#include "flang/Optimizer/Transforms/Passes.h.inc"
} // namespace fir

using namespace Fortran::runtime::cuda;

namespace {

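// Name of the constructor function this pass creates and registers in
// llvm.global_ctors.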
static constexpr llvm::StringRef cudaFortranCtorName{
    "__cudaFortranConstructor"};

struct CUFAddConstructor
    : public fir::impl::CUFAddConstructorBase<CUFAddConstructor> {

  void runOnOperation() override {
    mlir::ModuleOp mod = getOperation();
    mlir::SymbolTable symTab(mod);
    mlir::OpBuilder opBuilder{mod.getBodyRegion()};
    fir::FirOpBuilder builder(opBuilder, mod);
    fir::KindMapping kindMap{fir::getKindMapping(mod)};
    builder.setInsertionPointToEnd(mod.getBody());
    mlir::Location loc = mod.getLoc();
    auto *ctx = mod.getContext();
    auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx);
    auto idxTy = builder.getIndexType();
    auto funcTy =
        mlir::LLVM::LLVMFunctionType::get(voidTy, {}, /*isVarArg=*/false);

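    // An explicit data layout is required: global sizes are computed from it
    // below, so do not fall back to a default layout.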
    std::optional<mlir::DataLayout> dl =
        fir::support::getOrSetMLIRDataLayout(mod, /*allowDefaultLayout=*/false);
    if (!dl) {
      mlir::emitError(mod.getLoc(),
                      "data layout attribute is required to perform " +
                          getName() + " pass");
      signalPassFailure();
      return;
    }

    // Symbol reference to CUFRegisterAllocator.
    builder.setInsertionPointToEnd(mod.getBody());
    auto registerFuncOp = builder.create<mlir::LLVM::LLVMFuncOp>(
        loc, RTNAME_STRING(CUFRegisterAllocator), funcTy);
    registerFuncOp.setVisibility(mlir::SymbolTable::Visibility::Private);
    auto cufRegisterAllocatorRef = mlir::SymbolRefAttr::get(
        mod.getContext(), RTNAME_STRING(CUFRegisterAllocator));
    builder.setInsertionPointToEnd(mod.getBody());

    // Create the constructor function that calls CUFRegisterAllocator.
    auto func = builder.create<mlir::LLVM::LLVMFuncOp>(loc, cudaFortranCtorName,
                                                       funcTy);
    func.setLinkage(mlir::LLVM::Linkage::Internal);
    builder.setInsertionPointToStart(func.addEntryBlock(builder));
    builder.create<mlir::LLVM::CallOp>(loc, funcTy, cufRegisterAllocatorRef);

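    // If the CUF device module exists, register it with the runtime along
    // with its kernels and its device/constant globals.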
    auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName);
    if (gpuMod) {
      auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(ctx);
      auto registeredMod = builder.create<cuf::RegisterModuleOp>(
          loc, llvmPtrTy, mlir::SymbolRefAttr::get(ctx, gpuMod.getName()));
      fir::LLVMTypeConverter typeConverter(mod, /*applyTBAA=*/false,
                                           /*forceUnifiedTBAATree=*/false, *dl);
      // Register kernels
      for (auto func : gpuMod.getOps<mlir::gpu::GPUFuncOp>()) {
        if (func.isKernel()) {
          auto kernelName = mlir::SymbolRefAttr::get(
              builder.getStringAttr(cudaDeviceModuleName),
              {mlir::SymbolRefAttr::get(builder.getContext(), func.getName())});
          builder.create<cuf::RegisterKernelOp>(loc, kernelName, registeredMod);
        }
      }

      // Register variables
      for (fir::GlobalOp globalOp : mod.getOps<fir::GlobalOp>()) {
        auto attr = globalOp.getDataAttrAttr();
        if (!attr)
          continue;
        mlir::func::FuncOp func;
        switch (attr.getValue()) {
        case cuf::DataAttribute::Device:
        case cuf::DataAttribute::Constant: {
          func = fir::runtime::getRuntimeFunc<mkRTKey(CUFRegisterVariable)>(
              loc, builder);
          auto fTy = func.getFunctionType();
          // Global variable name
          std::string gblNameStr = globalOp.getSymbol().getValue().str();
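          // Append an explicit NUL so the runtime receives a C-style string.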
          gblNameStr += '\0';
          mlir::Value gblName = fir::getBase(
              fir::factory::createStringLiteral(builder, loc, gblNameStr));
          // Global variable size
          std::optional<uint64_t> size;
          if (auto boxTy =
                  mlir::dyn_cast<fir::BaseBoxType>(globalOp.getType())) {
            mlir::Type structTy = typeConverter.convertBoxTypeAsStruct(boxTy);
            size = dl->getTypeSizeInBits(structTy) / 8;
          }
          if (!size) {
            size = fir::getTypeSizeAndAlignmentOrCrash(loc, globalOp.getType(),
                                                       *dl, kindMap)
                       .first;
          }
          auto sizeVal = builder.createIntegerConstant(loc, idxTy, *size);
          // Global variable address
          mlir::Value addr = builder.create<fir::AddrOfOp>(
              loc, globalOp.resultType(), globalOp.getSymbol());
          llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
              builder, loc, fTy, registeredMod, addr, gblName, sizeVal)};
          builder.create<fir::CallOp>(loc, func, args);
        } break;
        case cuf::DataAttribute::Managed:
          TODO(loc, "registration of managed variables");
        default:
          break;
        }
      }
    }
    builder.create<mlir::LLVM::ReturnOp>(loc, mlir::ValueRange{});

    // Create the llvm.global_ctors entry with the constructor function.
    // TODO: We might want a utility that retrieves the op if it has already
    // been created and appends new functions to it.
    builder.setInsertionPointToEnd(mod.getBody());
    llvm::SmallVector<mlir::Attribute> funcs;
    funcs.push_back(
        mlir::FlatSymbolRefAttr::get(mod.getContext(), func.getSymName()));
    llvm::SmallVector<int> priorities;
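    // Constructors with lower priority values run first; priority 0 runs this
    // registration very early.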
    priorities.push_back(0);
    builder.create<mlir::LLVM::GlobalCtorsOp>(
        mod.getLoc(), builder.getArrayAttr(funcs),
        builder.getI32ArrayAttr(priorities));
  }
};

} // end anonymous namespace