After a global device pointer is set through `c_f_pointer`, we need to sync the double descriptor so the version on the device is also up to date.
90 lines
3.4 KiB
C++
90 lines
3.4 KiB
C++
//===-- CUFCommon.cpp - Shared functions between passes ---------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "flang/Optimizer/Builder/CUFCommon.h"
|
|
#include "flang/Optimizer/Builder/FIRBuilder.h"
|
|
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
|
|
#include "flang/Optimizer/HLFIR/HLFIROps.h"
|
|
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
|
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
|
|
#include "mlir/Dialect/OpenACC/OpenACC.h"
|
|
|
|
/// Retrieve or create the CUDA Fortran GPU module in the give in \p mod.
|
|
mlir::gpu::GPUModuleOp cuf::getOrCreateGPUModule(mlir::ModuleOp mod,
|
|
mlir::SymbolTable &symTab) {
|
|
if (auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName))
|
|
return gpuMod;
|
|
|
|
auto *ctx = mod.getContext();
|
|
mod->setAttr(mlir::gpu::GPUDialect::getContainerModuleAttrName(),
|
|
mlir::UnitAttr::get(ctx));
|
|
|
|
mlir::OpBuilder builder(ctx);
|
|
auto gpuMod = builder.create<mlir::gpu::GPUModuleOp>(mod.getLoc(),
|
|
cudaDeviceModuleName);
|
|
mlir::Block::iterator insertPt(mod.getBodyRegion().front().end());
|
|
symTab.insert(gpuMod, insertPt);
|
|
return gpuMod;
|
|
}
|
|
|
|
bool cuf::isCUDADeviceContext(mlir::Operation *op) {
|
|
if (!op || !op->getParentRegion())
|
|
return false;
|
|
return isCUDADeviceContext(*op->getParentRegion());
|
|
}
|
|
|
|
// Check if the insertion point is currently in a device context. HostDevice
|
|
// subprogram are not considered fully device context so it will return false
|
|
// for it.
|
|
// If the insertion point is inside an OpenACC region op, it is considered
|
|
// device context.
|
|
bool cuf::isCUDADeviceContext(mlir::Region ®ion) {
|
|
if (region.getParentOfType<cuf::KernelOp>())
|
|
return true;
|
|
if (region.getParentOfType<mlir::acc::ComputeRegionOpInterface>())
|
|
return true;
|
|
if (auto funcOp = region.getParentOfType<mlir::func::FuncOp>()) {
|
|
if (auto cudaProcAttr =
|
|
funcOp.getOperation()->getAttrOfType<cuf::ProcAttributeAttr>(
|
|
cuf::getProcAttrName())) {
|
|
return cudaProcAttr.getValue() != cuf::ProcAttribute::Host &&
|
|
cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool cuf::isRegisteredDeviceAttr(std::optional<cuf::DataAttribute> attr) {
|
|
if (attr && (*attr == cuf::DataAttribute::Device ||
|
|
*attr == cuf::DataAttribute::Managed ||
|
|
*attr == cuf::DataAttribute::Constant))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
bool cuf::isRegisteredDeviceGlobal(fir::GlobalOp op) {
|
|
if (op.getConstant())
|
|
return false;
|
|
return isRegisteredDeviceAttr(op.getDataAttr());
|
|
}
|
|
|
|
void cuf::genPointerSync(const mlir::Value box, fir::FirOpBuilder &builder) {
|
|
if (auto declareOp = box.getDefiningOp<hlfir::DeclareOp>()) {
|
|
if (auto addrOfOp = declareOp.getMemref().getDefiningOp<fir::AddrOfOp>()) {
|
|
auto mod = addrOfOp->getParentOfType<mlir::ModuleOp>();
|
|
if (auto globalOp =
|
|
mod.lookupSymbol<fir::GlobalOp>(addrOfOp.getSymbol())) {
|
|
if (cuf::isRegisteredDeviceGlobal(globalOp)) {
|
|
builder.create<cuf::SyncDescriptorOp>(box.getLoc(),
|
|
addrOfOp.getSymbol());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|