[flang][cuda] Move CUDA Fortran operations to a CUF dialect (#92317)

The number of operations dedicated to CUF has grown, and they were all
still in FIR. For better organization, the CUF operations, attributes,
and related code are moved into their own dialect and files. The CUF
dialect is tightly coupled with HLFIR/FIR and their types.

The CUF attributes are bundled into their own library because some
HLFIR/FIR operations depend on them while the CUF dialect depends on the
FIR types. Without the attributes in a separate library there would be a
dependency cycle.
Valentin Clement (バレンタイン クレメン)
2024-05-17 09:37:53 -07:00
committed by GitHub
parent d90159add4
commit 45daa4fdc6
56 changed files with 1184 additions and 817 deletions
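
At a glance, the change renames the CUDA-specific FIR attributes and operations into the new `cuf` namespace and dialect. Below is a minimal sketch of what a lowering call site looks like after the move; the function name is illustrative, the include paths are assumptions based on the files added in this commit, and the surrounding builder context is taken for granted (the same pattern appears in the ConvertVariable.cpp hunk further down):

```
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
// Also needs the Fortran::lower declaration of
// translateSymbolCUFDataAttribute from the lowering header edited in the
// first hunk of this commit (not named in this diff).

// Sketch only: fir::CUDADataAttributeAttr becomes cuf::DataAttributeAttr and
// the former fir.cuda_alloc operation becomes cuf.alloc.
static mlir::Value allocDeviceLocal(fir::FirOpBuilder &builder,
                                    mlir::Location loc, mlir::Type ty,
                                    const Fortran::semantics::Symbol &sym) {
  // Translate the symbol's semantic CUDA attribute into the CUF attribute.
  cuf::DataAttributeAttr dataAttr =
      Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
                                                      sym);
  // Allocate the device object; type parameters and shape default to empty.
  return builder.create<cuf::AllocOp>(loc, ty, /*uniqName=*/"x",
                                      /*bindcName=*/"", dataAttr);
}
```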


@@ -23,6 +23,10 @@
#include "mlir/IR/Value.h"
#include "llvm/ADT/DenseMap.h"
namespace cuf {
class DataAttributeAttr;
}
namespace fir {
class ExtendedValue;
class FirOpBuilder;
@@ -146,9 +150,9 @@ translateSymbolAttributes(mlir::MLIRContext *mlirContext,
/// Translate the CUDA Fortran attributes of \p sym into the FIR CUDA attribute
/// representation.
fir::CUDADataAttributeAttr
translateSymbolCUDADataAttribute(mlir::MLIRContext *mlirContext,
const Fortran::semantics::Symbol &sym);
cuf::DataAttributeAttr
translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext,
const Fortran::semantics::Symbol &sym);
/// Map a symbol to a given fir::ExtendedValue. This will generate an
/// hlfir.declare when lowering to HLFIR and map the hlfir.declare result to the


@@ -254,13 +254,13 @@ public:
mlir::StringAttr linkage = {},
mlir::Attribute value = {}, bool isConst = false,
bool isTarget = false,
fir::CUDADataAttributeAttr cudaAttr = {});
cuf::DataAttributeAttr dataAttr = {});
fir::GlobalOp createGlobal(mlir::Location loc, mlir::Type type,
llvm::StringRef name, bool isConst, bool isTarget,
std::function<void(FirOpBuilder &)> bodyBuilder,
mlir::StringAttr linkage = {},
fir::CUDADataAttributeAttr cudaAttr = {});
cuf::DataAttributeAttr dataAttr = {});
/// Create a global constant (read-only) value.
fir::GlobalOp createGlobalConstant(mlir::Location loc, mlir::Type type,


@@ -239,7 +239,7 @@ genDeclare(mlir::Location loc, fir::FirOpBuilder &builder,
const fir::ExtendedValue &exv, llvm::StringRef name,
fir::FortranVariableFlagsAttr flags,
mlir::Value dummyScope = nullptr,
fir::CUDADataAttributeAttr cudaAttr = {});
cuf::DataAttributeAttr dataAttr = {});
/// Generate an hlfir.associate to build a variable from an expression value.
/// The type of the variable must be provided so that scalar logicals are


@@ -1,3 +1,5 @@
add_subdirectory(CUF)
# This replicates part of the add_mlir_dialect cmake function from MLIR that
# cannot be used here because it expects to be run inside the MLIR directory which
# is not the case for FIR.


@@ -0,0 +1,7 @@
set(LLVM_TARGET_DEFINITIONS CUFAttr.td)
mlir_tablegen(CUFEnumAttr.h.inc -gen-enum-decls)
mlir_tablegen(CUFEnumAttr.cpp.inc -gen-enum-defs)
mlir_tablegen(CUFAttr.h.inc --gen-attrdef-decls)
mlir_tablegen(CUFAttr.cpp.inc -gen-attrdef-defs)
add_public_tablegen_target(CUFAttrsIncGen)


@@ -0,0 +1,106 @@
//===-- Optimizer/Dialect/CUF/Attributes/CUFAttr.h -- CUF attributes ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//
#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H
#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H
#include "flang/Common/Fortran.h"
#include "mlir/IR/BuiltinAttributes.h"
namespace llvm {
class StringRef;
}
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFEnumAttr.h.inc"
#define GET_ATTRDEF_CLASSES
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h.inc"
namespace cuf {
/// Attribute to mark Fortran entities with the CUDA attribute.
static constexpr llvm::StringRef getDataAttrName() { return "cuf.data_attr"; }
static constexpr llvm::StringRef getProcAttrName() { return "cuf.proc_attr"; }
/// Attribute to carry CUDA launch_bounds values.
static constexpr llvm::StringRef getLaunchBoundsAttrName() {
return "cuf.launch_bounds";
}
/// Attribute to carry CUDA cluster_dims values.
static constexpr llvm::StringRef getClusterDimsAttrName() {
return "cuf.cluster_dims";
}
inline cuf::DataAttributeAttr
getDataAttribute(mlir::MLIRContext *mlirContext,
std::optional<Fortran::common::CUDADataAttr> cudaAttr) {
if (cudaAttr) {
cuf::DataAttribute attr;
switch (*cudaAttr) {
case Fortran::common::CUDADataAttr::Constant:
attr = cuf::DataAttribute::Constant;
break;
case Fortran::common::CUDADataAttr::Device:
attr = cuf::DataAttribute::Device;
break;
case Fortran::common::CUDADataAttr::Managed:
attr = cuf::DataAttribute::Managed;
break;
case Fortran::common::CUDADataAttr::Pinned:
attr = cuf::DataAttribute::Pinned;
break;
case Fortran::common::CUDADataAttr::Shared:
attr = cuf::DataAttribute::Shared;
break;
case Fortran::common::CUDADataAttr::Texture:
// Obsolete attribute
return {};
case Fortran::common::CUDADataAttr::Unified:
attr = cuf::DataAttribute::Unified;
break;
}
return cuf::DataAttributeAttr::get(mlirContext, attr);
}
return {};
}
inline cuf::ProcAttributeAttr
getProcAttribute(mlir::MLIRContext *mlirContext,
std::optional<Fortran::common::CUDASubprogramAttrs> cudaAttr) {
if (cudaAttr) {
cuf::ProcAttribute attr;
switch (*cudaAttr) {
case Fortran::common::CUDASubprogramAttrs::Host:
attr = cuf::ProcAttribute::Host;
break;
case Fortran::common::CUDASubprogramAttrs::Device:
attr = cuf::ProcAttribute::Device;
break;
case Fortran::common::CUDASubprogramAttrs::HostDevice:
attr = cuf::ProcAttribute::HostDevice;
break;
case Fortran::common::CUDASubprogramAttrs::Global:
attr = cuf::ProcAttribute::Global;
break;
case Fortran::common::CUDASubprogramAttrs::Grid_Global:
attr = cuf::ProcAttribute::GridGlobal;
break;
}
return cuf::ProcAttributeAttr::get(mlirContext, attr);
}
return {};
}
} // namespace cuf
#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H
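
The discriminated attribute names and the conversion helpers above are what lowering uses to tag operations and functions. A minimal sketch of tagging a `func.func` with the CUF procedure attribute, mirroring the setCUDAAttributes change further down in this commit (the helper name is illustrative and the caller's context is assumed):

```
#include "flang/Common/Fortran.h"
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"

// Attach the CUF procedure attribute under its discriminated name
// ("cuf.proc_attr", see getProcAttrName above).
static void markCudaProc(mlir::func::FuncOp func,
                         Fortran::common::CUDASubprogramAttrs procAttrs) {
  func->setAttr(cuf::getProcAttrName(),
                cuf::getProcAttribute(func.getContext(), procAttrs));
}
```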


@@ -0,0 +1,100 @@
//===- CUFAttr.td - CUF Attributes -------------------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the CUF dialect attributes.
//
//===----------------------------------------------------------------------===//
#ifndef FORTRAN_DIALECT_CUF_CUFATTRS
#define FORTRAN_DIALECT_CUF_CUFATTRS
include "flang/Optimizer/Dialect/CUF/CUFDialect.td"
include "mlir/IR/EnumAttr.td"
class cuf_Attr<string name> : AttrDef<CUFDialect, name>;
def cuf_DataAttribute : I32EnumAttr<
"DataAttribute",
"CUDA Fortran variable attributes",
[
I32EnumAttrCase<"Constant", 0, "constant">,
I32EnumAttrCase<"Device", 1, "device">,
I32EnumAttrCase<"Managed", 2, "managed">,
I32EnumAttrCase<"Pinned", 3, "pinned">,
I32EnumAttrCase<"Shared", 4, "shared">,
I32EnumAttrCase<"Unified", 5, "unified">,
// Texture is omitted since it is obsolete and rejected by semantics.
]> {
let genSpecializedAttr = 0;
let cppNamespace = "::cuf";
}
def cuf_DataAttributeAttr :
EnumAttr<CUFDialect, cuf_DataAttribute, "cuda"> {
let assemblyFormat = [{ ```<` $value `>` }];
}
def cuf_ProcAttribute : I32EnumAttr<
"ProcAttribute", "CUDA Fortran procedure attributes",
[
I32EnumAttrCase<"Host", 0, "host">,
I32EnumAttrCase<"Device", 1, "device">,
I32EnumAttrCase<"HostDevice", 2, "host_device">,
I32EnumAttrCase<"Global", 3, "global">,
I32EnumAttrCase<"GridGlobal", 4, "grid_global">,
]> {
let genSpecializedAttr = 0;
let cppNamespace = "::cuf";
}
def cuf_ProcAttributeAttr :
EnumAttr<CUFDialect, cuf_ProcAttribute, "cuda_proc"> {
let assemblyFormat = [{ ```<` $value `>` }];
}
def cuf_LaunchBoundsAttr : cuf_Attr<"LaunchBounds"> {
let mnemonic = "launch_bounds";
let parameters = (ins
"mlir::IntegerAttr":$maxTPB,
"mlir::IntegerAttr":$minBPM,
OptionalParameter<"mlir::IntegerAttr">:$upperBoundClusterSize
);
let assemblyFormat = "`<` struct(params) `>`";
}
def cuf_ClusterDimsAttr : cuf_Attr<"ClusterDims"> {
let mnemonic = "cluster_dims";
let parameters = (ins
"mlir::IntegerAttr":$x,
"mlir::IntegerAttr":$y,
"mlir::IntegerAttr":$z
);
let assemblyFormat = "`<` struct(params) `>`";
}
def cuf_DataTransferKind : I32EnumAttr<
"DataTransferKind", "CUDA Fortran data transfer kind",
[
I32EnumAttrCase<"DeviceHost", 0, "device_host">,
I32EnumAttrCase<"HostDevice", 1, "host_device">,
I32EnumAttrCase<"DeviceDevice", 2, "device_device">,
]> {
let genSpecializedAttr = 0;
let cppNamespace = "::cuf";
}
def cuf_DataTransferKindAttr :
EnumAttr<CUFDialect, cuf_DataTransferKind, "cuda_transfer"> {
let assemblyFormat = [{ ```<` $value `>` }];
}
#endif // FORTRAN_DIALECT_CUF_CUFATTRS


@@ -0,0 +1,11 @@
add_subdirectory(Attributes)
set(LLVM_TARGET_DEFINITIONS CUFDialect.td)
mlir_tablegen(CUFDialect.h.inc -gen-dialect-decls -dialect=cuf)
mlir_tablegen(CUFDialect.cpp.inc -gen-dialect-defs -dialect=cuf)
set(LLVM_TARGET_DEFINITIONS CUFOps.td)
mlir_tablegen(CUFOps.h.inc -gen-op-decls)
mlir_tablegen(CUFOps.cpp.inc -gen-op-defs)
add_public_tablegen_target(CUFOpsIncGen)


@@ -0,0 +1,26 @@
//===-- Optimizer/Dialect/CUFDialect.h -- CUF dialect -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//
#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H
#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H
#include "mlir/Bytecode/BytecodeOpInterface.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h.inc"
#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H


@@ -0,0 +1,43 @@
//===-- CUFDialect.td - CUF dialect base definitions -------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Definition of the CUDA Fortran dialect
///
//===----------------------------------------------------------------------===//
#ifndef FORTRAN_DIALECT_CUF_CUFDIALECT
#define FORTRAN_DIALECT_CUF_CUFDIALECT
include "mlir/IR/AttrTypeBase.td"
include "mlir/IR/EnumAttr.td"
include "mlir/IR/OpBase.td"
def CUFDialect : Dialect {
let name = "cuf";
let summary = "CUDA Fortran dialect";
let description = [{
This dialect models CUDA Fortran operations. The CUF dialect operations use
the FIR types and are tightly coupled with FIR and HLFIR.
}];
let useDefaultAttributePrinterParser = 1;
let usePropertiesForAttributes = 1;
let cppNamespace = "::cuf";
let dependentDialects = ["fir::FIROpsDialect"];
let extraClassDeclaration = [{
private:
// Register the CUF Attributes.
void registerAttributes();
}];
}
#endif // FORTRAN_DIALECT_CUF_CUFDIALECT


@@ -0,0 +1,20 @@
//===-- Optimizer/Dialect/CUF/CUFOps.h - CUF operations ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H
#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "mlir/IR/OpDefinition.h"
#define GET_OP_CLASSES
#include "flang/Optimizer/Dialect/CUF/CUFOps.h.inc"
#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H


@@ -0,0 +1,263 @@
//===-- CUFOps.td - CUF operation definitions --------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Definition of the CUF dialect operations
///
//===----------------------------------------------------------------------===//
#ifndef FORTRAN_DIALECT_CUF_CUF_OPS
#define FORTRAN_DIALECT_CUF_CUF_OPS
include "flang/Optimizer/Dialect/CUF/CUFDialect.td"
include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
include "flang/Optimizer/Dialect/FIRTypes.td"
include "mlir/Interfaces/LoopLikeInterface.td"
include "mlir/IR/BuiltinAttributes.td"
class cuf_Op<string mnemonic, list<Trait> traits>
: Op<CUFDialect, mnemonic, traits>;
def cuf_AllocOp : cuf_Op<"alloc", [AttrSizedOperandSegments,
MemoryEffects<[MemAlloc]>]> {
let summary = "Allocate an object on device";
let description = [{
This is a drop-in replacement for fir.alloca and fir.allocmem for device
objects. Any device, managed, or unified object declared in a host
subprogram needs to be allocated in device memory through runtime calls.
cuf.alloc is an abstraction over these runtime calls and works together
with cuf.free.
}];
let arguments = (ins
TypeAttr:$in_type,
OptionalAttr<StrAttr>:$uniq_name,
OptionalAttr<StrAttr>:$bindc_name,
Variadic<AnyIntegerType>:$typeparams,
Variadic<AnyIntegerType>:$shape,
cuf_DataAttributeAttr:$data_attr
);
let results = (outs fir_ReferenceType:$ptr);
let assemblyFormat = [{
$in_type (`(` $typeparams^ `:` type($typeparams) `)`)?
(`,` $shape^ `:` type($shape) )? attr-dict `->` qualified(type($ptr))
}];
let builders = [
OpBuilder<(ins "mlir::Type":$inType, "llvm::StringRef":$uniqName,
"llvm::StringRef":$bindcName,
"cuf::DataAttributeAttr":$cudaAttr,
CArg<"mlir::ValueRange", "{}">:$typeparams,
CArg<"mlir::ValueRange", "{}">:$shape,
CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>];
let hasVerifier = 1;
}
def cuf_FreeOp : cuf_Op<"free", [MemoryEffects<[MemFree]>]> {
let summary = "Free a device allocated object";
let description = [{
The cuf.free operation frees the memory allocated by cuf.alloc.
This is used for non-allocatable device, managed, and unified
variables declared in host subprograms.
}];
let arguments = (ins
Arg<AnyReferenceLike, "", [MemFree]>:$devptr,
cuf_DataAttributeAttr:$data_attr
);
let assemblyFormat = "$devptr `:` qualified(type($devptr)) attr-dict";
let hasVerifier = 1;
}
def cuf_AllocateOp : cuf_Op<"allocate", [AttrSizedOperandSegments,
MemoryEffects<[MemAlloc<DefaultResource>]>]> {
let summary = "Perform the device allocation of data of an allocatable";
let description = [{
The cuf.allocate operation performs the device allocation of the data
of an allocatable. The descriptor passed to the operation has been
initialized beforehand with the standard flang runtime calls.
}];
let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
Optional<AnyIntegerType>:$stream,
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
cuf_DataAttributeAttr:$data_attr,
UnitAttr:$hasStat);
let results = (outs AnyIntegerType:$stat);
let assemblyFormat = [{
$box `:` qualified(type($box))
( `source` `(` $source^ `:` qualified(type($source) )`)` )?
( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
( `stream` `(` $stream^ `:` type($stream) `)` )?
( `pinned` `(` $pinned^ `:` type($pinned) `)` )?
attr-dict `->` type($stat)
}];
let hasVerifier = 1;
}
def cuf_DeallocateOp : cuf_Op<"deallocate",
[MemoryEffects<[MemFree<DefaultResource>]>]> {
let summary = "Perform the device deallocation of data of an allocatable";
let description = [{
The cuf.deallocate operation performs the device deallocation of the
data of an allocatable.
}];
let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
cuf_DataAttributeAttr:$data_attr,
UnitAttr:$hasStat);
let results = (outs AnyIntegerType:$stat);
let assemblyFormat = [{
$box `:` qualified(type($box))
( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
attr-dict `->` type($stat)
}];
let hasVerifier = 1;
}
def cuf_DataTransferOp : cuf_Op<"data_transfer", []> {
let summary = "Represent a data transfer between host and device memory";
let description = [{
CUDA Fortran allows data transfers to be done via intrinsic assignment
between a host and a device variable. This operation is used to materialize
the data transfer between the lhs and rhs memory references.
The kind of transfer is specified in the attribute.
```
adev = a ! transfer host to device
a = adev ! transfer device to host
bdev = adev ! transfer device to device
```
}];
let arguments = (ins Arg<AnyReferenceLike, "", [MemWrite]>:$src,
Arg<AnyReferenceLike, "", [MemRead]>:$dst,
cuf_DataTransferKindAttr:$transfer_kind);
let assemblyFormat = [{
$src `to` $dst attr-dict `:` type(operands)
}];
}
def cuf_KernelLaunchOp : cuf_Op<"kernel_launch", [CallOpInterface,
AttrSizedOperandSegments]> {
let summary = "call CUDA kernel";
let description = [{
Launch a CUDA kernel from the host.
```
// launch simple kernel with no arguments. bytes and stream value are
// optional in the chevron notation.
cuf.kernel_launch @kernel<<<%gx, %gy, %bx, %by, %bz>>>()
```
}];
let arguments = (ins
SymbolRefAttr:$callee,
I32:$grid_x,
I32:$grid_y,
I32:$grid_z,
I32:$block_x,
I32:$block_y,
I32:$block_z,
Optional<I32>:$bytes,
Optional<I32>:$stream,
Variadic<AnyType>:$args
);
let assemblyFormat = [{
$callee `<` `<` `<` $grid_x `,` $grid_y `,` $grid_z `,`$block_x `,`
$block_y `,` $block_z ( `,` $bytes^ ( `,` $stream^ )? )? `>` `>` `>`
`` `(` $args `)` ( `:` `(` type($args)^ `)` )? attr-dict
}];
let extraClassDeclaration = [{
mlir::CallInterfaceCallable getCallableForCallee() {
return getCalleeAttr();
}
void setCalleeFromCallable(mlir::CallInterfaceCallable callee) {
(*this)->setAttr(getCalleeAttrName(), callee.get<mlir::SymbolRefAttr>());
}
mlir::FunctionType getFunctionType();
unsigned getNbNoArgOperand() {
unsigned nbNoArgOperand = 5; // grids and blocks values are always present.
if (getBytes()) ++nbNoArgOperand;
if (getStream()) ++nbNoArgOperand;
return nbNoArgOperand;
}
operand_range getArgOperands() {
return {operand_begin() + getNbNoArgOperand(), operand_end()};
}
mlir::MutableOperandRange getArgOperandsMutable() {
return mlir::MutableOperandRange(
*this, getNbNoArgOperand(), getArgs().size() - 1);
}
}];
}
def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
let description = [{
Represent the CUDA Fortran kernel directive. The operation is a loop-like
operation that represents the iteration range of the embedded loop nest.
When the grid or block variadic operands are empty, a `*`-only syntax was
used in the Fortran code.
If the `*` is mixed with values for either grid or block, these are
represented by a 0 constant value.
}];
let arguments = (ins
Variadic<I32>:$grid, // empty means `*`
Variadic<I32>:$block, // empty means `*`
Optional<I32>:$stream,
Variadic<Index>:$lowerbound,
Variadic<Index>:$upperbound,
Variadic<Index>:$step,
OptionalAttr<I64Attr>:$n
);
let regions = (region AnyRegion:$region);
let assemblyFormat = [{
`<` `<` `<` custom<CUFKernelValues>($grid, type($grid)) `,`
custom<CUFKernelValues>($block, type($block))
( `,` `stream` `=` $stream^ )? `>` `>` `>`
custom<CUFKernelLoopControl>($region, $lowerbound, type($lowerbound),
$upperbound, type($upperbound), $step, type($step))
attr-dict
}];
let hasVerifier = 1;
}
#endif // FORTRAN_DIALECT_CUF_CUF_OPS
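
As the cuf.alloc description notes, cuf.alloc and cuf.free work as a pair: storage allocated for a device-resident local is released with cuf.free when the host scope ends. A minimal sketch of the cleanup side, assuming a builder, a location, the base address produced by cuf.alloc, and the variable's data attribute (the same create call appears in the instantiateLocal cleanup further down):

```
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"

// Free the device storage of a local allocated with cuf.alloc; dataAttr
// carries the device/managed/unified/pinned kind.
static void freeDeviceLocal(fir::FirOpBuilder &builder, mlir::Location loc,
                            mlir::Value devPtr,
                            cuf::DataAttributeAttr dataAttr) {
  builder.create<cuf::FreeOp>(loc, devPtr, dataAttr);
}
```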


@@ -70,87 +70,4 @@ def fir_BoxFieldAttr : I32EnumAttr<
// mlir::SideEffects::Resource for modelling operations which add debugging information
def DebuggingResource : Resource<"::fir::DebuggingResource">;
//===----------------------------------------------------------------------===//
// CUDA Fortran specific attributes
//===----------------------------------------------------------------------===//
def fir_CUDADataAttribute : I32EnumAttr<
"CUDADataAttribute",
"CUDA Fortran variable attributes",
[
I32EnumAttrCase<"Constant", 0, "constant">,
I32EnumAttrCase<"Device", 1, "device">,
I32EnumAttrCase<"Managed", 2, "managed">,
I32EnumAttrCase<"Pinned", 3, "pinned">,
I32EnumAttrCase<"Shared", 4, "shared">,
I32EnumAttrCase<"Unified", 5, "unified">,
// Texture is omitted since it is obsolete and rejected by semantic.
]> {
let genSpecializedAttr = 0;
let cppNamespace = "::fir";
}
def fir_CUDADataAttributeAttr :
EnumAttr<FIROpsDialect, fir_CUDADataAttribute, "cuda"> {
let assemblyFormat = [{ ```<` $value `>` }];
}
def fir_CUDAProcAttribute : I32EnumAttr<
"CUDAProcAttribute", "CUDA Fortran procedure attributes",
[
I32EnumAttrCase<"Host", 0, "host">,
I32EnumAttrCase<"Device", 1, "device">,
I32EnumAttrCase<"HostDevice", 2, "host_device">,
I32EnumAttrCase<"Global", 3, "global">,
I32EnumAttrCase<"GridGlobal", 4, "grid_global">,
]> {
let genSpecializedAttr = 0;
let cppNamespace = "::fir";
}
def fir_CUDAProcAttributeAttr :
EnumAttr<FIROpsDialect, fir_CUDAProcAttribute, "cuda_proc"> {
let assemblyFormat = [{ ```<` $value `>` }];
}
def fir_CUDALaunchBoundsAttr : fir_Attr<"CUDALaunchBounds"> {
let mnemonic = "launch_bounds";
let parameters = (ins
"mlir::IntegerAttr":$maxTPB,
"mlir::IntegerAttr":$minBPM,
OptionalParameter<"mlir::IntegerAttr">:$upperBoundClusterSize
);
let assemblyFormat = "`<` struct(params) `>`";
}
def fir_CUDAClusterDimsAttr : fir_Attr<"CUDAClusterDims"> {
let mnemonic = "cluster_dims";
let parameters = (ins
"mlir::IntegerAttr":$x,
"mlir::IntegerAttr":$y,
"mlir::IntegerAttr":$z
);
let assemblyFormat = "`<` struct(params) `>`";
}
def fir_CUDADataTransferKind : I32EnumAttr<
"CUDADataTransferKind", "CUDA Fortran data transfer kind",
[
I32EnumAttrCase<"DeviceHost", 0, "device_host">,
I32EnumAttrCase<"HostDevice", 1, "host_device">,
I32EnumAttrCase<"DeviceDevice", 2, "device_device">,
]> {
let genSpecializedAttr = 0;
let cppNamespace = "::fir";
}
def fir_CUDADataTransferKindAttr :
EnumAttr<FIROpsDialect, fir_CUDADataTransferKind, "cuda_transfer"> {
let assemblyFormat = [{ ```<` $value `>` }];
}
#endif // FIR_DIALECT_FIR_ATTRS


@@ -9,6 +9,7 @@
#ifndef FORTRAN_OPTIMIZER_DIALECT_FIROPS_H
#define FORTRAN_OPTIMIZER_DIALECT_FIROPS_H
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Dialect/FirAliasTagOpInterface.h"


@@ -17,6 +17,7 @@
include "mlir/Dialect/Arith/IR/ArithBase.td"
include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td"
include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td"
include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
include "flang/Optimizer/Dialect/FIRDialect.td"
include "flang/Optimizer/Dialect/FIRTypes.td"
include "flang/Optimizer/Dialect/FIRAttr.td"
@@ -2436,66 +2437,6 @@ def fir_DispatchOp : fir_Op<"dispatch", []> {
}];
}
def fir_CUDAKernelLaunch : fir_Op<"cuda_kernel_launch", [CallOpInterface,
AttrSizedOperandSegments]> {
let summary = "call CUDA kernel";
let description = [{
Launch a CUDA kernel from the host.
```
// launch simple kernel with no arguments. bytes and stream value are
// optional in the chevron notation.
fir.cuda_kernel_launch @kernel<<<%gx, %gy, %bx, %by, %bz>>>()
```
}];
let arguments = (ins
SymbolRefAttr:$callee,
I32:$grid_x,
I32:$grid_y,
I32:$grid_z,
I32:$block_x,
I32:$block_y,
I32:$block_z,
Optional<I32>:$bytes,
Optional<I32>:$stream,
Variadic<AnyType>:$args
);
let assemblyFormat = [{
$callee `<` `<` `<` $grid_x `,` $grid_y `,` $grid_z `,`$block_x `,`
$block_y `,` $block_z ( `,` $bytes^ ( `,` $stream^ )? )? `>` `>` `>`
`` `(` $args `)` ( `:` `(` type($args)^ `)` )? attr-dict
}];
let extraClassDeclaration = [{
mlir::CallInterfaceCallable getCallableForCallee() {
return getCalleeAttr();
}
void setCalleeFromCallable(mlir::CallInterfaceCallable callee) {
(*this)->setAttr(getCalleeAttrName(), callee.get<mlir::SymbolRefAttr>());
}
mlir::FunctionType getFunctionType();
unsigned getNbNoArgOperand() {
unsigned nbNoArgOperand = 5; // grids and blocks values are always present.
if (getBytes()) ++nbNoArgOperand;
if (getStream()) ++nbNoArgOperand;
return nbNoArgOperand;
}
operand_range getArgOperands() {
return {operand_begin() + getNbNoArgOperand(), operand_end()};
}
mlir::MutableOperandRange getArgOperandsMutable() {
return mlir::MutableOperandRange(
*this, getNbNoArgOperand(), getArgs().size() - 1);
}
}];
}
// Constant operations that support Fortran
def fir_StringLitOp : fir_Op<"string_lit", [NoMemoryEffect]> {
@@ -2797,7 +2738,7 @@ def fir_GlobalOp : fir_Op<"global", [IsolatedFromAbove, Symbol]> {
OptionalAttr<UnitAttr>:$constant,
OptionalAttr<UnitAttr>:$target,
OptionalAttr<StrAttr>:$linkName,
OptionalAttr<fir_CUDADataAttributeAttr>:$cuda_attr
OptionalAttr<cuf_DataAttributeAttr>:$data_attr
);
let regions = (region AtMostRegion<1>:$region);
@@ -3077,7 +3018,7 @@ def fir_DeclareOp : fir_Op<"declare", [AttrSizedOperandSegments,
Optional<fir_DummyScopeType>:$dummy_scope,
Builtin_StringAttr:$uniq_name,
OptionalAttr<fir_FortranVariableFlagsAttr>:$fortran_attrs,
OptionalAttr<fir_CUDADataAttributeAttr>:$cuda_attr
OptionalAttr<cuf_DataAttributeAttr>:$data_attr
);
let results = (outs AnyRefOrBox);
@@ -3130,125 +3071,6 @@ def fir_BoxOffsetOp : fir_Op<"box_offset", [NoMemoryEffect]> {
];
}
def fir_CUDAKernelOp : fir_Op<"cuda_kernel", [AttrSizedOperandSegments,
DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
let description = [{
Represent the CUDA Fortran kernel directive. The operation is a loop like
operation that represents the iteration range of the embedded loop nest.
When grid or block variadic operands are empty, a `*` only syntax was used
in the Fortran code.
If the `*` is mixed with values for either grid or block, these are
represented by a 0 constant value.
}];
let arguments = (ins
Variadic<I32>:$grid, // empty means `*`
Variadic<I32>:$block, // empty means `*`
Optional<I32>:$stream,
Variadic<Index>:$lowerbound,
Variadic<Index>:$upperbound,
Variadic<Index>:$step,
OptionalAttr<I64Attr>:$n
);
let regions = (region AnyRegion:$region);
let assemblyFormat = [{
`<` `<` `<` custom<CUFKernelValues>($grid, type($grid)) `,`
custom<CUFKernelValues>($block, type($block))
( `,` `stream` `=` $stream^ )? `>` `>` `>`
custom<CUFKernelLoopControl>($region, $lowerbound, type($lowerbound),
$upperbound, type($upperbound), $step, type($step))
attr-dict
}];
let hasVerifier = 1;
}
def fir_CUDADataTransferOp : fir_Op<"cuda_data_transfer", []> {
let summary = "Represent a data transfer between host and device memory";
let description = [{
CUDA Fortran allows data transfer to be done via intrinsic assignment
between a host and a device variable. This operation is used to materialized
the data transfer between the lhs and rhs memory references.
The kind of transfer is specified in the attribute.
```
adev = a ! transfer host to device
a = adev ! transfer device to host
bdev = adev ! transfer device to device
```
}];
let arguments = (ins Arg<AnyReferenceLike, "", [MemWrite]>:$src,
Arg<AnyReferenceLike, "", [MemRead]>:$dst,
fir_CUDADataTransferKindAttr:$transfer_kind);
let assemblyFormat = [{
$src `to` $dst attr-dict `:` type(operands)
}];
}
def fir_CUDAAllocateOp : fir_Op<"cuda_allocate", [AttrSizedOperandSegments,
MemoryEffects<[MemAlloc<DefaultResource>]>]> {
let summary = "Perform the device allocation of data of an allocatable";
let description = [{
The fir.cuda_allocate operation performs the allocation on the device
of the data of an allocatable. The descriptor passed to the operation
is initialized before with the standard flang runtime calls.
}];
let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
Optional<AnyIntegerType>:$stream,
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
fir_CUDADataAttributeAttr:$cuda_attr,
UnitAttr:$hasStat);
let results = (outs AnyIntegerType:$stat);
let assemblyFormat = [{
$box `:` qualified(type($box))
( `source` `(` $source^ `:` qualified(type($source) )`)` )?
( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
( `stream` `(` $stream^ `:` type($stream) `)` )?
( `pinned` `(` $pinned^ `:` type($pinned) `)` )?
attr-dict `->` type($stat)
}];
let hasVerifier = 1;
}
def fir_CUDADeallocateOp : fir_Op<"cuda_deallocate",
[MemoryEffects<[MemFree<DefaultResource>]>]> {
let summary = "Perform the device deallocation of data of an allocatable";
let description = [{
The fir.cuda_deallocate operation performs the deallocation on the device
of the data of an allocatable.
}];
let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
fir_CUDADataAttributeAttr:$cuda_attr,
UnitAttr:$hasStat);
let results = (outs AnyIntegerType:$stat);
let assemblyFormat = [{
$box `:` qualified(type($box))
( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
attr-dict `->` type($stat)
}];
let hasVerifier = 1;
}
def fir_DummyScopeOp : fir_Op<"dummy_scope",
[MemoryEffects<[MemWrite<DebuggingResource>]>]> {
let summary = "Define a scope for dummy arguments";
@@ -3329,62 +3151,4 @@ def fir_DummyScopeOp : fir_Op<"dummy_scope",
let assemblyFormat = "attr-dict `:` type(results)";
}
def fir_CUDAAllocOp : fir_Op<"cuda_alloc", [AttrSizedOperandSegments,
MemoryEffects<[MemAlloc]>]> {
let summary = "Allocate an object on device";
let description = [{
This is a drop in replacement for fir.alloca and fir.allocmem for device
object. Any device, managed or unified object declared in an host
subprogram needs to be allocated in the device memory through runtime calls.
The fir.cuda_alloc is an abstraction to the runtime calls and works together
with fir.cuda_free.
}];
let arguments = (ins
TypeAttr:$in_type,
OptionalAttr<StrAttr>:$uniq_name,
OptionalAttr<StrAttr>:$bindc_name,
Variadic<AnyIntegerType>:$typeparams,
Variadic<AnyIntegerType>:$shape,
fir_CUDADataAttributeAttr:$cuda_attr
);
let results = (outs fir_ReferenceType:$ptr);
let assemblyFormat = [{
$in_type (`(` $typeparams^ `:` type($typeparams) `)`)?
(`,` $shape^ `:` type($shape) )? attr-dict `->` qualified(type($ptr))
}];
let builders = [
OpBuilder<(ins "mlir::Type":$inType, "llvm::StringRef":$uniqName,
"llvm::StringRef":$bindcName,
"fir::CUDADataAttributeAttr":$cudaAttr,
CArg<"mlir::ValueRange", "{}">:$typeparams,
CArg<"mlir::ValueRange", "{}">:$shape,
CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>];
let hasVerifier = 1;
}
def fir_CUDAFreeOp : fir_Op<"cuda_free", [MemoryEffects<[MemFree]>]> {
let summary = "Free a device allocated object";
let description = [{
The fir.cuda_free operation frees the memory allocated by fir.cuda_alloc.
This is used for non-allocatable device, managed and unified device
variables declare in host subprogram.
}];
let arguments = (ins
Arg<AnyReferenceLike, "", [MemFree]>:$devptr,
fir_CUDADataAttributeAttr:$cuda_attr
);
let assemblyFormat = "$devptr `:` qualified(type($devptr)) attr-dict";
let hasVerifier = 1;
}
#endif


@@ -75,19 +75,6 @@ constexpr llvm::StringRef getOptionalAttrName() { return "fir.optional"; }
/// Attribute to mark Fortran entities with the TARGET attribute.
static constexpr llvm::StringRef getTargetAttrName() { return "fir.target"; }
/// Attribute to mark Fortran entities with the CUDA attribute.
static constexpr llvm::StringRef getCUDAAttrName() { return "fir.cuda_attr"; }
/// Attribute to carry CUDA launch_bounds values.
static constexpr llvm::StringRef getCUDALaunchBoundsAttrName() {
return "fir.cuda_launch_bounds";
}
/// Attribute to carry CUDA cluster_dims values.
static constexpr llvm::StringRef getCUDAClusterDimsAttrName() {
return "fir.cuda_cluster_dims";
}
/// Attribute to mark that a function argument is a character dummy procedure.
/// Character dummy procedure have special ABI constraints.
static constexpr llvm::StringRef getCharacterProcedureDummyAttrName() {


@@ -15,6 +15,7 @@
#define FORTRAN_DIALECT_HLFIR_OPS
include "flang/Optimizer/HLFIR/HLFIROpBase.td"
include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
include "flang/Optimizer/Dialect/FIRTypes.td"
include "flang/Optimizer/Dialect/FIRAttr.td"
include "flang/Optimizer/Dialect/FortranVariableInterface.td"
@@ -90,7 +91,7 @@ def hlfir_DeclareOp : hlfir_Op<"declare", [AttrSizedOperandSegments,
Optional<fir_DummyScopeType>:$dummy_scope,
Builtin_StringAttr:$uniq_name,
OptionalAttr<fir_FortranVariableFlagsAttr>:$fortran_attrs,
OptionalAttr<fir_CUDADataAttributeAttr>:$cuda_attr
OptionalAttr<cuf_DataAttributeAttr>:$data_attr
);
let results = (outs AnyFortranVariable, AnyRefOrBoxLike);
@@ -106,7 +107,7 @@ def hlfir_DeclareOp : hlfir_Op<"declare", [AttrSizedOperandSegments,
CArg<"mlir::Value", "{}">:$shape, CArg<"mlir::ValueRange", "{}">:$typeparams,
CArg<"mlir::Value", "{}">:$dummy_scope,
CArg<"fir::FortranVariableFlagsAttr", "{}">:$fortran_attrs,
CArg<"fir::CUDADataAttributeAttr", "{}">:$cuda_attr)>];
CArg<"cuf::DataAttributeAttr", "{}">:$data_attr)>];
let extraClassDeclaration = [{
/// Get the variable original base (same as input). It lacks


@@ -13,6 +13,7 @@
#ifndef FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
#define FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "mlir/Conversion/Passes.h"
@@ -34,7 +35,7 @@ namespace fir::support {
mlir::scf::SCFDialect, mlir::arith::ArithDialect, \
mlir::cf::ControlFlowDialect, mlir::func::FuncDialect, \
mlir::vector::VectorDialect, mlir::math::MathDialect, \
mlir::complex::ComplexDialect, mlir::DLTIDialect
mlir::complex::ComplexDialect, mlir::DLTIDialect, cuf::CUFDialect
#define FLANG_CODEGEN_DIALECT_LIST FIRCodeGenDialect, mlir::LLVM::LLVMDialect
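Consumers that register dialects manually (rather than through the macro list above) now need the CUF dialect as well. A minimal sketch for a hypothetical standalone tool; the tool itself is not part of this commit:

```
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "mlir/IR/DialectRegistry.h"
#include "mlir/IR/MLIRContext.h"

int main() {
  mlir::DialectRegistry registry;
  // cuf declares fir as a dependent dialect, so loading cuf pulls in fir,
  // but registering both explicitly keeps the intent obvious.
  registry.insert<fir::FIROpsDialect, cuf::CUFDialect>();
  mlir::MLIRContext context(registry);
  context.loadAllAvailableDialects();
  return 0;
}
```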


@@ -16,6 +16,7 @@
#include "flang/Common/default-kinds.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Support/FatalError.h"
@@ -136,66 +137,6 @@ inline void intrinsicTypeTODO(fir::FirOpBuilder &builder, mlir::Type type,
" in " + intrinsicName);
}
inline fir::CUDADataAttributeAttr
getCUDADataAttribute(mlir::MLIRContext *mlirContext,
std::optional<Fortran::common::CUDADataAttr> cudaAttr) {
if (cudaAttr) {
fir::CUDADataAttribute attr;
switch (*cudaAttr) {
case Fortran::common::CUDADataAttr::Constant:
attr = fir::CUDADataAttribute::Constant;
break;
case Fortran::common::CUDADataAttr::Device:
attr = fir::CUDADataAttribute::Device;
break;
case Fortran::common::CUDADataAttr::Managed:
attr = fir::CUDADataAttribute::Managed;
break;
case Fortran::common::CUDADataAttr::Pinned:
attr = fir::CUDADataAttribute::Pinned;
break;
case Fortran::common::CUDADataAttr::Shared:
attr = fir::CUDADataAttribute::Shared;
break;
case Fortran::common::CUDADataAttr::Texture:
// Obsolete attribute
return {};
case Fortran::common::CUDADataAttr::Unified:
attr = fir::CUDADataAttribute::Unified;
break;
}
return fir::CUDADataAttributeAttr::get(mlirContext, attr);
}
return {};
}
inline fir::CUDAProcAttributeAttr getCUDAProcAttribute(
mlir::MLIRContext *mlirContext,
std::optional<Fortran::common::CUDASubprogramAttrs> cudaAttr) {
if (cudaAttr) {
fir::CUDAProcAttribute attr;
switch (*cudaAttr) {
case Fortran::common::CUDASubprogramAttrs::Host:
attr = fir::CUDAProcAttribute::Host;
break;
case Fortran::common::CUDASubprogramAttrs::Device:
attr = fir::CUDAProcAttribute::Device;
break;
case Fortran::common::CUDASubprogramAttrs::HostDevice:
attr = fir::CUDAProcAttribute::HostDevice;
break;
case Fortran::common::CUDASubprogramAttrs::Global:
attr = fir::CUDAProcAttribute::Global;
break;
case Fortran::common::CUDASubprogramAttrs::Grid_Global:
attr = fir::CUDAProcAttribute::GridGlobal;
break;
}
return fir::CUDAProcAttributeAttr::get(mlirContext, attr);
}
return {};
}
} // namespace fir
#endif // FORTRAN_OPTIMIZER_SUPPORT_UTILS_H


@@ -14,6 +14,7 @@ add_flang_library(flangFrontend
TextDiagnostic.cpp
DEPENDS
CUFDialect
FIRDialect
FIROptCodeGenPassIncGen
FIROptTransformsPassIncGen
@@ -23,6 +24,7 @@ add_flang_library(flangFrontend
${extension_libs}
LINK_LIBS
CUFDialect
FortranParser
FortranSemantics
FortranEvaluate


@@ -24,6 +24,7 @@
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Optimizer/Support/FatalError.h"
@@ -729,9 +730,9 @@ private:
ErrorManager &errorManager,
const Fortran::semantics::Symbol &sym) {
Fortran::lower::StatementContext stmtCtx;
fir::CUDADataAttributeAttr cudaAttr =
Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
sym);
cuf::DataAttributeAttr cudaAttr =
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
sym);
mlir::Value errmsg = errMsgExpr ? errorManager.errMsgAddr : nullptr;
mlir::Value stream =
streamExpr
@@ -746,7 +747,7 @@ private:
// Keep return type the same as a standard AllocatableAllocate call.
mlir::Type retTy = fir::runtime::getModel<int>()(builder.getContext());
return builder
.create<fir::CUDAAllocateOp>(
.create<cuf::AllocateOp>(
loc, retTy, box.getAddr(), errmsg, stream, pinned, source, cudaAttr,
errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr)
.getResult();
@@ -804,9 +805,9 @@ static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
const fir::MutableBoxValue &box,
ErrorManager &errorManager,
const Fortran::semantics::Symbol &sym) {
fir::CUDADataAttributeAttr cudaAttr =
Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
sym);
cuf::DataAttributeAttr cudaAttr =
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
sym);
mlir::Value errmsg =
mlir::isa<fir::AbsentOp>(errorManager.errMsgAddr.getDefiningOp())
? nullptr
@@ -815,7 +816,7 @@ static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
// Keep return type the same as a standard AllocatableAllocate call.
mlir::Type retTy = fir::runtime::getModel<int>()(builder.getContext());
return builder
.create<fir::CUDADeallocateOp>(
.create<cuf::DeallocateOp>(
loc, retTy, box.getAddr(), errmsg, cudaAttr,
errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr)
.getResult();


@@ -40,6 +40,8 @@
#include "flang/Optimizer/Builder/Runtime/Ragged.h"
#include "flang/Optimizer/Builder/Runtime/Stop.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
@@ -2648,8 +2650,8 @@ private:
loopEval = &*std::next(loopEval->getNestedEvaluations().begin());
}
auto op = builder->create<fir::CUDAKernelOp>(
loc, gridValues, blockValues, streamValue, lbs, ubs, steps, n);
auto op = builder->create<cuf::KernelOp>(loc, gridValues, blockValues,
streamValue, lbs, ubs, steps, n);
builder->createBlock(&op.getRegion(), op.getRegion().end(), ivTypes,
ivLocs);
mlir::Block &b = op.getRegion().back();
@@ -3719,46 +3721,44 @@ private:
// device = host
if (lhsIsDevice && !rhsIsDevice) {
auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
builder.getContext(), fir::CUDADataTransferKind::HostDevice);
auto transferKindAttr = cuf::DataTransferKindAttr::get(
builder.getContext(), cuf::DataTransferKind::HostDevice);
if (!rhs.isVariable()) {
auto associate = hlfir::genAssociateExpr(
loc, builder, rhs, rhs.getType(), ".cuf_host_tmp");
builder.create<fir::CUDADataTransferOp>(loc, associate.getBase(), lhs,
transferKindAttr);
builder.create<cuf::DataTransferOp>(loc, associate.getBase(), lhs,
transferKindAttr);
builder.create<hlfir::EndAssociateOp>(loc, associate);
} else {
builder.create<fir::CUDADataTransferOp>(loc, rhs, lhs,
transferKindAttr);
builder.create<cuf::DataTransferOp>(loc, rhs, lhs, transferKindAttr);
}
return;
}
// host = device
if (!lhsIsDevice && rhsIsDevice) {
auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
builder.getContext(), fir::CUDADataTransferKind::DeviceHost);
auto transferKindAttr = cuf::DataTransferKindAttr::get(
builder.getContext(), cuf::DataTransferKind::DeviceHost);
if (!rhs.isVariable()) {
// evaluateRhs loads scalar. Look for the memory reference to be used in
// the transfer.
if (mlir::isa_and_nonnull<fir::LoadOp>(rhs.getDefiningOp())) {
auto loadOp = mlir::dyn_cast<fir::LoadOp>(rhs.getDefiningOp());
builder.create<fir::CUDADataTransferOp>(loc, loadOp.getMemref(), lhs,
transferKindAttr);
builder.create<cuf::DataTransferOp>(loc, loadOp.getMemref(), lhs,
transferKindAttr);
return;
}
} else {
builder.create<fir::CUDADataTransferOp>(loc, rhs, lhs,
transferKindAttr);
builder.create<cuf::DataTransferOp>(loc, rhs, lhs, transferKindAttr);
}
return;
}
if (lhsIsDevice && rhsIsDevice) {
assert(rhs.isVariable() && "CUDA Fortran assignment rhs is not legal");
auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
builder.getContext(), fir::CUDADataTransferKind::DeviceDevice);
builder.create<fir::CUDADataTransferOp>(loc, rhs, lhs, transferKindAttr);
auto transferKindAttr = cuf::DataTransferKindAttr::get(
builder.getContext(), cuf::DataTransferKind::DeviceDevice);
builder.create<cuf::DataTransferOp>(loc, rhs, lhs, transferKindAttr);
return;
}
llvm_unreachable("Unhandled CUDA data transfer");
@@ -3769,8 +3769,8 @@ private:
const Fortran::evaluate::Assignment &assign) {
llvm::SmallVector<mlir::Value> temps;
localSymbols.pushScope();
auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
builder.getContext(), fir::CUDADataTransferKind::DeviceHost);
auto transferKindAttr = cuf::DataTransferKindAttr::get(
builder.getContext(), cuf::DataTransferKind::DeviceHost);
[[maybe_unused]] unsigned nbDeviceResidentObject = 0;
for (const Fortran::semantics::Symbol &sym :
Fortran::evaluate::CollectSymbols(assign.rhs)) {
@@ -3795,8 +3795,8 @@ private:
addSymbol(sym,
hlfir::translateToExtendedValue(loc, builder, temp).first,
/*forced=*/true);
builder.create<fir::CUDADataTransferOp>(loc, addr, temp,
transferKindAttr);
builder.create<cuf::DataTransferOp>(loc, addr, temp,
transferKindAttr);
++nbDeviceResidentObject;
}
}
@@ -3808,15 +3808,15 @@ private:
// subprogram are not considered fully device context so it will return false
// for it.
static bool isDeviceContext(fir::FirOpBuilder &builder) {
if (builder.getRegion().getParentOfType<fir::CUDAKernelOp>())
if (builder.getRegion().getParentOfType<cuf::KernelOp>())
return true;
if (auto funcOp =
builder.getRegion().getParentOfType<mlir::func::FuncOp>()) {
if (auto cudaProcAttr =
funcOp.getOperation()->getAttrOfType<fir::CUDAProcAttributeAttr>(
fir::getCUDAAttrName())) {
return cudaProcAttr.getValue() != fir::CUDAProcAttribute::Host &&
cudaProcAttr.getValue() != fir::CUDAProcAttribute::HostDevice;
funcOp.getOperation()->getAttrOfType<cuf::ProcAttributeAttr>(
cuf::getProcAttrName())) {
return cudaProcAttr.getValue() != cuf::ProcAttribute::Host &&
cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice;
}
}
return false;
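
The assignment lowering above reduces to the same small pattern for every direction: pick a cuf::DataTransferKind and emit a cuf.data_transfer. A minimal sketch for the host-to-device case (`adev = a`), with the builder, location, and the two memory references assumed and the function name purely illustrative:

```
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"

static void genHostToDeviceTransfer(fir::FirOpBuilder &builder,
                                    mlir::Location loc, mlir::Value hostRef,
                                    mlir::Value deviceRef) {
  auto kind = cuf::DataTransferKindAttr::get(
      builder.getContext(), cuf::DataTransferKind::HostDevice);
  // Operand order follows the op definition: source first, destination second.
  builder.create<cuf::DataTransferOp>(loc, hostRef, deviceRef, kind);
}
```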


@@ -37,6 +37,8 @@ add_flang_library(FortranLower
VectorSubscripts.cpp
DEPENDS
CUFAttrs
CUFDialect
FIRDialect
FIRTransforms
HLFIRDialect
@@ -44,6 +46,8 @@ add_flang_library(FortranLower
${extension_libs}
LINK_LIBS
CUFAttrs
CUFDialect
FIRDialect
FIRDialectSupport
FIRBuilder


@@ -627,9 +627,9 @@ setCUDAAttributes(mlir::func::FuncOp func,
characteristic) {
if (characteristic && characteristic->cudaSubprogramAttrs) {
func.getOperation()->setAttr(
fir::getCUDAAttrName(),
fir::getCUDAProcAttribute(func.getContext(),
*characteristic->cudaSubprogramAttrs));
cuf::getProcAttrName(),
cuf::getProcAttribute(func.getContext(),
*characteristic->cudaSubprogramAttrs));
}
if (sym) {
@@ -649,9 +649,9 @@ setCUDAAttributes(mlir::func::FuncOp func,
ubAttr =
mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[2]);
func.getOperation()->setAttr(
fir::getCUDALaunchBoundsAttrName(),
fir::CUDALaunchBoundsAttr::get(func.getContext(), maxTPBAttr,
minBPMAttr, ubAttr));
cuf::getLaunchBoundsAttrName(),
cuf::LaunchBoundsAttr::get(func.getContext(), maxTPBAttr,
minBPMAttr, ubAttr));
}
if (!details->cudaClusterDims().empty()) {
@@ -663,9 +663,8 @@ setCUDAAttributes(mlir::func::FuncOp func,
auto zAttr =
mlir::IntegerAttr::get(i64Ty, details->cudaClusterDims()[2]);
func.getOperation()->setAttr(
fir::getCUDAClusterDimsAttrName(),
fir::CUDAClusterDimsAttr::get(func.getContext(), xAttr, yAttr,
zAttr));
cuf::getClusterDimsAttrName(),
cuf::ClusterDimsAttr::get(func.getContext(), xAttr, yAttr, zAttr));
}
}
}
@@ -1116,8 +1115,8 @@ private:
addMLIRAttr(fir::getTargetAttrName());
if (obj.cudaDataAttr)
attrs.emplace_back(
mlir::StringAttr::get(&mlirContext, fir::getCUDAAttrName()),
fir::getCUDADataAttribute(&mlirContext, obj.cudaDataAttr));
mlir::StringAttr::get(&mlirContext, cuf::getDataAttrName()),
cuf::getDataAttribute(&mlirContext, obj.cudaDataAttr));
// TODO: intents that require special care (e.g finalization)


@@ -28,6 +28,7 @@
#include "flang/Optimizer/Builder/MutableBox.h"
#include "flang/Optimizer/Builder/Runtime/Derived.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "mlir/IR/IRMapping.h"
@@ -589,7 +590,7 @@ std::pair<fir::ExtendedValue, bool> Fortran::lower::genCallOpAndResult(
fir::getBase(converter.genExprValue(
caller.getCallDescription().chevrons()[3], stmtCtx)));
builder.create<fir::CUDAKernelLaunch>(
builder.create<cuf::KernelLaunchOp>(
loc, funcType.getResults(), funcSymbolAttr, grid_x, grid_y, grid_z,
block_x, block_y, block_z, bytes, stream, operands);
callNumResults = 0;


@@ -30,6 +30,7 @@
#include "flang/Optimizer/Builder/IntrinsicCall.h"
#include "flang/Optimizer/Builder/Runtime/Derived.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
@@ -139,7 +140,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter,
const Fortran::lower::pft::Variable &var,
llvm::StringRef globalName,
mlir::StringAttr linkage,
fir::CUDADataAttributeAttr cudaAttr = {});
cuf::DataAttributeAttr dataAttr = {});
static mlir::Location genLocation(Fortran::lower::AbstractConverter &converter,
const Fortran::semantics::Symbol &sym) {
@@ -172,12 +173,12 @@ static fir::GlobalOp declareGlobal(Fortran::lower::AbstractConverter &converter,
!Fortran::semantics::IsProcedurePointer(ultimate))
mlir::emitError(loc, "processing global declaration: symbol '")
<< toStringRef(sym.name()) << "' has unexpected details\n";
fir::CUDADataAttributeAttr cudaAttr =
Fortran::lower::translateSymbolCUDADataAttribute(
cuf::DataAttributeAttr dataAttr =
Fortran::lower::translateSymbolCUFDataAttribute(
converter.getFirOpBuilder().getContext(), sym);
return builder.createGlobal(loc, converter.genType(var), globalName, linkage,
mlir::Attribute{}, isConstant(ultimate),
var.isTarget(), cudaAttr);
var.isTarget(), dataAttr);
}
/// Temporary helper to catch todos in initial data target lowering.
@@ -474,7 +475,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter,
const Fortran::lower::pft::Variable &var,
llvm::StringRef globalName,
mlir::StringAttr linkage,
fir::CUDADataAttributeAttr cudaAttr) {
cuf::DataAttributeAttr dataAttr) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
const Fortran::semantics::Symbol &sym = var.getSymbol();
mlir::Location loc = genLocation(converter, sym);
@@ -514,7 +515,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter,
if (!global)
global =
builder.createGlobal(loc, symTy, globalName, linkage, mlir::Attribute{},
isConst, var.isTarget(), cudaAttr);
isConst, var.isTarget(), dataAttr);
if (Fortran::semantics::IsAllocatableOrPointer(sym) &&
!Fortran::semantics::IsProcedure(sym)) {
const auto *details =
@@ -694,9 +695,9 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
return builder.create<fir::ZeroOp>(loc, fir::ReferenceType::get(ty));
if (Fortran::semantics::NeedCUDAAlloc(ultimateSymbol)) {
fir::CUDADataAttributeAttr cudaAttr =
Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
ultimateSymbol);
cuf::DataAttributeAttr dataAttr =
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
ultimateSymbol);
llvm::SmallVector<mlir::Value> indices;
llvm::SmallVector<mlir::Value> elidedShape =
fir::factory::elideExtentsAlreadyInType(ty, shape);
@@ -705,8 +706,8 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
auto idxTy = builder.getIndexType();
for (mlir::Value sh : elidedShape)
indices.push_back(builder.createConvert(loc, idxTy, sh));
return builder.create<fir::CUDAAllocOp>(loc, ty, nm, symNm, cudaAttr,
lenParams, indices);
return builder.create<cuf::AllocOp>(loc, ty, nm, symNm, dataAttr, lenParams,
indices);
}
// Let the builder do all the heavy lifting.
@@ -950,10 +951,10 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
converter.getSymbolExtendedValue(var.getSymbol(), &symMap);
auto *sym = &var.getSymbol();
converter.getFctCtx().attachCleanup([builder, loc, exv, sym]() {
fir::CUDADataAttributeAttr cudaAttr =
Fortran::lower::translateSymbolCUDADataAttribute(
builder->getContext(), *sym);
builder->create<fir::CUDAFreeOp>(loc, fir::getBase(exv), cudaAttr);
cuf::DataAttributeAttr dataAttr =
Fortran::lower::translateSymbolCUFDataAttribute(builder->getContext(),
*sym);
builder->create<cuf::FreeOp>(loc, fir::getBase(exv), dataAttr);
});
}
}
@@ -1628,11 +1629,11 @@ fir::FortranVariableFlagsAttr Fortran::lower::translateSymbolAttributes(
return fir::FortranVariableFlagsAttr::get(mlirContext, flags);
}
fir::CUDADataAttributeAttr Fortran::lower::translateSymbolCUDADataAttribute(
cuf::DataAttributeAttr Fortran::lower::translateSymbolCUFDataAttribute(
mlir::MLIRContext *mlirContext, const Fortran::semantics::Symbol &sym) {
std::optional<Fortran::common::CUDADataAttr> cudaAttr =
Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
return fir::getCUDADataAttribute(mlirContext, cudaAttr);
return cuf::getDataAttribute(mlirContext, cudaAttr);
}
/// Map a symbol to its FIR address and evaluated specification expressions.
@@ -1672,9 +1673,9 @@ static void genDeclareSymbol(Fortran::lower::AbstractConverter &converter,
auto name = converter.mangleName(sym);
fir::FortranVariableFlagsAttr attributes =
Fortran::lower::translateSymbolAttributes(builder.getContext(), sym);
fir::CUDADataAttributeAttr cudaAttr =
Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
sym);
cuf::DataAttributeAttr dataAttr =
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
sym);
if (sym.test(Fortran::semantics::Symbol::Flag::CrayPointee)) {
mlir::Type ptrBoxType =
@@ -1716,7 +1717,7 @@ static void genDeclareSymbol(Fortran::lower::AbstractConverter &converter,
dummyScope = converter.dummyArgsScopeValue();
auto newBase = builder.create<hlfir::DeclareOp>(
loc, base, name, shapeOrShift, lenParams, dummyScope, attributes,
cudaAttr);
dataAttr);
symMap.addVariableDefinition(sym, newBase, force);
return;
}
@@ -1762,15 +1763,15 @@ void Fortran::lower::genDeclareSymbol(
fir::FortranVariableFlagsAttr attributes =
Fortran::lower::translateSymbolAttributes(
builder.getContext(), sym.GetUltimate(), extraFlags);
fir::CUDADataAttributeAttr cudaAttr =
Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
sym.GetUltimate());
cuf::DataAttributeAttr dataAttr =
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
sym.GetUltimate());
auto name = converter.mangleName(sym);
mlir::Value dummyScope;
if (converter.isRegisteredDummySymbol(sym))
dummyScope = converter.dummyArgsScopeValue();
hlfir::EntityWithAttributes declare = hlfir::genDeclare(
loc, builder, exv, name, attributes, dummyScope, cudaAttr);
loc, builder, exv, name, attributes, dummyScope, dataAttr);
symMap.addVariableDefinition(sym, declare.getIfVariableInterface(), force);
return;
}
@@ -2272,10 +2273,10 @@ void Fortran::lower::defineModuleVariable(
// Do nothing. Mapping will be done on user side.
} else {
std::string globalName = converter.mangleName(sym);
fir::CUDADataAttributeAttr cudaAttr =
Fortran::lower::translateSymbolCUDADataAttribute(
cuf::DataAttributeAttr dataAttr =
Fortran::lower::translateSymbolCUFDataAttribute(
converter.getFirOpBuilder().getContext(), sym);
defineGlobal(converter, var, globalName, linkage, cudaAttr);
defineGlobal(converter, var, globalName, linkage, dataAttr);
}
}


@@ -322,18 +322,18 @@ mlir::Value fir::FirOpBuilder::createHeapTemporary(
fir::GlobalOp fir::FirOpBuilder::createGlobal(
mlir::Location loc, mlir::Type type, llvm::StringRef name,
mlir::StringAttr linkage, mlir::Attribute value, bool isConst,
bool isTarget, fir::CUDADataAttributeAttr cudaAttr) {
bool isTarget, cuf::DataAttributeAttr dataAttr) {
if (auto global = getNamedGlobal(name))
return global;
auto module = getModule();
auto insertPt = saveInsertionPoint();
setInsertionPoint(module.getBody(), module.getBody()->end());
llvm::SmallVector<mlir::NamedAttribute> attrs;
if (cudaAttr) {
if (dataAttr) {
auto globalOpName = mlir::OperationName(fir::GlobalOp::getOperationName(),
module.getContext());
attrs.push_back(mlir::NamedAttribute(
fir::GlobalOp::getCudaAttrAttrName(globalOpName), cudaAttr));
fir::GlobalOp::getDataAttrAttrName(globalOpName), dataAttr));
}
auto glob = create<fir::GlobalOp>(loc, name, isConst, isTarget, type, value,
linkage, attrs);
@@ -346,7 +346,7 @@ fir::GlobalOp fir::FirOpBuilder::createGlobal(
fir::GlobalOp fir::FirOpBuilder::createGlobal(
mlir::Location loc, mlir::Type type, llvm::StringRef name, bool isConst,
bool isTarget, std::function<void(FirOpBuilder &)> bodyBuilder,
mlir::StringAttr linkage, fir::CUDADataAttributeAttr cudaAttr) {
mlir::StringAttr linkage, cuf::DataAttributeAttr dataAttr) {
if (auto global = getNamedGlobal(name))
return global;
auto module = getModule();
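
With the updated signature, callers that need a device global simply pass the CUF data attribute through and createGlobal attaches it to the fir.global as shown above; a null attribute keeps the previous behavior. A minimal sketch, assuming a builder, location, type, and attribute (the wrapper name is illustrative):

```
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"

static fir::GlobalOp defineDeviceGlobal(fir::FirOpBuilder &builder,
                                        mlir::Location loc, mlir::Type ty,
                                        llvm::StringRef name,
                                        cuf::DataAttributeAttr dataAttr) {
  // Defaults are used for linkage, initial value, constness, and target.
  return builder.createGlobal(loc, ty, name, /*linkage=*/{}, /*value=*/{},
                              /*isConst=*/false, /*isTarget=*/false, dataAttr);
}
```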


@@ -199,7 +199,7 @@ fir::FortranVariableOpInterface
hlfir::genDeclare(mlir::Location loc, fir::FirOpBuilder &builder,
const fir::ExtendedValue &exv, llvm::StringRef name,
fir::FortranVariableFlagsAttr flags, mlir::Value dummyScope,
fir::CUDADataAttributeAttr cudaAttr) {
cuf::DataAttributeAttr dataAttr) {
mlir::Value base = fir::getBase(exv);
assert(fir::conformsWithPassByRef(base.getType()) &&
@@ -229,7 +229,7 @@ hlfir::genDeclare(mlir::Location loc, fir::FirOpBuilder &builder,
},
[](const auto &) {});
auto declareOp = builder.create<hlfir::DeclareOp>(
loc, base, name, shapeOrShift, lenParams, dummyScope, flags, cudaAttr);
loc, base, name, shapeOrShift, lenParams, dummyScope, flags, dataAttr);
return mlir::cast<fir::FortranVariableOpInterface>(declareOp.getOperation());
}


@@ -1,4 +1,5 @@
add_subdirectory(Support)
add_subdirectory(CUF)
add_flang_library(FIRDialect
FIRAttr.cpp
@@ -13,9 +14,11 @@ add_flang_library(FIRDialect
CanonicalizationPatternsIncGen
MLIRIR
FIROpsIncGen
CUFAttrsIncGen
intrinsics_gen
LINK_LIBS
CUFAttrs
FIRDialectSupport
MLIRArithDialect
MLIRBuiltinToLLVMIRTranslation


@@ -0,0 +1,16 @@
# Keep the CUF attributes in a separate library since FIR and HLFIR depend on it.
add_flang_library(CUFAttrs
CUFAttr.cpp
DEPENDS
MLIRIR
CUFAttrsIncGen
LINK_LIBS
MLIRTargetLLVMIRExport
LINK_COMPONENTS
AsmParser
AsmPrinter
Remarks
)


@@ -0,0 +1,32 @@
//===-- CUFAttr.cpp -------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/DialectImplementation.h"
#include "mlir/IR/OpDefinition.h"
#include "llvm/ADT/TypeSwitch.h"
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFEnumAttr.cpp.inc"
#define GET_ATTRDEF_CLASSES
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp.inc"
namespace cuf {
void CUFDialect::registerAttributes() {
addAttributes<ClusterDimsAttr, DataAttributeAttr, DataTransferKindAttr,
LaunchBoundsAttr, ProcAttributeAttr>();
}
} // namespace cuf
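
For reference, the relocated attribute is built from C++ the same way the hlfir.declare-to-fir.declare conversion further down does it; a minimal sketch, assuming the tablegen-generated DataAttribute enum and its get builder (the helper name and ctx parameter are illustrative):

#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"

// Minimal sketch: build the CUF data attribute for a device variable.
// `ctx` is assumed to be an MLIRContext with the CUF dialect loaded.
static cuf::DataAttributeAttr getDeviceDataAttr(mlir::MLIRContext *ctx) {
  return cuf::DataAttributeAttr::get(ctx, cuf::DataAttribute::Device);
}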

View File

@@ -0,0 +1,22 @@
add_subdirectory(Attributes)
add_flang_library(CUFDialect
CUFDialect.cpp
CUFOps.cpp
DEPENDS
MLIRIR
CUFOpsIncGen
LINK_LIBS
CUFAttrs
FIRDialect
FIRDialectSupport
MLIRIR
MLIRTargetLLVMIRExport
LINK_COMPONENTS
AsmParser
AsmPrinter
Remarks
)

View File

@@ -0,0 +1,25 @@
//===-- CUFDialect.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/CUF/CUFDialect.cpp.inc"
void cuf::CUFDialect::initialize() {
registerAttributes();
addOperations<
#define GET_OP_LIST
#include "flang/Optimizer/Dialect/CUF/CUFOps.cpp.inc"
>();
}
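
Downstream tools that parse or create these operations now need the new dialect registered alongside FIR/HLFIR; a rough sketch, assuming a standard MLIR DialectRegistry setup (the helper name is illustrative):

#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "mlir/IR/DialectRegistry.h"

// Illustrative helper: make the CUF dialect available to a tool's context.
static void registerCUFDialect(mlir::DialectRegistry &registry) {
  registry.insert<cuf::CUFDialect>();
}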

View File

@@ -0,0 +1,219 @@
//===-- CUFOps.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Diagnostics.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/PatternMatch.h"
#include "llvm/ADT/SmallVector.h"
//===----------------------------------------------------------------------===//
// AllocOp
//===----------------------------------------------------------------------===//
static mlir::Type wrapAllocaResultType(mlir::Type intype) {
if (mlir::isa<fir::ReferenceType>(intype))
return {};
return fir::ReferenceType::get(intype);
}
void cuf::AllocOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
mlir::Type inType, llvm::StringRef uniqName,
llvm::StringRef bindcName,
cuf::DataAttributeAttr cudaAttr,
mlir::ValueRange typeparams, mlir::ValueRange shape,
llvm::ArrayRef<mlir::NamedAttribute> attributes) {
mlir::StringAttr nameAttr =
uniqName.empty() ? mlir::StringAttr{} : builder.getStringAttr(uniqName);
mlir::StringAttr bindcAttr =
bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName);
build(builder, result, wrapAllocaResultType(inType),
mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape,
cudaAttr);
result.addAttributes(attributes);
}
template <typename Op>
static mlir::LogicalResult checkCudaAttr(Op op) {
if (op.getDataAttr() == cuf::DataAttribute::Device ||
op.getDataAttr() == cuf::DataAttribute::Managed ||
op.getDataAttr() == cuf::DataAttribute::Unified)
return mlir::success();
return op.emitOpError("expect device, managed or unified cuda attribute");
}
mlir::LogicalResult cuf::AllocOp::verify() { return checkCudaAttr(*this); }
//===----------------------------------------------------------------------===//
// FreeOp
//===----------------------------------------------------------------------===//
mlir::LogicalResult cuf::FreeOp::verify() { return checkCudaAttr(*this); }
//===----------------------------------------------------------------------===//
// AllocateOp
//===----------------------------------------------------------------------===//
mlir::LogicalResult cuf::AllocateOp::verify() {
if (getPinned() && getStream())
return emitOpError("pinned and stream cannot appears at the same time");
if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
return emitOpError(
"expect box to be a reference to a class or box type value");
if (getSource() &&
!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getSource().getType())))
return emitOpError(
"expect source to be a reference to/or a class or box type value");
if (getErrmsg() &&
!mlir::isa<fir::BoxType>(fir::unwrapRefType(getErrmsg().getType())))
return emitOpError(
"expect errmsg to be a reference to/or a box type value");
if (getErrmsg() && !getHasStat())
return emitOpError("expect stat attribute when errmsg is provided");
return mlir::success();
}
//===----------------------------------------------------------------------===//
// DeallocateOp
//===----------------------------------------------------------------------===//
mlir::LogicalResult cuf::DeallocateOp::verify() {
if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
return emitOpError(
"expect box to be a reference to class or box type value");
if (getErrmsg() &&
!mlir::isa<fir::BoxType>(fir::unwrapRefType(getErrmsg().getType())))
return emitOpError(
"expect errmsg to be a reference to/or a box type value");
if (getErrmsg() && !getHasStat())
return emitOpError("expect stat attribute when errmsg is provided");
return mlir::success();
}
//===----------------------------------------------------------------------===//
// KernelOp
//===----------------------------------------------------------------------===//
llvm::SmallVector<mlir::Region *> cuf::KernelOp::getLoopRegions() {
return {&getRegion()};
}
mlir::ParseResult parseCUFKernelValues(
mlir::OpAsmParser &parser,
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &values,
llvm::SmallVectorImpl<mlir::Type> &types) {
if (mlir::succeeded(parser.parseOptionalStar()))
return mlir::success();
if (mlir::succeeded(parser.parseOptionalLParen())) {
if (mlir::failed(parser.parseCommaSeparatedList(
mlir::AsmParser::Delimiter::None, [&]() {
if (parser.parseOperand(values.emplace_back()))
return mlir::failure();
return mlir::success();
})))
return mlir::failure();
auto builder = parser.getBuilder();
for (size_t i = 0; i < values.size(); i++) {
types.emplace_back(builder.getI32Type());
}
if (parser.parseRParen())
return mlir::failure();
} else {
if (parser.parseOperand(values.emplace_back()))
return mlir::failure();
auto builder = parser.getBuilder();
types.emplace_back(builder.getI32Type());
return mlir::success();
}
return mlir::success();
}
void printCUFKernelValues(mlir::OpAsmPrinter &p, mlir::Operation *op,
mlir::ValueRange values, mlir::TypeRange types) {
if (values.empty())
p << "*";
if (values.size() > 1)
p << "(";
llvm::interleaveComma(values, p, [&p](mlir::Value v) { p << v; });
if (values.size() > 1)
p << ")";
}
mlir::ParseResult parseCUFKernelLoopControl(
mlir::OpAsmParser &parser, mlir::Region &region,
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &lowerbound,
llvm::SmallVectorImpl<mlir::Type> &lowerboundType,
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &upperbound,
llvm::SmallVectorImpl<mlir::Type> &upperboundType,
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &step,
llvm::SmallVectorImpl<mlir::Type> &stepType) {
llvm::SmallVector<mlir::OpAsmParser::Argument> inductionVars;
if (parser.parseLParen() ||
parser.parseArgumentList(inductionVars,
mlir::OpAsmParser::Delimiter::None,
/*allowType=*/true) ||
parser.parseRParen() || parser.parseEqual() || parser.parseLParen() ||
parser.parseOperandList(lowerbound, inductionVars.size(),
mlir::OpAsmParser::Delimiter::None) ||
parser.parseColonTypeList(lowerboundType) || parser.parseRParen() ||
parser.parseKeyword("to") || parser.parseLParen() ||
parser.parseOperandList(upperbound, inductionVars.size(),
mlir::OpAsmParser::Delimiter::None) ||
parser.parseColonTypeList(upperboundType) || parser.parseRParen() ||
parser.parseKeyword("step") || parser.parseLParen() ||
parser.parseOperandList(step, inductionVars.size(),
mlir::OpAsmParser::Delimiter::None) ||
parser.parseColonTypeList(stepType) || parser.parseRParen())
return mlir::failure();
return parser.parseRegion(region, inductionVars);
}
void printCUFKernelLoopControl(
mlir::OpAsmPrinter &p, mlir::Operation *op, mlir::Region &region,
mlir::ValueRange lowerbound, mlir::TypeRange lowerboundType,
mlir::ValueRange upperbound, mlir::TypeRange upperboundType,
mlir::ValueRange steps, mlir::TypeRange stepType) {
mlir::ValueRange regionArgs = region.front().getArguments();
if (!regionArgs.empty()) {
p << "(";
llvm::interleaveComma(
regionArgs, p, [&p](mlir::Value v) { p << v << " : " << v.getType(); });
p << ") = (" << lowerbound << " : " << lowerboundType << ") to ("
<< upperbound << " : " << upperboundType << ") "
<< " step (" << steps << " : " << stepType << ") ";
}
p.printRegion(region, /*printEntryBlockArgs=*/false);
}
mlir::LogicalResult cuf::KernelOp::verify() {
if (getLowerbound().size() != getUpperbound().size() ||
getLowerbound().size() != getStep().size())
return emitOpError(
"expect same number of values in lowerbound, upperbound and step");
return mlir::success();
}
// Tablegen operators
#define GET_OP_CLASSES
#include "flang/Optimizer/Dialect/CUF/CUFOps.cpp.inc"

View File

@@ -298,7 +298,5 @@ void fir::printFirAttribute(FIROpsDialect *dialect, mlir::Attribute attr,
void FIROpsDialect::registerAttributes() {
addAttributes<ClosedIntervalAttr, ExactTypeAttr, FortranVariableFlagsAttr,
LowerBoundAttr, PointIntervalAttr, RealAttr, SubclassAttr,
UpperBoundAttr, CUDADataAttributeAttr, CUDAProcAttributeAttr,
CUDALaunchBoundsAttr, CUDAClusterDimsAttr,
CUDADataTransferKindAttr>();
UpperBoundAttr>();
}

View File

@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"

View File

@@ -3898,169 +3898,6 @@ mlir::LogicalResult fir::DeclareOp::verify() {
return fortranVar.verifyDeclareLikeOpImpl(getMemref());
}
llvm::SmallVector<mlir::Region *> fir::CUDAKernelOp::getLoopRegions() {
return {&getRegion()};
}
mlir::ParseResult parseCUFKernelValues(
mlir::OpAsmParser &parser,
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &values,
llvm::SmallVectorImpl<mlir::Type> &types) {
if (mlir::succeeded(parser.parseOptionalStar()))
return mlir::success();
if (mlir::succeeded(parser.parseOptionalLParen())) {
if (mlir::failed(parser.parseCommaSeparatedList(
mlir::AsmParser::Delimiter::None, [&]() {
if (parser.parseOperand(values.emplace_back()))
return mlir::failure();
return mlir::success();
})))
return mlir::failure();
auto builder = parser.getBuilder();
for (size_t i = 0; i < values.size(); i++) {
types.emplace_back(builder.getI32Type());
}
if (parser.parseRParen())
return mlir::failure();
} else {
if (parser.parseOperand(values.emplace_back()))
return mlir::failure();
auto builder = parser.getBuilder();
types.emplace_back(builder.getI32Type());
return mlir::success();
}
return mlir::success();
}
void printCUFKernelValues(mlir::OpAsmPrinter &p, mlir::Operation *op,
mlir::ValueRange values, mlir::TypeRange types) {
if (values.empty())
p << "*";
if (values.size() > 1)
p << "(";
llvm::interleaveComma(values, p, [&p](mlir::Value v) { p << v; });
if (values.size() > 1)
p << ")";
}
mlir::ParseResult parseCUFKernelLoopControl(
mlir::OpAsmParser &parser, mlir::Region &region,
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &lowerbound,
llvm::SmallVectorImpl<mlir::Type> &lowerboundType,
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &upperbound,
llvm::SmallVectorImpl<mlir::Type> &upperboundType,
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &step,
llvm::SmallVectorImpl<mlir::Type> &stepType) {
llvm::SmallVector<mlir::OpAsmParser::Argument> inductionVars;
if (parser.parseLParen() ||
parser.parseArgumentList(inductionVars,
mlir::OpAsmParser::Delimiter::None,
/*allowType=*/true) ||
parser.parseRParen() || parser.parseEqual() || parser.parseLParen() ||
parser.parseOperandList(lowerbound, inductionVars.size(),
mlir::OpAsmParser::Delimiter::None) ||
parser.parseColonTypeList(lowerboundType) || parser.parseRParen() ||
parser.parseKeyword("to") || parser.parseLParen() ||
parser.parseOperandList(upperbound, inductionVars.size(),
mlir::OpAsmParser::Delimiter::None) ||
parser.parseColonTypeList(upperboundType) || parser.parseRParen() ||
parser.parseKeyword("step") || parser.parseLParen() ||
parser.parseOperandList(step, inductionVars.size(),
mlir::OpAsmParser::Delimiter::None) ||
parser.parseColonTypeList(stepType) || parser.parseRParen())
return mlir::failure();
return parser.parseRegion(region, inductionVars);
}
void printCUFKernelLoopControl(
mlir::OpAsmPrinter &p, mlir::Operation *op, mlir::Region &region,
mlir::ValueRange lowerbound, mlir::TypeRange lowerboundType,
mlir::ValueRange upperbound, mlir::TypeRange upperboundType,
mlir::ValueRange steps, mlir::TypeRange stepType) {
mlir::ValueRange regionArgs = region.front().getArguments();
if (!regionArgs.empty()) {
p << "(";
llvm::interleaveComma(
regionArgs, p, [&p](mlir::Value v) { p << v << " : " << v.getType(); });
p << ") = (" << lowerbound << " : " << lowerboundType << ") to ("
<< upperbound << " : " << upperboundType << ") "
<< " step (" << steps << " : " << stepType << ") ";
}
p.printRegion(region, /*printEntryBlockArgs=*/false);
}
mlir::LogicalResult fir::CUDAKernelOp::verify() {
if (getLowerbound().size() != getUpperbound().size() ||
getLowerbound().size() != getStep().size())
return emitOpError(
"expect same number of values in lowerbound, upperbound and step");
return mlir::success();
}
mlir::LogicalResult fir::CUDAAllocateOp::verify() {
if (getPinned() && getStream())
return emitOpError("pinned and stream cannot appears at the same time");
if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
return emitOpError(
"expect box to be a reference to a class or box type value");
if (getSource() &&
!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getSource().getType())))
return emitOpError(
"expect source to be a reference to/or a class or box type value");
if (getErrmsg() &&
!mlir::isa<fir::BoxType>(fir::unwrapRefType(getErrmsg().getType())))
return emitOpError(
"expect errmsg to be a reference to/or a box type value");
if (getErrmsg() && !getHasStat())
return emitOpError("expect stat attribute when errmsg is provided");
return mlir::success();
}
mlir::LogicalResult fir::CUDADeallocateOp::verify() {
if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
return emitOpError(
"expect box to be a reference to class or box type value");
if (getErrmsg() &&
!mlir::isa<fir::BoxType>(fir::unwrapRefType(getErrmsg().getType())))
return emitOpError(
"expect errmsg to be a reference to/or a box type value");
if (getErrmsg() && !getHasStat())
return emitOpError("expect stat attribute when errmsg is provided");
return mlir::success();
}
void fir::CUDAAllocOp::build(
mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Type inType,
llvm::StringRef uniqName, llvm::StringRef bindcName,
fir::CUDADataAttributeAttr cudaAttr, mlir::ValueRange typeparams,
mlir::ValueRange shape, llvm::ArrayRef<mlir::NamedAttribute> attributes) {
mlir::StringAttr nameAttr =
uniqName.empty() ? mlir::StringAttr{} : builder.getStringAttr(uniqName);
mlir::StringAttr bindcAttr =
bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName);
build(builder, result, wrapAllocaResultType(inType),
mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape,
cudaAttr);
result.addAttributes(attributes);
}
template <typename Op>
static mlir::LogicalResult checkCudaAttr(Op op) {
if (op.getCudaAttr() == fir::CUDADataAttribute::Device ||
op.getCudaAttr() == fir::CUDADataAttribute::Managed ||
op.getCudaAttr() == fir::CUDADataAttribute::Unified)
return mlir::success();
return op.emitOpError("expect device, managed or unified cuda attribute");
}
mlir::LogicalResult fir::CUDAAllocOp::verify() { return checkCudaAttr(*this); }
mlir::LogicalResult fir::CUDAFreeOp::verify() { return checkCudaAttr(*this); }
//===----------------------------------------------------------------------===//
// FIROpsDialect
//===----------------------------------------------------------------------===//

View File

@@ -5,11 +5,13 @@ add_flang_library(HLFIRDialect
HLFIROps.cpp
DEPENDS
CUFAttrsIncGen
FIRDialect
HLFIROpsIncGen
${dialect_libs}
LINK_LIBS
CUFAttrs
FIRDialect
MLIRIR
${dialect_libs}

View File

@@ -127,14 +127,14 @@ void hlfir::DeclareOp::build(mlir::OpBuilder &builder,
mlir::ValueRange typeparams,
mlir::Value dummy_scope,
fir::FortranVariableFlagsAttr fortran_attrs,
fir::CUDADataAttributeAttr cuda_attr) {
cuf::DataAttributeAttr data_attr) {
auto nameAttr = builder.getStringAttr(uniq_name);
mlir::Type inputType = memref.getType();
bool hasExplicitLbs = hasExplicitLowerBounds(shape);
mlir::Type hlfirVariableType =
getHLFIRVariableType(inputType, hasExplicitLbs);
build(builder, result, {hlfirVariableType, inputType}, memref, shape,
typeparams, dummy_scope, nameAttr, fortran_attrs, cuda_attr);
typeparams, dummy_scope, nameAttr, fortran_attrs, data_attr);
}
mlir::LogicalResult hlfir::DeclareOp::verify() {

View File

@@ -11,11 +11,13 @@ add_flang_library(HLFIRTransforms
OptimizedBufferization.cpp
DEPENDS
CUFAttrsIncGen
FIRDialect
HLFIROpsIncGen
${dialect_libs}
LINK_LIBS
CUFAttrs
FIRAnalysis
FIRDialect
FIRBuilder

View File

@@ -320,16 +320,16 @@ public:
mlir::Location loc = declareOp->getLoc();
mlir::Value memref = declareOp.getMemref();
fir::FortranVariableFlagsAttr fortranAttrs;
fir::CUDADataAttributeAttr cudaAttr;
cuf::DataAttributeAttr dataAttr;
if (auto attrs = declareOp.getFortranAttrs())
fortranAttrs =
fir::FortranVariableFlagsAttr::get(rewriter.getContext(), *attrs);
if (auto attr = declareOp.getCudaAttr())
cudaAttr = fir::CUDADataAttributeAttr::get(rewriter.getContext(), *attr);
if (auto attr = declareOp.getDataAttr())
dataAttr = cuf::DataAttributeAttr::get(rewriter.getContext(), *attr);
auto firDeclareOp = rewriter.create<fir::DeclareOp>(
loc, memref.getType(), memref, declareOp.getShape(),
declareOp.getTypeparams(), declareOp.getDummyScope(),
declareOp.getUniqName(), fortranAttrs, cudaAttr);
declareOp.getUniqName(), fortranAttrs, dataAttr);
// Propagate other attributes from hlfir.declare to fir.declare.
// OpenACC's acc.declare is one example. Right now, the propagation

View File

@@ -4,11 +4,11 @@ func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%1 = fir.alloca i32
%pinned = fir.alloca i1
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%s = fir.load %1 : !fir.ref<i32>
// expected-error@+1{{'fir.cuda_allocate' op pinned and stream cannot appears at the same time}}
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) pinned(%pinned : !fir.ref<i1>) {cuda_attr = #fir.cuda<device>} -> i32
// expected-error@+1{{'cuf.allocate' op pinned and stream cannot appear at the same time}}
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) pinned(%pinned : !fir.ref<i1>) {data_attr = #cuf.cuda<device>} -> i32
return
}
@@ -16,8 +16,8 @@ func.func @_QPsub1() {
func.func @_QPsub1() {
%1 = fir.alloca i32
// expected-error@+1{{'fir.cuda_allocate' op expect box to be a reference to a class or box type value}}
%2 = fir.cuda_allocate %1 : !fir.ref<i32> {cuda_attr = #fir.cuda<device>} -> i32
// expected-error@+1{{'cuf.allocate' op expect box to be a reference to a class or box type value}}
%2 = cuf.allocate %1 : !fir.ref<i32> {data_attr = #cuf.cuda<device>} -> i32
return
}
@@ -25,15 +25,15 @@ func.func @_QPsub1() {
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%c100 = arith.constant 100 : index
%7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"}
%8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref<!fir.char<1,100>>, index) -> (!fir.ref<!fir.char<1,100>>, !fir.ref<!fir.char<1,100>>)
%9 = fir.embox %8#1 : (!fir.ref<!fir.char<1,100>>) -> !fir.box<!fir.char<1,100>>
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%16 = fir.convert %9 : (!fir.box<!fir.char<1,100>>) -> !fir.box<none>
// expected-error@+1{{'fir.cuda_allocate' op expect stat attribute when errmsg is provided}}
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>} -> i32
// expected-error@+1{{'cuf.allocate' op expect stat attribute when errmsg is provided}}
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {data_attr = #cuf.cuda<device>} -> i32
return
}
@@ -41,11 +41,11 @@ func.func @_QPsub1() {
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%1 = fir.alloca i32
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// expected-error@+1{{'fir.cuda_allocate' op expect errmsg to be a reference to/or a box type value}}
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
// expected-error@+1{{'cuf.allocate' op expect errmsg to be a reference to/or a box type value}}
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
return
}
@@ -53,8 +53,8 @@ func.func @_QPsub1() {
func.func @_QPsub1() {
%1 = fir.alloca i32
// expected-error@+1{{'fir.cuda_deallocate' op expect box to be a reference to class or box type value}}
%2 = fir.cuda_deallocate %1 : !fir.ref<i32> {cuda_attr = #fir.cuda<device>} -> i32
// expected-error@+1{{'cuf.deallocate' op expect box to be a reference to class or box type value}}
%2 = cuf.deallocate %1 : !fir.ref<i32> {data_attr = #cuf.cuda<device>} -> i32
return
}
@@ -62,11 +62,11 @@ func.func @_QPsub1() {
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%1 = fir.alloca i32
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// expected-error@+1{{'fir.cuda_deallocate' op expect errmsg to be a reference to/or a box type value}}
%13 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
// expected-error@+1{{'cuf.deallocate' op expect errmsg to be a reference to/or a box type value}}
%13 = cuf.deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
return
}
@@ -74,32 +74,32 @@ func.func @_QPsub1() {
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%c100 = arith.constant 100 : index
%7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"}
%8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref<!fir.char<1,100>>, index) -> (!fir.ref<!fir.char<1,100>>, !fir.ref<!fir.char<1,100>>)
%9 = fir.embox %8#1 : (!fir.ref<!fir.char<1,100>>) -> !fir.box<!fir.char<1,100>>
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%16 = fir.convert %9 : (!fir.box<!fir.char<1,100>>) -> !fir.box<none>
// expected-error@+1{{'fir.cuda_deallocate' op expect stat attribute when errmsg is provided}}
%13 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>} -> i32
// expected-error@+1{{'cuf.deallocate' op expect stat attribute when errmsg is provided}}
%13 = cuf.deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {data_attr = #cuf.cuda<device>} -> i32
return
}
// -----
func.func @_QPsub1() {
// expected-error@+1{{'fir.cuda_alloc' op expect device, managed or unified cuda attribute}}
%0 = fir.cuda_alloc f32 {bindc_name = "r", cuda_attr = #fir.cuda<pinned>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
fir.cuda_free %0 : !fir.ref<f32> {cuda_attr = #fir.cuda<constant>}
// expected-error@+1{{'cuf.alloc' op expect device, managed or unified cuda attribute}}
%0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
cuf.free %0 : !fir.ref<f32> {data_attr = #cuf.cuda<constant>}
return
}
// -----
func.func @_QPsub1() {
%0 = fir.cuda_alloc f32 {bindc_name = "r", cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
// expected-error@+1{{'fir.cuda_free' op expect device, managed or unified cuda attribute}}
fir.cuda_free %0 : !fir.ref<f32> {cuda_attr = #fir.cuda<constant>}
%0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
// expected-error@+1{{'cuf.free' op expect device, managed or unified cuda attribute}}
cuf.free %0 : !fir.ref<f32> {data_attr = #cuf.cuda<constant>}
return
}

View File

@@ -4,85 +4,85 @@
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
%14 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> {data_attr = #cuf.cuda<device>} -> i32
%14 = cuf.deallocate %11 : !fir.ref<!fir.box<none>> {data_attr = #cuf.cuda<device>} -> i32
return
}
// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
// CHECK: fir.cuda_deallocate %{{.*}} : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> {data_attr = #cuf.cuda<device>} -> i32
// CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<none>> {data_attr = #cuf.cuda<device>} -> i32
// -----
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%1 = fir.alloca i32
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%s = fir.load %1 : !fir.ref<i32>
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) {cuda_attr = #fir.cuda<device>} -> i32
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) {data_attr = #cuf.cuda<device>} -> i32
return
}
// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> stream(%{{.*}} : i32) {cuda_attr = #fir.cuda<device>} -> i32
// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> stream(%{{.*}} : i32) {data_attr = #cuf.cuda<device>} -> i32
// -----
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%1 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub1Eb"}
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%5:2 = hlfir.declare %1 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%12 = fir.convert %5#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> source(%12 : !fir.ref<!fir.box<none>>) {cuda_attr = #fir.cuda<device>} -> i32
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> source(%12 : !fir.ref<!fir.box<none>>) {data_attr = #cuf.cuda<device>} -> i32
return
}
// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> source(%{{.*}} : !fir.ref<!fir.box<none>>) {cuda_attr = #fir.cuda<device>} -> i32
// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> source(%{{.*}} : !fir.ref<!fir.box<none>>) {data_attr = #cuf.cuda<device>} -> i32
// -----
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%pinned = fir.alloca i1
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> pinned(%pinned : !fir.ref<i1>) {cuda_attr = #fir.cuda<device>} -> i32
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> pinned(%pinned : !fir.ref<i1>) {data_attr = #cuf.cuda<device>} -> i32
return
}
// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> pinned(%{{.*}} : !fir.ref<i1>) {cuda_attr = #fir.cuda<device>} -> i32
// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> pinned(%{{.*}} : !fir.ref<i1>) {data_attr = #cuf.cuda<device>} -> i32
// -----
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%c100 = arith.constant 100 : index
%7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"}
%8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref<!fir.char<1,100>>, index) -> (!fir.ref<!fir.char<1,100>>, !fir.ref<!fir.char<1,100>>)
%9 = fir.embox %8#1 : (!fir.ref<!fir.char<1,100>>) -> !fir.box<!fir.char<1,100>>
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%16 = fir.convert %9 : (!fir.box<!fir.char<1,100>>) -> !fir.box<none>
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
%14 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
%14 = cuf.deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
return
}
// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
// CHECK: fir.cuda_deallocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
// CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
// -----
func.func @_QPsub1() {
%0 = fir.cuda_alloc f32 {bindc_name = "r", cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
fir.cuda_free %0 : !fir.ref<f32> {cuda_attr = #fir.cuda<device>}
%0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
cuf.free %0 : !fir.ref<f32> {data_attr = #cuf.cuda<device>}
return
}
// CHECK: fir.cuda_alloc
// CHECK: fir.cuda_free
// CHECK: cuf.alloc
// CHECK: cuf.free

View File

@@ -11,11 +11,11 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub1()
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: %[[BOX_LOAD:.*]] = fir.load %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: %[[ADDR:.*]] = fir.box_addr %[[BOX_LOAD]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
@@ -23,7 +23,7 @@ end subroutine
! CHECK: %[[C0:.*]] = arith.constant 0 : i64
! CHECK: %[[NE_C0:.*]] = arith.cmpi ne, %[[ADDR_I64]], %[[C0]] : i64
! CHECK: fir.if %[[NE_C0]] {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
subroutine sub2()
@@ -36,18 +36,18 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub2()
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub2Ea"}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub2Eistat"}
! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub2Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
! CHECK: %[[STAT:.*]] = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
! CHECK: %[[STAT:.*]] = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>} -> i32
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>} -> i32
! CHECK: }
subroutine sub3()
@@ -58,13 +58,13 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub3()
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xi32>>> {bindc_name = "a", uniq_name = "_QFsub3Ea"}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsub3Eplog"}
! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
! CHECK-2: fir.call @_FortranAAllocatableSetBounds
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> pinned(%[[PLOG_DECL]]#1 : !fir.ref<!fir.logical<4>>) {cuda_attr = #fir.cuda<pinned>} -> i32
! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> pinned(%[[PLOG_DECL]]#1 : !fir.ref<!fir.logical<4>>) {data_attr = #cuf.cuda<pinned>} -> i32
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> {cuda_attr = #fir.cuda<pinned>} -> i32
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> {data_attr = #cuf.cuda<pinned>} -> i32
! CHECK: }
subroutine sub4()
@@ -75,14 +75,14 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub4()
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub4Ea"}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %[[STREAM:.*]] = fir.load %[[ISTREAM_DECL]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> stream(%[[STREAM]] : i32) {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> stream(%[[STREAM]] : i32) {data_attr = #cuf.cuda<device>} -> i32
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
subroutine sub5()
@@ -93,16 +93,16 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub5()
! CHECK: %[[BOX_A:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub5Ea"}
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub5Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> source(%[[LOAD_B]] : !fir.box<!fir.heap<!fir.array<?xf32>>>) {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: %{{.*}} = cuf.allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> source(%[[LOAD_B]] : !fir.box<!fir.heap<!fir.array<?xf32>>>) {data_attr = #cuf.cuda<device>} -> i32
! CHECK: fir.if
! CHECK: fir.freemem
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
subroutine sub6()
@@ -113,14 +113,14 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub6()
! CHECK: %[[BOX_A:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub6Ea"}
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub6Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.call @_FortranAAllocatableApplyMold
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: %{{.*}} = cuf.allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
subroutine sub7()
@@ -134,19 +134,19 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub7()
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub7Ea"}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ERR:.*]] = fir.alloca !fir.char<1,50> {bindc_name = "err", uniq_name = "_QFsub7Eerr"}
! CHECK: %[[ERR_DECL:.*]]:2 = hlfir.declare %[[ERR]] typeparams %{{.*}} {uniq_name = "_QFsub7Eerr"} : (!fir.ref<!fir.char<1,50>>, index) -> (!fir.ref<!fir.char<1,50>>, !fir.ref<!fir.char<1,50>>)
! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub7Eistat"}
! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub7Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%[[ERR_BOX]] : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
! CHECK: %[[STAT:.*]] = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%[[ERR_BOX]] : !fir.box<!fir.char<1,50>>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%15 : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
! CHECK: %[[STAT:.*]] = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%15 : !fir.box<!fir.char<1,50>>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }

View File

@@ -5,13 +5,13 @@
module cuda_var
real, constant :: mod_a_rc
! CHECK: fir.global @_QMcuda_varEmod_a_rc {cuda_attr = #fir.cuda<constant>} : f32
! CHECK: fir.global @_QMcuda_varEmod_a_rc {data_attr = #cuf.cuda<constant>} : f32
real, device :: mod_b_ra
! CHECK: fir.global @_QMcuda_varEmod_b_ra {cuda_attr = #fir.cuda<device>} : f32
! CHECK: fir.global @_QMcuda_varEmod_b_ra {data_attr = #cuf.cuda<device>} : f32
real, allocatable, managed :: mod_c_rm
! CHECK: fir.global @_QMcuda_varEmod_c_rm {cuda_attr = #fir.cuda<managed>} : !fir.box<!fir.heap<f32>>
! CHECK: fir.global @_QMcuda_varEmod_c_rm {data_attr = #cuf.cuda<managed>} : !fir.box<!fir.heap<f32>>
real, allocatable, pinned :: mod_d_rp
! CHECK: fir.global @_QMcuda_varEmod_d_rp {cuda_attr = #fir.cuda<pinned>} : !fir.box<!fir.heap<f32>>
! CHECK: fir.global @_QMcuda_varEmod_d_rp {data_attr = #cuf.cuda<pinned>} : !fir.box<!fir.heap<f32>>
contains
@@ -23,44 +23,44 @@ subroutine local_var_attrs
end subroutine
! CHECK-LABEL: func.func @_QMcuda_varPlocal_var_attrs()
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref<f32>) -> !fir.ref<f32>
! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> !fir.ref<!fir.box<!fir.heap<f32>>>
! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> !fir.ref<!fir.box<!fir.heap<f32>>>
! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref<f32>) -> !fir.ref<f32>
! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref<f32>) -> !fir.ref<f32>
! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> !fir.ref<!fir.box<!fir.heap<f32>>>
! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> !fir.ref<!fir.box<!fir.heap<f32>>>
! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref<f32>) -> !fir.ref<f32>
subroutine dummy_arg_device(dd)
real, device :: dd
end subroutine
! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_device(
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {fir.bindc_name = "dd", fir.cuda_attr = #fir.cuda<device>}) {
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFdummy_arg_deviceEdd"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "dd"}) {
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFdummy_arg_deviceEdd"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
subroutine dummy_arg_managed(dm)
real, allocatable, managed :: dm
end subroutine
! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_managed(
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<f32>>> {fir.bindc_name = "dm", fir.cuda_attr = #fir.cuda<managed>}) {
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFdummy_arg_managedEdm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<f32>>> {cuf.data_attr = #cuf.cuda<managed>, fir.bindc_name = "dm"}) {
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFdummy_arg_managedEdm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
subroutine dummy_arg_pinned(dp)
real, allocatable, pinned :: dp
end subroutine
! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_pinned(
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<f32>>> {fir.bindc_name = "dp", fir.cuda_attr = #fir.cuda<pinned>}) {
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFdummy_arg_pinnedEdp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<f32>>> {cuf.data_attr = #cuf.cuda<pinned>, fir.bindc_name = "dp"}) {
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFdummy_arg_pinnedEdp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
subroutine dummy_arg_unified(du)
real, unified :: du
end subroutine
! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_unified(
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {fir.bindc_name = "du", fir.cuda_attr = #fir.cuda<unified>})
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFdummy_arg_unifiedEdu"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {cuf.data_attr = #cuf.cuda<unified>, fir.bindc_name = "du"})
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFdummy_arg_unifiedEdu"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
subroutine cuda_alloc_free(n)
integer :: n
@@ -70,27 +70,27 @@ subroutine cuda_alloc_free(n)
end
! CHECK-LABEL: func.func @_QMcuda_varPcuda_alloc_free
! CHECK: %[[ALLOC_A:.*]] = fir.cuda_alloc !fir.array<10xf32> {bindc_name = "a", cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} -> !fir.ref<!fir.array<10xf32>>
! CHECK: %[[ALLOC_A:.*]] = cuf.alloc !fir.array<10xf32> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} -> !fir.ref<!fir.array<10xf32>>
! CHECK: %[[SHAPE:.*]] = fir.shape %c10 : (index) -> !fir.shape<1>
! CHECK: %[[DECL_A:.*]]:2 = hlfir.declare %[[ALLOC_A]](%[[SHAPE]]) {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
! CHECK: %[[DECL_A:.*]]:2 = hlfir.declare %[[ALLOC_A]](%[[SHAPE]]) {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
! CHECK: %[[ALLOC_U:.*]] = fir.cuda_alloc i32 {bindc_name = "u", cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} -> !fir.ref<i32>
! CHECK: %[[DECL_U:.*]]:2 = hlfir.declare %[[ALLOC_U]] {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[ALLOC_U:.*]] = cuf.alloc i32 {bindc_name = "u", data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} -> !fir.ref<i32>
! CHECK: %[[DECL_U:.*]]:2 = hlfir.declare %[[ALLOC_U]] {data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[ALLOC_B:.*]] = fir.cuda_alloc !fir.array<?xf32>, %{{.*}} : index {bindc_name = "b", cuda_attr = #fir.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} -> !fir.ref<!fir.array<?xf32>>
! CHECK: %[[ALLOC_B:.*]] = cuf.alloc !fir.array<?xf32>, %{{.*}} : index {bindc_name = "b", data_attr = #cuf.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} -> !fir.ref<!fir.array<?xf32>>
! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
! CHECK: %[[DECL_B:.*]]:2 = hlfir.declare %[[ALLOC_B]](%[[SHAPE]]) {cuda_attr = #fir.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
! CHECK: %[[DECL_B:.*]]:2 = hlfir.declare %[[ALLOC_B]](%[[SHAPE]]) {data_attr = #cuf.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
! CHECK: fir.cuda_free %[[DECL_B]]#1 : !fir.ref<!fir.array<?xf32>> {cuda_attr = #fir.cuda<managed>}
! CHECK: fir.cuda_free %[[DECL_U]]#1 : !fir.ref<i32> {cuda_attr = #fir.cuda<unified>}
! CHECK: fir.cuda_free %[[DECL_A]]#1 : !fir.ref<!fir.array<10xf32>> {cuda_attr = #fir.cuda<device>}
! CHECK: cuf.free %[[DECL_B]]#1 : !fir.ref<!fir.array<?xf32>> {data_attr = #cuf.cuda<managed>}
! CHECK: cuf.free %[[DECL_U]]#1 : !fir.ref<i32> {data_attr = #cuf.cuda<unified>}
! CHECK: cuf.free %[[DECL_A]]#1 : !fir.ref<!fir.array<10xf32>> {data_attr = #cuf.cuda<device>}
subroutine dummy(x)
real, target, device :: x
end subroutine
! CHECK: func.func @_QMcuda_varPdummy
! CHECK-NOT: fir.cuda_free
! CHECK-NOT: cuf.free
end module
View File
@@ -29,37 +29,37 @@ end
! CHECK-LABEL: func.func @_QPsub1()
! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub1Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub1Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub1Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[C1:.*]] = arith.constant 1 : i32
! CHECK: %[[LOADED_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
! CHECK: %[[ADD:.*]] = arith.addi %[[C1]], %[[LOADED_I]] : i32
! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ADD]] {uniq_name = ".cuf_host_tmp"} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<i32>, i1
! CHECK: %[[C1:.*]] = arith.constant 1 : i32
! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[C1]] {uniq_name = ".cuf_host_tmp"} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<i32>, i1
! CHECK: fir.cuda_data_transfer %[[AHOST]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: cuf.data_transfer %[[AHOST]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> {
! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ELEMENTAL]](%{{.*}}) {uniq_name = ".cuf_host_tmp"} : (!hlfir.expr<10xi32>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, i1)
! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<!fir.array<10xi32>>, i1
! CHECK: %[[DES_AHOST:.*]] = hlfir.designate %[[AHOST]]#0 (%c1{{.*}}:%c5{{.*}}:%c1{{.*}}) shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
! CHECK: %[[DES_ADEV:.*]] = hlfir.designate %[[ADEV]]#0 (%c1{{.*}}:%c5{{.*}}:%c1{{.*}}) shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
! CHECK: fir.cuda_data_transfer %[[DES_AHOST]] to %[[DES_ADEV]] {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
! CHECK: cuf.data_transfer %[[DES_AHOST]] to %[[DES_ADEV]] {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32>
! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ELEMENTAL]](%{{.*}}) {uniq_name = ".cuf_host_tmp"} : (!hlfir.expr<10xi32>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, i1)
! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<!fir.array<10xi32>>, i1
subroutine sub2()
@@ -81,25 +81,25 @@ subroutine sub2()
end
! CHECK-LABEL: func.func @_QPsub2()
! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub2Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[BDEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub2Ebdev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[BDEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Ebdev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub2Ebhost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub2Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.cuda_data_transfer %[[ADEV]]#0 to %[[AHOST]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: fir.cuda_data_transfer %[[M]]#0 to %[[I]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<i32>, !fir.ref<i32>
! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: cuf.data_transfer %[[ADEV]]#0 to %[[AHOST]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: cuf.data_transfer %[[M]]#0 to %[[I]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<i32>, !fir.ref<i32>
! CHECK: %[[DES_ADEV:.*]] = hlfir.designate %[[ADEV]]#0 (%{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
! CHECK: %[[DES_AHOST:.*]] = hlfir.designate %[[AHOST]]#0 (%{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
! CHECK: fir.cuda_data_transfer %[[DES_ADEV]] to %[[DES_AHOST]] {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
! CHECK: cuf.data_transfer %[[DES_ADEV]] to %[[DES_AHOST]] {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
! CHECK: fir.cuda_data_transfer %[[ADEV]]#0 to %[[BDEV]]#0 {transfer_kind = #fir.cuda_transfer<device_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: cuf.data_transfer %[[ADEV]]#0 to %[[BDEV]]#0 {transfer_kind = #cuf.cuda_transfer<device_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10xi32> {bindc_name = ".tmp", uniq_name = ""}
! CHECK: %[[DECL_TEMP:.*]]:2 = hlfir.declare %[[TEMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>)
! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %[[DECL_TEMP]]#1(%{{.*}}) {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.heap<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>)
! CHECK: fir.cuda_data_transfer %[[ADEV]]#1 to %[[DECL_TEMP]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>
! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %[[DECL_TEMP]]#1(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.heap<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>)
! CHECK: cuf.data_transfer %[[ADEV]]#1 to %[[DECL_TEMP]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>
! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32>
! CHECK: hlfir.assign %[[ELEMENTAL]] to %[[BHOST]]#0 : !hlfir.expr<10xi32>, !fir.ref<!fir.array<10xi32>>
! CHECK: fir.freemem %[[DECL_TEMP]]#0 : !fir.heap<!fir.array<10xi32>>
@@ -116,12 +116,12 @@ end
! CHECK: %[[TMP:.*]] = fir.alloca !fir.type<_QMmod1Tt1{i:i32}> {bindc_name = ".tmp"}
! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Ebhost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub3Et"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub3Et"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = ".tmp"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
! CHECK: fir.cuda_data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>
! CHECK: cuf.data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>
! Check that fir.cuda_data_transfer are not generated within cuf kernel
! Check that cuf.data_transfer are not generated within cuf kernel
subroutine sub4()
integer, parameter :: n = 10
real, device :: adev(n)
@@ -137,9 +137,9 @@ subroutine sub4()
end subroutine
! CHECK-LABEL: func.func @_QPsub4()
! CHECK: fir.cuda_data_transfer
! CHECK: fir.cuda_kernel<<<*, *>>>
! CHECK-NOT: fir.cuda_data_transfer
! CHECK: cuf.data_transfer
! CHECK: cuf.kernel<<<*, *>>>
! CHECK-NOT: cuf.data_transfer
! CHECK: hlfir.assign
attributes(global) subroutine sub5(a)
@@ -149,7 +149,7 @@ attributes(global) subroutine sub5(a)
end subroutine
! CHECK-LABEL: func.func @_QPsub5
! CHECK-NOT: fir.cuda_data_transfer
! CHECK-NOT: cuf.data_transfer
attributes(host,device) subroutine sub6(a)
integer, device :: a
@@ -158,4 +158,4 @@ attributes(host,device) subroutine sub6(a)
end subroutine
! CHECK-LABEL: func.func @_QPsub6
! CHECK: fir.cuda_data_transfer
! CHECK: cuf.data_transfer
View File
@@ -16,10 +16,10 @@ contains
subroutine host()
real, device :: a
! CHECK-LABEL: func.func @_QMtest_callPhost()
! CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QMtest_callFhostEa"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
! CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMtest_callFhostEa"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
call dev_kernel0<<<10, 20>>>()
! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}>>>()
! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}>>>()
call dev_kernel0<<< __builtin_dim3(1,1,4), __builtin_dim3(32,1,1) >>>
! CHECK: %[[ADDR_DIM3_GRID:.*]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_dim3.{{.*}}) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
@@ -38,16 +38,16 @@ contains
! CHECK: %[[BLOCK_Y_LOAD:.*]] = fir.load %[[BLOCK_Y]] : !fir.ref<i32>
! CHECK: %[[BLOCK_Z:.*]] = hlfir.designate %[[DIM3_BLOCK]]#1{"z"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<i32>
! CHECK: %[[BLOCK_Z_LOAD:.*]] = fir.load %[[BLOCK_Z]] : !fir.ref<i32>
! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%[[GRID_X_LOAD]], %[[GRID_Y_LOAD]], %[[GRID_Z_LOAD]], %[[BLOCK_X_LOAD]], %[[BLOCK_Y_LOAD]], %[[BLOCK_Z_LOAD]]>>>()
! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%[[GRID_X_LOAD]], %[[GRID_Y_LOAD]], %[[GRID_Z_LOAD]], %[[BLOCK_X_LOAD]], %[[BLOCK_Y_LOAD]], %[[BLOCK_Z_LOAD]]>>>()
call dev_kernel0<<<10, 20, 2>>>()
! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}>>>()
! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}>>>()
call dev_kernel0<<<10, 20, 2, 0>>>()
! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}, %c0{{.*}}>>>()
! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}, %c0{{.*}}>>>()
call dev_kernel1<<<1, 32>>>(a)
! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel1<<<%c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c32{{.*}}, %c1{{.*}}, %c1{{.*}}>>>(%1#1) : (!fir.ref<f32>)
! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel1<<<%c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c32{{.*}}, %c1{{.*}}, %c1{{.*}}>>>(%1#1) : (!fir.ref<f32>)
end
end
View File
@@ -20,7 +20,7 @@ subroutine sub1()
! CHECK: %[[LB:.*]] = fir.convert %c1{{.*}} : (i32) -> index
! CHECK: %[[UB:.*]] = fir.convert %c100{{.*}} : (i32) -> index
! CHECK: %[[STEP:.*]] = arith.constant 1 : index
! CHECK: fir.cuda_kernel<<<%c1_i32, %c2_i32>>> (%[[ARG0:.*]] : index) = (%[[LB]] : index) to (%[[UB]] : index) step (%[[STEP]] : index)
! CHECK: cuf.kernel<<<%c1_i32, %c2_i32>>> (%[[ARG0:.*]] : index) = (%[[LB]] : index) to (%[[UB]] : index) step (%[[STEP]] : index)
! CHECK-NOT: fir.do_loop
! CHECK: %[[ARG0_I32:.*]] = fir.convert %[[ARG0]] : (index) -> i32
! CHECK: fir.store %[[ARG0_I32]] to %[[IV]]#1 : !fir.ref<i32>
@@ -32,7 +32,7 @@ subroutine sub1()
a(i) = a(i) * b(i)
end do
! CHECK: fir.cuda_kernel<<<*, *>>> (%{{.*}} : index) = (%{{.*}} : index) to (%{{.*}} : index) step (%{{.*}} : index)
! CHECK: cuf.kernel<<<*, *>>> (%{{.*}} : index) = (%{{.*}} : index) to (%{{.*}} : index) step (%{{.*}} : index)
!$cuf kernel do(2) <<< 1, (256,1) >>>
do i = 1, n
@@ -41,7 +41,7 @@ subroutine sub1()
end do
end do
! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%[[ARG0:.*]] : index, %[[ARG1:.*]] : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
! CHECK: cuf.kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%[[ARG0:.*]] : index, %[[ARG1:.*]] : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
! CHECK: %[[ARG0_I32:.*]] = fir.convert %[[ARG0]] : (index) -> i32
! CHECK: fir.store %[[ARG0_I32]] to %[[IV]]#1 : !fir.ref<i32>
! CHECK: %[[ARG1_I32:.*]] = fir.convert %[[ARG1]] : (index) -> i32
@@ -54,7 +54,7 @@ subroutine sub1()
c(i,j) = c(i,j) * d(i,j)
end do
end do
! CHECK: fir.cuda_kernel<<<(%c1{{.*}}, %c0{{.*}}), (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
! CHECK: cuf.kernel<<<(%c1{{.*}}, %c0{{.*}}), (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
!$cuf kernel do(2) <<< (*,*), (32,4) >>>
do i = 1, n
@@ -63,5 +63,5 @@ subroutine sub1()
end do
end do
! CHECK: fir.cuda_kernel<<<*, (%c32{{.*}}, %c4{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
! CHECK: cuf.kernel<<<*, (%c32{{.*}}, %c4{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
end
View File
@@ -10,6 +10,6 @@ contains
end
end module
! CHECK: fir.global @_QMcuf_modEmd {cuda_attr = #fir.cuda<device>} : f32
! CHECK: fir.global @_QMcuf_modEmd {data_attr = #cuf.cuda<device>} : f32
! CHECK: func.func @_QMcuf_modPdevsub() attributes {fir.cuda_attr = #fir.cuda_proc<device>}
! CHECK: func.func @_QMcuf_modPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
View File
@@ -10,16 +10,16 @@ end
! CHECK-LABEL: func.func @_QPsub1()
! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMcuf_modEmd) : !fir.ref<f32>
! CHECK: %{{.*}}:2 = hlfir.declare %[[ADDR]] {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuf_modEmd"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
! CHECK: %{{.*}}:2 = hlfir.declare %[[ADDR]] {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuf_modEmd"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
attributes(device) subroutine sub2()
use cuf_mod
call devsub()
end
! CHECK-LABEL: func.func @_QPsub2() attributes {fir.cuda_attr = #fir.cuda_proc<device>}
! CHECK-LABEL: func.func @_QPsub2() attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
! CHECK: fir.call @_QMcuf_modPdevsub()
! CHECK-LABEL: fir.global @_QMcuf_modEmd {cuda_attr = #fir.cuda<device>} : f32
! CHECK-LABEL: fir.global @_QMcuf_modEmd {data_attr = #cuf.cuda<device>} : f32
! CHECK-LABEL: func.func private @_QMcuf_modPdevsub() attributes {fir.cuda_attr = #fir.cuda_proc<device>}
! CHECK-LABEL: func.func private @_QMcuf_modPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
View File
@@ -4,40 +4,40 @@
! Test lowering of CUDA attribute on procedures.
attributes(host) subroutine sub_host(); end
! CHECK: func.func @_QPsub_host() attributes {fir.cuda_attr = #fir.cuda_proc<host>}
! CHECK: func.func @_QPsub_host() attributes {cuf.proc_attr = #cuf.cuda_proc<host>}
attributes(device) subroutine sub_device(); end
! CHECK: func.func @_QPsub_device() attributes {fir.cuda_attr = #fir.cuda_proc<device>}
! CHECK: func.func @_QPsub_device() attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
attributes(host) attributes(device) subroutine sub_host_device; end
! CHECK: func.func @_QPsub_host_device() attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
! CHECK: func.func @_QPsub_host_device() attributes {cuf.proc_attr = #cuf.cuda_proc<host_device>}
attributes(device) attributes(host) subroutine sub_device_host; end
! CHECK: func.func @_QPsub_device_host() attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
! CHECK: func.func @_QPsub_device_host() attributes {cuf.proc_attr = #cuf.cuda_proc<host_device>}
attributes(global) subroutine sub_global(); end
! CHECK: func.func @_QPsub_global() attributes {fir.cuda_attr = #fir.cuda_proc<global>}
! CHECK: func.func @_QPsub_global() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
attributes(grid_global) subroutine sub_grid_global(); end
! CHECK: func.func @_QPsub_grid_global() attributes {fir.cuda_attr = #fir.cuda_proc<grid_global>}
! CHECK: func.func @_QPsub_grid_global() attributes {cuf.proc_attr = #cuf.cuda_proc<grid_global>}
attributes(host) integer function fct_host(); end
! CHECK: func.func @_QPfct_host() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<host>}
! CHECK: func.func @_QPfct_host() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<host>}
attributes(device) integer function fct_device(); end
! CHECK: func.func @_QPfct_device() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<device>}
! CHECK: func.func @_QPfct_device() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
attributes(host) attributes(device) integer function fct_host_device; end
! CHECK: func.func @_QPfct_host_device() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
! CHECK: func.func @_QPfct_host_device() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<host_device>}
attributes(device) attributes(host) integer function fct_device_host; end
! CHECK: func.func @_QPfct_device_host() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
! CHECK: func.func @_QPfct_device_host() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<host_device>}
attributes(global) launch_bounds(1, 2) subroutine sub_lbounds1(); end
! CHECK: func.func @_QPsub_lbounds1() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_launch_bounds = #fir.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64>}
! CHECK: func.func @_QPsub_lbounds1() attributes {cuf.launch_bounds = #cuf.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64>, cuf.proc_attr = #cuf.cuda_proc<global>}
attributes(global) launch_bounds(1, 2, 3) subroutine sub_lbounds2(); end
! CHECK: func.func @_QPsub_lbounds2() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_launch_bounds = #fir.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64, upperBoundClusterSize = 3 : i64>}
! CHECK: func.func @_QPsub_lbounds2() attributes {cuf.launch_bounds = #cuf.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64, upperBoundClusterSize = 3 : i64>, cuf.proc_attr = #cuf.cuda_proc<global>}
attributes(global) cluster_dims(1, 2, 3) subroutine sub_clusterdims1(); end
! CHECK: func.func @_QPsub_clusterdims1() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_cluster_dims = #fir.cluster_dims<x = 1 : i64, y = 2 : i64, z = 3 : i64>}
! CHECK: func.func @_QPsub_clusterdims1() attributes {cuf.cluster_dims = #cuf.cluster_dims<x = 1 : i64, y = 2 : i64, z = 3 : i64>, cuf.proc_attr = #cuf.cuda_proc<global>}
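The checks above show where the procedure-level CUDA Fortran metadata now lives: the cuf.proc_attr, cuf.launch_bounds, and cuf.cluster_dims attributes on func.func, spelled with the #cuf.cuda_proc, #cuf.launch_bounds, and #cuf.cluster_dims syntax. A minimal C++ sketch of attaching the renamed procedure attribute follows; only the "cuf.proc_attr" key and the #cuf.cuda_proc<device> spelling come from the checks above, while the cuf::ProcAttributeAttr / cuf::ProcAttribute accessor names are assumed to follow the usual tablegen-generated EnumAttr pattern.

#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"

// Tag a function as a CUF device procedure so that it carries the same
// "cuf.proc_attr = #cuf.cuda_proc<device>" attribute checked above.
// cuf::ProcAttributeAttr and cuf::ProcAttribute are assumed to be the
// tablegen-generated attribute/enum from the new CUF attribute library.
static void markAsCufDeviceProc(mlir::func::FuncOp func) {
  mlir::MLIRContext *ctx = func.getContext();
  func->setAttr("cuf.proc_attr",
                cuf::ProcAttributeAttr::get(ctx, cuf::ProcAttribute::Device));
}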
View File
@@ -16,6 +16,8 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS)
target_link_libraries(bbc PRIVATE
CUFAttrs
CUFDialect
FIRDialect
FIRDialectSupport
FIRSupport
View File
@@ -11,6 +11,8 @@ if(FLANG_INCLUDE_TESTS)
endif()
target_link_libraries(fir-opt PRIVATE
CUFAttrs
CUFDialect
FIRDialect
FIRSupport
FIRTransforms
View File
@@ -7,6 +7,8 @@ llvm_update_compile_flags(tco)
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS)
target_link_libraries(tco PRIVATE
CUFAttrs
CUFDialect
FIRCodeGen
FIRDialect
FIRDialectSupport
View File
@@ -2,6 +2,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS)
set(LIBS
CUFDialect
FIRBuilder
FIRCodeGen
FIRDialect
@@ -36,6 +37,7 @@ add_flang_unittest(FlangOptimizerTests
KindMappingTest.cpp
RTBuilder.cpp
DEPENDS
CUFDialect
FIRDialect
FIRSupport
HLFIRDialect
View File
@@ -51,7 +51,7 @@ TEST_F(FortranVariableTest, SimpleScalar) {
/*shape=*/mlir::Value{}, /*typeParams=*/std::nullopt,
/*dummy_scope=*/nullptr, name,
/*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
/*cuda_attr=*/fir::CUDADataAttributeAttr{});
/*data_attr=*/cuf::DataAttributeAttr{});
fir::FortranVariableOpInterface fortranVariable = declare;
EXPECT_FALSE(fortranVariable.isArray());
@@ -77,7 +77,7 @@ TEST_F(FortranVariableTest, CharacterScalar) {
auto declare = builder->create<fir::DeclareOp>(loc, addr.getType(), addr,
/*shape=*/mlir::Value{}, typeParams, /*dummy_scope=*/nullptr, name,
/*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
/*cuda_attr=*/fir::CUDADataAttributeAttr{});
/*data_attr=*/cuf::DataAttributeAttr{});
fir::FortranVariableOpInterface fortranVariable = declare;
EXPECT_FALSE(fortranVariable.isArray());
@@ -108,7 +108,7 @@ TEST_F(FortranVariableTest, SimpleArray) {
auto declare = builder->create<fir::DeclareOp>(loc, addr.getType(), addr,
shape, /*typeParams*/ std::nullopt, /*dummy_scope=*/nullptr, name,
/*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
/*cuda_attr=*/fir::CUDADataAttributeAttr{});
/*data_attr=*/cuf::DataAttributeAttr{});
fir::FortranVariableOpInterface fortranVariable = declare;
EXPECT_TRUE(fortranVariable.isArray());
@@ -139,7 +139,7 @@ TEST_F(FortranVariableTest, CharacterArray) {
auto declare = builder->create<fir::DeclareOp>(loc, addr.getType(), addr,
shape, typeParams, /*dummy_scope=*/nullptr, name,
/*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
/*cuda_attr=*/fir::CUDADataAttributeAttr{});
/*data_attr=*/cuf::DataAttributeAttr{});
fir::FortranVariableOpInterface fortranVariable = declare;
EXPECT_TRUE(fortranVariable.isArray());
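The unit-test calls above pass an empty cuf::DataAttributeAttr{} for the renamed data_attr operand. Below is a minimal sketch of the same fir::DeclareOp builder call with a concrete device attribute; the operand list mirrors the test above, while cuf::DataAttributeAttr::get and cuf::DataAttribute::Device are assumed to be the tablegen-generated accessors and do not appear in this diff.

#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "mlir/IR/Builders.h"
#include <optional>

// Declare a scalar with a real CUF data attribute instead of the empty
// cuf::DataAttributeAttr{} used in the tests above; the DeclareOp operand
// list is the one exercised by FortranVariableTest.
static fir::DeclareOp declareDeviceScalar(mlir::OpBuilder &builder,
                                          mlir::Location loc, mlir::Value addr,
                                          mlir::StringAttr name) {
  auto dataAttr = cuf::DataAttributeAttr::get(builder.getContext(),
                                              cuf::DataAttribute::Device);
  return builder.create<fir::DeclareOp>(
      loc, addr.getType(), addr,
      /*shape=*/mlir::Value{}, /*typeParams=*/std::nullopt,
      /*dummy_scope=*/nullptr, name,
      /*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
      /*data_attr=*/dataAttr);
}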