[flang][cuda] Move CUDA Fortran operations to a CUF dialect (#92317)
The number of operations dedicated to CUDA Fortran (CUF) has grown, and they were all still part of FIR. For better organization, the CUF operations, attributes, and related code are moved into their own dialect and files. The CUF dialect is tightly coupled with HLFIR/FIR and their types. The CUF attributes are bundled into their own library because some HLFIR/FIR operations depend on them while the CUF dialect depends on the FIR types; without a separate attribute library there would be a dependency cycle.
Committed by GitHub. Parent: d90159add4. Commit: 45daa4fdc6.
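As a quick orientation (this example is not part of the patch), the renaming is mechanical: each `fir.cuda_*` operation becomes a `cuf.*` operation and the CUDA attributes move from the FIR dialect to the CUF dialect. A minimal sketch, assuming a scalar `f32` device local; the SSA names and variable names are made up:

```
// Before this commit (CUDA Fortran operations spelled in the FIR dialect):
%0 = fir.cuda_alloc f32 {bindc_name = "r", cuda_attr = #fir.cuda<device>,
                         uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
fir.cuda_free %0 : !fir.ref<f32> {cuda_attr = #fir.cuda<device>}

// After this commit (same operations, now in the dedicated CUF dialect):
%1 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda<device>,
                    uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
cuf.free %1 : !fir.ref<f32> {data_attr = #cuf.cuda<device>}
```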
@@ -23,6 +23,10 @@
#include "mlir/IR/Value.h"
#include "llvm/ADT/DenseMap.h"

namespace cuf {
class DataAttributeAttr;
}

namespace fir {
class ExtendedValue;
class FirOpBuilder;
@@ -146,9 +150,9 @@ translateSymbolAttributes(mlir::MLIRContext *mlirContext,

/// Translate the CUDA Fortran attributes of \p sym into the FIR CUDA attribute
/// representation.
fir::CUDADataAttributeAttr
translateSymbolCUDADataAttribute(mlir::MLIRContext *mlirContext,
                                 const Fortran::semantics::Symbol &sym);
cuf::DataAttributeAttr
translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext,
                                const Fortran::semantics::Symbol &sym);

/// Map a symbol to a given fir::ExtendedValue. This will generate an
/// hlfir.declare when lowering to HLFIR and map the hlfir.declare result to the
@@ -254,13 +254,13 @@ public:
                             mlir::StringAttr linkage = {},
                             mlir::Attribute value = {}, bool isConst = false,
                             bool isTarget = false,
                             fir::CUDADataAttributeAttr cudaAttr = {});
                             cuf::DataAttributeAttr dataAttr = {});

  fir::GlobalOp createGlobal(mlir::Location loc, mlir::Type type,
                             llvm::StringRef name, bool isConst, bool isTarget,
                             std::function<void(FirOpBuilder &)> bodyBuilder,
                             mlir::StringAttr linkage = {},
                             fir::CUDADataAttributeAttr cudaAttr = {});
                             cuf::DataAttributeAttr dataAttr = {});

  /// Create a global constant (read-only) value.
  fir::GlobalOp createGlobalConstant(mlir::Location loc, mlir::Type type,
@@ -239,7 +239,7 @@ genDeclare(mlir::Location loc, fir::FirOpBuilder &builder,
           const fir::ExtendedValue &exv, llvm::StringRef name,
           fir::FortranVariableFlagsAttr flags,
           mlir::Value dummyScope = nullptr,
           fir::CUDADataAttributeAttr cudaAttr = {});
           cuf::DataAttributeAttr dataAttr = {});

/// Generate an hlfir.associate to build a variable from an expression value.
/// The type of the variable must be provided so that scalar logicals are
@@ -1,3 +1,5 @@
add_subdirectory(CUF)

# This replicates part of the add_mlir_dialect cmake function from MLIR that
# cannot be used here because it expects to be run inside the MLIR directory,
# which is not the case for FIR.
@@ -0,0 +1,7 @@
set(LLVM_TARGET_DEFINITIONS CUFAttr.td)
mlir_tablegen(CUFEnumAttr.h.inc -gen-enum-decls)
mlir_tablegen(CUFEnumAttr.cpp.inc -gen-enum-defs)
mlir_tablegen(CUFAttr.h.inc --gen-attrdef-decls)
mlir_tablegen(CUFAttr.cpp.inc -gen-attrdef-defs)

add_public_tablegen_target(CUFAttrsIncGen)
flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h (new file, 106 lines)
@@ -0,0 +1,106 @@
|
||||
//===-- Optimizer/Dialect/CUF/Attributes/CUFAttr.h -- CUF attributes ------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H
|
||||
#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H
|
||||
|
||||
#include "flang/Common/Fortran.h"
|
||||
#include "mlir/IR/BuiltinAttributes.h"
|
||||
|
||||
namespace llvm {
|
||||
class StringRef;
|
||||
}
|
||||
|
||||
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFEnumAttr.h.inc"
|
||||
|
||||
#define GET_ATTRDEF_CLASSES
|
||||
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h.inc"
|
||||
|
||||
namespace cuf {
|
||||
|
||||
/// Attribute to mark Fortran entities with the CUDA attribute.
|
||||
static constexpr llvm::StringRef getDataAttrName() { return "cuf.data_attr"; }
|
||||
static constexpr llvm::StringRef getProcAttrName() { return "cuf.proc_attr"; }
|
||||
|
||||
/// Attribute to carry CUDA launch_bounds values.
|
||||
static constexpr llvm::StringRef getLaunchBoundsAttrName() {
|
||||
return "cuf.launch_bounds";
|
||||
}
|
||||
|
||||
/// Attribute to carry CUDA cluster_dims values.
|
||||
static constexpr llvm::StringRef getClusterDimsAttrName() {
|
||||
return "cuf.cluster_dims";
|
||||
}
|
||||
|
||||
inline cuf::DataAttributeAttr
|
||||
getDataAttribute(mlir::MLIRContext *mlirContext,
|
||||
std::optional<Fortran::common::CUDADataAttr> cudaAttr) {
|
||||
if (cudaAttr) {
|
||||
cuf::DataAttribute attr;
|
||||
switch (*cudaAttr) {
|
||||
case Fortran::common::CUDADataAttr::Constant:
|
||||
attr = cuf::DataAttribute::Constant;
|
||||
break;
|
||||
case Fortran::common::CUDADataAttr::Device:
|
||||
attr = cuf::DataAttribute::Device;
|
||||
break;
|
||||
case Fortran::common::CUDADataAttr::Managed:
|
||||
attr = cuf::DataAttribute::Managed;
|
||||
break;
|
||||
case Fortran::common::CUDADataAttr::Pinned:
|
||||
attr = cuf::DataAttribute::Pinned;
|
||||
break;
|
||||
case Fortran::common::CUDADataAttr::Shared:
|
||||
attr = cuf::DataAttribute::Shared;
|
||||
break;
|
||||
case Fortran::common::CUDADataAttr::Texture:
|
||||
// Obsolete attribute
|
||||
return {};
|
||||
case Fortran::common::CUDADataAttr::Unified:
|
||||
attr = cuf::DataAttribute::Unified;
|
||||
break;
|
||||
}
|
||||
return cuf::DataAttributeAttr::get(mlirContext, attr);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
inline cuf::ProcAttributeAttr
|
||||
getProcAttribute(mlir::MLIRContext *mlirContext,
|
||||
std::optional<Fortran::common::CUDASubprogramAttrs> cudaAttr) {
|
||||
if (cudaAttr) {
|
||||
cuf::ProcAttribute attr;
|
||||
switch (*cudaAttr) {
|
||||
case Fortran::common::CUDASubprogramAttrs::Host:
|
||||
attr = cuf::ProcAttribute::Host;
|
||||
break;
|
||||
case Fortran::common::CUDASubprogramAttrs::Device:
|
||||
attr = cuf::ProcAttribute::Device;
|
||||
break;
|
||||
case Fortran::common::CUDASubprogramAttrs::HostDevice:
|
||||
attr = cuf::ProcAttribute::HostDevice;
|
||||
break;
|
||||
case Fortran::common::CUDASubprogramAttrs::Global:
|
||||
attr = cuf::ProcAttribute::Global;
|
||||
break;
|
||||
case Fortran::common::CUDASubprogramAttrs::Grid_Global:
|
||||
attr = cuf::ProcAttribute::GridGlobal;
|
||||
break;
|
||||
}
|
||||
return cuf::ProcAttributeAttr::get(mlirContext, attr);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace cuf
|
||||
|
||||
#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H
|
||||
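The attribute-name helpers above (`cuf.data_attr`, `cuf.proc_attr`, `cuf.launch_bounds`, `cuf.cluster_dims`) name the discardable attributes that lowering attaches to functions and variables (see the `setCUDAAttributes` changes later in this commit). A hedged sketch of the resulting IR; the kernel name and the bound/dimension values are illustrative:

```
// Hypothetical device kernel after lowering: the CUF procedure, launch_bounds
// and cluster_dims attributes hang off the func.func under the names above.
func.func @_QPdev_kernel() attributes {
    cuf.proc_attr = #cuf.cuda_proc<global>,
    cuf.launch_bounds = #cuf.launch_bounds<maxTPB = 256 : i64, minBPM = 2 : i64>,
    cuf.cluster_dims = #cuf.cluster_dims<x = 2 : i64, y = 2 : i64, z = 1 : i64>} {
  return
}
```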
flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td (new file, 100 lines)
@@ -0,0 +1,100 @@
|
||||
//===- CUFAttr.td - CUF Attributes -------------------------*- tablegen -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the CUF dialect attributes.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef FORTRAN_DIALECT_CUF_CUFATTRS
|
||||
#define FORTRAN_DIALECT_CUF_CUFATTRS
|
||||
|
||||
include "flang/Optimizer/Dialect/CUF/CUFDialect.td"
|
||||
include "mlir/IR/EnumAttr.td"
|
||||
|
||||
class cuf_Attr<string name> : AttrDef<CUFDialect, name>;
|
||||
|
||||
def cuf_DataAttribute : I32EnumAttr<
|
||||
"DataAttribute",
|
||||
"CUDA Fortran variable attributes",
|
||||
[
|
||||
I32EnumAttrCase<"Constant", 0, "constant">,
|
||||
I32EnumAttrCase<"Device", 1, "device">,
|
||||
I32EnumAttrCase<"Managed", 2, "managed">,
|
||||
I32EnumAttrCase<"Pinned", 3, "pinned">,
|
||||
I32EnumAttrCase<"Shared", 4, "shared">,
|
||||
I32EnumAttrCase<"Unified", 5, "unified">,
|
||||
// Texture is omitted since it is obsolete and rejected by semantics.
|
||||
]> {
|
||||
let genSpecializedAttr = 0;
|
||||
let cppNamespace = "::cuf";
|
||||
}
|
||||
|
||||
def cuf_DataAttributeAttr :
|
||||
EnumAttr<CUFDialect, cuf_DataAttribute, "cuda"> {
|
||||
let assemblyFormat = [{ ```<` $value `>` }];
|
||||
}
|
||||
|
||||
def cuf_ProcAttribute : I32EnumAttr<
|
||||
"ProcAttribute", "CUDA Fortran procedure attributes",
|
||||
[
|
||||
I32EnumAttrCase<"Host", 0, "host">,
|
||||
I32EnumAttrCase<"Device", 1, "device">,
|
||||
I32EnumAttrCase<"HostDevice", 2, "host_device">,
|
||||
I32EnumAttrCase<"Global", 3, "global">,
|
||||
I32EnumAttrCase<"GridGlobal", 4, "grid_global">,
|
||||
]> {
|
||||
let genSpecializedAttr = 0;
|
||||
let cppNamespace = "::cuf";
|
||||
}
|
||||
|
||||
def cuf_ProcAttributeAttr :
|
||||
EnumAttr<CUFDialect, cuf_ProcAttribute, "cuda_proc"> {
|
||||
let assemblyFormat = [{ ```<` $value `>` }];
|
||||
}
|
||||
|
||||
def cuf_LaunchBoundsAttr : cuf_Attr<"LaunchBounds"> {
|
||||
let mnemonic = "launch_bounds";
|
||||
|
||||
let parameters = (ins
|
||||
"mlir::IntegerAttr":$maxTPB,
|
||||
"mlir::IntegerAttr":$minBPM,
|
||||
OptionalParameter<"mlir::IntegerAttr">:$upperBoundClusterSize
|
||||
);
|
||||
|
||||
let assemblyFormat = "`<` struct(params) `>`";
|
||||
}
|
||||
|
||||
def cuf_ClusterDimsAttr : cuf_Attr<"ClusterDims"> {
|
||||
let mnemonic = "cluster_dims";
|
||||
|
||||
let parameters = (ins
|
||||
"mlir::IntegerAttr":$x,
|
||||
"mlir::IntegerAttr":$y,
|
||||
"mlir::IntegerAttr":$z
|
||||
);
|
||||
|
||||
let assemblyFormat = "`<` struct(params) `>`";
|
||||
}
|
||||
|
||||
def cuf_DataTransferKind : I32EnumAttr<
|
||||
"DataTransferKind", "CUDA Fortran data transfer kind",
|
||||
[
|
||||
I32EnumAttrCase<"DeviceHost", 0, "device_host">,
|
||||
I32EnumAttrCase<"HostDevice", 1, "host_device">,
|
||||
I32EnumAttrCase<"DeviceDevice", 2, "device_device">,
|
||||
]> {
|
||||
let genSpecializedAttr = 0;
|
||||
let cppNamespace = "::cuf";
|
||||
}
|
||||
|
||||
def cuf_DataTransferKindAttr :
|
||||
EnumAttr<CUFDialect, cuf_DataTransferKind, "cuda_transfer"> {
|
||||
let assemblyFormat = [{ ```<` $value `>` }];
|
||||
}
|
||||
|
||||
#endif // FORTRAN_DIALECT_CUF_CUFATTRS
|
||||
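For reference, the mnemonics declared above print as follows in the IR (an illustrative, hand-written list derived from the `assemblyFormat` definitions, not generated output):

```
// cuf_DataAttributeAttr    -> data_attr = #cuf.cuda<device>
// cuf_ProcAttributeAttr    -> cuf.proc_attr = #cuf.cuda_proc<host_device>
// cuf_LaunchBoundsAttr     -> #cuf.launch_bounds<maxTPB = 256 : i64, minBPM = 2 : i64>
// cuf_ClusterDimsAttr      -> #cuf.cluster_dims<x = 2 : i64, y = 2 : i64, z = 1 : i64>
// cuf_DataTransferKindAttr -> transfer_kind = #cuf.cuda_transfer<device_host>
```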
flang/include/flang/Optimizer/Dialect/CUF/CMakeLists.txt (new file, 11 lines)
@@ -0,0 +1,11 @@
add_subdirectory(Attributes)

set(LLVM_TARGET_DEFINITIONS CUFDialect.td)
mlir_tablegen(CUFDialect.h.inc -gen-dialect-decls -dialect=cuf)
mlir_tablegen(CUFDialect.cpp.inc -gen-dialect-defs -dialect=cuf)

set(LLVM_TARGET_DEFINITIONS CUFOps.td)
mlir_tablegen(CUFOps.h.inc -gen-op-decls)
mlir_tablegen(CUFOps.cpp.inc -gen-op-defs)

add_public_tablegen_target(CUFOpsIncGen)
flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.h (new file, 26 lines)
@@ -0,0 +1,26 @@
//===-- Optimizer/Dialect/CUFDialect.h -- CUF dialect -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H
#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H

#include "mlir/Bytecode/BytecodeOpInterface.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"

#include "flang/Optimizer/Dialect/CUF/CUFDialect.h.inc"

#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H
flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.td (new file, 43 lines)
@@ -0,0 +1,43 @@
//===-- CUFDialect.td - CUF dialect base definitions -------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Definition of the CUDA Fortran dialect
///
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_DIALECT_CUF_CUFDIALECT
#define FORTRAN_DIALECT_CUF_CUFDIALECT

include "mlir/IR/AttrTypeBase.td"
include "mlir/IR/EnumAttr.td"
include "mlir/IR/OpBase.td"

def CUFDialect : Dialect {
  let name = "cuf";

  let summary = "CUDA Fortran dialect";

  let description = [{
    This dialect models CUDA Fortran operations. The CUF dialect operations use
    the FIR types and are tightly coupled with FIR and HLFIR.
  }];

  let useDefaultAttributePrinterParser = 1;
  let usePropertiesForAttributes = 1;
  let cppNamespace = "::cuf";
  let dependentDialects = ["fir::FIROpsDialect"];

  let extraClassDeclaration = [{
  private:
    // Register the CUF Attributes.
    void registerAttributes();
  }];
}

#endif // FORTRAN_DIALECT_CUF_CUFDIALECT
flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h (new file, 20 lines)
@@ -0,0 +1,20 @@
//===-- Optimizer/Dialect/CUF/CUFOps.h - CUF operations ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H
#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H

#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "mlir/IR/OpDefinition.h"

#define GET_OP_CLASSES
#include "flang/Optimizer/Dialect/CUF/CUFOps.h.inc"

#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H
flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td (new file, 263 lines)
@@ -0,0 +1,263 @@
|
||||
//===-- CUFOps.td - CUF operation definitions --------------*- tablegen -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// Definition of the CUF dialect operations
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef FORTRAN_DIALECT_CUF_CUF_OPS
|
||||
#define FORTRAN_DIALECT_CUF_CUF_OPS
|
||||
|
||||
include "flang/Optimizer/Dialect/CUF/CUFDialect.td"
|
||||
include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
|
||||
include "flang/Optimizer/Dialect/FIRTypes.td"
|
||||
include "mlir/Interfaces/LoopLikeInterface.td"
|
||||
include "mlir/IR/BuiltinAttributes.td"
|
||||
|
||||
class cuf_Op<string mnemonic, list<Trait> traits>
|
||||
: Op<CUFDialect, mnemonic, traits>;
|
||||
|
||||
def cuf_AllocOp : cuf_Op<"alloc", [AttrSizedOperandSegments,
|
||||
MemoryEffects<[MemAlloc]>]> {
|
||||
let summary = "Allocate an object on device";
|
||||
|
||||
let description = [{
|
||||
    This is a drop-in replacement for fir.alloca and fir.allocmem for device
    objects. Any device, managed, or unified object declared in a host
    subprogram needs to be allocated in device memory through runtime calls.
    cuf.alloc is an abstraction over these runtime calls and works together
    with cuf.free.
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
TypeAttr:$in_type,
|
||||
OptionalAttr<StrAttr>:$uniq_name,
|
||||
OptionalAttr<StrAttr>:$bindc_name,
|
||||
Variadic<AnyIntegerType>:$typeparams,
|
||||
Variadic<AnyIntegerType>:$shape,
|
||||
cuf_DataAttributeAttr:$data_attr
|
||||
);
|
||||
|
||||
let results = (outs fir_ReferenceType:$ptr);
|
||||
|
||||
let assemblyFormat = [{
|
||||
$in_type (`(` $typeparams^ `:` type($typeparams) `)`)?
|
||||
(`,` $shape^ `:` type($shape) )? attr-dict `->` qualified(type($ptr))
|
||||
}];
|
||||
|
||||
let builders = [
|
||||
OpBuilder<(ins "mlir::Type":$inType, "llvm::StringRef":$uniqName,
|
||||
"llvm::StringRef":$bindcName,
|
||||
"cuf::DataAttributeAttr":$cudaAttr,
|
||||
CArg<"mlir::ValueRange", "{}">:$typeparams,
|
||||
CArg<"mlir::ValueRange", "{}">:$shape,
|
||||
CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>];
|
||||
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def cuf_FreeOp : cuf_Op<"free", [MemoryEffects<[MemFree]>]> {
|
||||
let summary = "Free a device allocated object";
|
||||
|
||||
let description = [{
|
||||
    The cuf.free operation frees the memory allocated by cuf.alloc.
    It is used for non-allocatable device, managed, and unified
    variables declared in host subprograms.
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
Arg<AnyReferenceLike, "", [MemFree]>:$devptr,
|
||||
cuf_DataAttributeAttr:$data_attr
|
||||
);
|
||||
|
||||
let assemblyFormat = "$devptr `:` qualified(type($devptr)) attr-dict";
|
||||
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def cuf_AllocateOp : cuf_Op<"allocate", [AttrSizedOperandSegments,
|
||||
MemoryEffects<[MemAlloc<DefaultResource>]>]> {
|
||||
let summary = "Perform the device allocation of data of an allocatable";
|
||||
|
||||
let description = [{
|
||||
    The cuf.allocate operation performs the device allocation of the data of
    an allocatable. The descriptor passed to the operation is initialized
    beforehand with the standard Flang runtime calls.
|
||||
}];
|
||||
|
||||
let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
|
||||
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
|
||||
Optional<AnyIntegerType>:$stream,
|
||||
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
|
||||
Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
|
||||
cuf_DataAttributeAttr:$data_attr,
|
||||
UnitAttr:$hasStat);
|
||||
|
||||
let results = (outs AnyIntegerType:$stat);
|
||||
|
||||
let assemblyFormat = [{
|
||||
$box `:` qualified(type($box))
|
||||
( `source` `(` $source^ `:` qualified(type($source) )`)` )?
|
||||
( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
|
||||
( `stream` `(` $stream^ `:` type($stream) `)` )?
|
||||
( `pinned` `(` $pinned^ `:` type($pinned) `)` )?
|
||||
attr-dict `->` type($stat)
|
||||
}];
|
||||
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def cuf_DeallocateOp : cuf_Op<"deallocate",
|
||||
[MemoryEffects<[MemFree<DefaultResource>]>]> {
|
||||
let summary = "Perform the device deallocation of data of an allocatable";
|
||||
|
||||
let description = [{
|
||||
The cuf.deallocate operation performs the deallocation on the device
|
||||
of the data of an allocatable.
|
||||
}];
|
||||
|
||||
let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
|
||||
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
|
||||
cuf_DataAttributeAttr:$data_attr,
|
||||
UnitAttr:$hasStat);
|
||||
|
||||
let results = (outs AnyIntegerType:$stat);
|
||||
|
||||
let assemblyFormat = [{
|
||||
$box `:` qualified(type($box))
|
||||
( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
|
||||
attr-dict `->` type($stat)
|
||||
}];
|
||||
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
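A hedged example of the textual form of these two operations, assuming a device-resident allocatable `f32` whose descriptor address is `%desc` (the type and attribute values are illustrative):

```
// Allocate then deallocate the device data of an allocatable; the descriptor
// itself was set up earlier by the standard Flang runtime calls.
%stat1 = cuf.allocate %desc : !fir.ref<!fir.box<!fir.heap<f32>>>
    {data_attr = #cuf.cuda<device>} -> i32
%stat2 = cuf.deallocate %desc : !fir.ref<!fir.box<!fir.heap<f32>>>
    {data_attr = #cuf.cuda<device>} -> i32
```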
|
||||
def cuf_DataTransferOp : cuf_Op<"data_transfer", []> {
|
||||
let summary = "Represent a data transfer between host and device memory";
|
||||
|
||||
let description = [{
|
||||
    CUDA Fortran allows data transfers to be done via intrinsic assignment
    between a host and a device variable. This operation is used to materialize
    the data transfer between the lhs and rhs memory references.
    The kind of transfer is specified in the attribute.
|
||||
|
||||
```
|
||||
adev = a ! transfer host to device
|
||||
a = adev ! transfer device to host
|
||||
bdev = adev ! transfer device to device
|
||||
```
|
||||
}];
|
||||
|
||||
let arguments = (ins Arg<AnyReferenceLike, "", [MemWrite]>:$src,
|
||||
Arg<AnyReferenceLike, "", [MemRead]>:$dst,
|
||||
cuf_DataTransferKindAttr:$transfer_kind);
|
||||
|
||||
let assemblyFormat = [{
|
||||
$src `to` $dst attr-dict `:` type(operands)
|
||||
}];
|
||||
}
|
||||
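A hedged sketch of the transfers generated for the three assignments above, assuming scalar `f32` variables; `%a`, `%adev`, and `%bdev` are illustrative SSA values for the host and device storage:

```
// adev = a      (host -> device)
cuf.data_transfer %a to %adev {transfer_kind = #cuf.cuda_transfer<host_device>}
    : !fir.ref<f32>, !fir.ref<f32>
// a = adev      (device -> host)
cuf.data_transfer %adev to %a {transfer_kind = #cuf.cuda_transfer<device_host>}
    : !fir.ref<f32>, !fir.ref<f32>
// bdev = adev   (device -> device)
cuf.data_transfer %adev to %bdev {transfer_kind = #cuf.cuda_transfer<device_device>}
    : !fir.ref<f32>, !fir.ref<f32>
```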
|
||||
def cuf_KernelLaunchOp : cuf_Op<"kernel_launch", [CallOpInterface,
|
||||
AttrSizedOperandSegments]> {
|
||||
let summary = "call CUDA kernel";
|
||||
|
||||
let description = [{
|
||||
Launch a CUDA kernel from the host.
|
||||
|
||||
```
|
||||
// launch simple kernel with no arguments. bytes and stream value are
|
||||
// optional in the chevron notation.
|
||||
cuf.kernel_launch @kernel<<<%gx, %gy, %bx, %by, %bz>>>()
|
||||
```
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
SymbolRefAttr:$callee,
|
||||
I32:$grid_x,
|
||||
I32:$grid_y,
|
||||
I32:$grid_z,
|
||||
I32:$block_x,
|
||||
I32:$block_y,
|
||||
I32:$block_z,
|
||||
Optional<I32>:$bytes,
|
||||
Optional<I32>:$stream,
|
||||
Variadic<AnyType>:$args
|
||||
);
|
||||
|
||||
let assemblyFormat = [{
|
||||
$callee `<` `<` `<` $grid_x `,` $grid_y `,` $grid_z `,`$block_x `,`
|
||||
$block_y `,` $block_z ( `,` $bytes^ ( `,` $stream^ )? )? `>` `>` `>`
|
||||
`` `(` $args `)` ( `:` `(` type($args)^ `)` )? attr-dict
|
||||
}];
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
mlir::CallInterfaceCallable getCallableForCallee() {
|
||||
return getCalleeAttr();
|
||||
}
|
||||
|
||||
void setCalleeFromCallable(mlir::CallInterfaceCallable callee) {
|
||||
(*this)->setAttr(getCalleeAttrName(), callee.get<mlir::SymbolRefAttr>());
|
||||
}
|
||||
mlir::FunctionType getFunctionType();
|
||||
|
||||
unsigned getNbNoArgOperand() {
|
||||
unsigned nbNoArgOperand = 5; // grids and blocks values are always present.
|
||||
if (getBytes()) ++nbNoArgOperand;
|
||||
if (getStream()) ++nbNoArgOperand;
|
||||
return nbNoArgOperand;
|
||||
}
|
||||
|
||||
operand_range getArgOperands() {
|
||||
return {operand_begin() + getNbNoArgOperand(), operand_end()};
|
||||
}
|
||||
mlir::MutableOperandRange getArgOperandsMutable() {
|
||||
return mlir::MutableOperandRange(
|
||||
*this, getNbNoArgOperand(), getArgs().size() - 1);
|
||||
}
|
||||
}];
|
||||
}
|
||||
|
||||
def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
|
||||
DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
|
||||
|
||||
let description = [{
|
||||
    Represents the CUDA Fortran kernel directive. The operation is a loop-like
    operation that represents the iteration range of the embedded loop nest.

    When the grid or block variadic operands are empty, a `*`-only syntax was
    used in the Fortran code. If the `*` is mixed with values for either grid
    or block, the `*` entries are represented by a constant 0 value.
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
Variadic<I32>:$grid, // empty means `*`
|
||||
Variadic<I32>:$block, // empty means `*`
|
||||
Optional<I32>:$stream,
|
||||
Variadic<Index>:$lowerbound,
|
||||
Variadic<Index>:$upperbound,
|
||||
Variadic<Index>:$step,
|
||||
OptionalAttr<I64Attr>:$n
|
||||
);
|
||||
|
||||
let regions = (region AnyRegion:$region);
|
||||
|
||||
let assemblyFormat = [{
|
||||
`<` `<` `<` custom<CUFKernelValues>($grid, type($grid)) `,`
|
||||
custom<CUFKernelValues>($block, type($block))
|
||||
( `,` `stream` `=` $stream^ )? `>` `>` `>`
|
||||
custom<CUFKernelLoopControl>($region, $lowerbound, type($lowerbound),
|
||||
$upperbound, type($upperbound), $step, type($step))
|
||||
attr-dict
|
||||
}];
|
||||
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
#endif // FORTRAN_DIALECT_CUF_CUF_OPS
|
||||
@@ -70,87 +70,4 @@ def fir_BoxFieldAttr : I32EnumAttr<
|
||||
// mlir::SideEffects::Resource for modelling operations which add debugging information
|
||||
def DebuggingResource : Resource<"::fir::DebuggingResource">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CUDA Fortran specific attributes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def fir_CUDADataAttribute : I32EnumAttr<
|
||||
"CUDADataAttribute",
|
||||
"CUDA Fortran variable attributes",
|
||||
[
|
||||
I32EnumAttrCase<"Constant", 0, "constant">,
|
||||
I32EnumAttrCase<"Device", 1, "device">,
|
||||
I32EnumAttrCase<"Managed", 2, "managed">,
|
||||
I32EnumAttrCase<"Pinned", 3, "pinned">,
|
||||
I32EnumAttrCase<"Shared", 4, "shared">,
|
||||
I32EnumAttrCase<"Unified", 5, "unified">,
|
||||
// Texture is omitted since it is obsolete and rejected by semantic.
|
||||
]> {
|
||||
let genSpecializedAttr = 0;
|
||||
let cppNamespace = "::fir";
|
||||
}
|
||||
|
||||
def fir_CUDADataAttributeAttr :
|
||||
EnumAttr<FIROpsDialect, fir_CUDADataAttribute, "cuda"> {
|
||||
let assemblyFormat = [{ ```<` $value `>` }];
|
||||
}
|
||||
|
||||
def fir_CUDAProcAttribute : I32EnumAttr<
|
||||
"CUDAProcAttribute", "CUDA Fortran procedure attributes",
|
||||
[
|
||||
I32EnumAttrCase<"Host", 0, "host">,
|
||||
I32EnumAttrCase<"Device", 1, "device">,
|
||||
I32EnumAttrCase<"HostDevice", 2, "host_device">,
|
||||
I32EnumAttrCase<"Global", 3, "global">,
|
||||
I32EnumAttrCase<"GridGlobal", 4, "grid_global">,
|
||||
]> {
|
||||
let genSpecializedAttr = 0;
|
||||
let cppNamespace = "::fir";
|
||||
}
|
||||
|
||||
def fir_CUDAProcAttributeAttr :
|
||||
EnumAttr<FIROpsDialect, fir_CUDAProcAttribute, "cuda_proc"> {
|
||||
let assemblyFormat = [{ ```<` $value `>` }];
|
||||
}
|
||||
|
||||
def fir_CUDALaunchBoundsAttr : fir_Attr<"CUDALaunchBounds"> {
|
||||
let mnemonic = "launch_bounds";
|
||||
|
||||
let parameters = (ins
|
||||
"mlir::IntegerAttr":$maxTPB,
|
||||
"mlir::IntegerAttr":$minBPM,
|
||||
OptionalParameter<"mlir::IntegerAttr">:$upperBoundClusterSize
|
||||
);
|
||||
|
||||
let assemblyFormat = "`<` struct(params) `>`";
|
||||
}
|
||||
|
||||
def fir_CUDAClusterDimsAttr : fir_Attr<"CUDAClusterDims"> {
|
||||
let mnemonic = "cluster_dims";
|
||||
|
||||
let parameters = (ins
|
||||
"mlir::IntegerAttr":$x,
|
||||
"mlir::IntegerAttr":$y,
|
||||
"mlir::IntegerAttr":$z
|
||||
);
|
||||
|
||||
let assemblyFormat = "`<` struct(params) `>`";
|
||||
}
|
||||
|
||||
def fir_CUDADataTransferKind : I32EnumAttr<
|
||||
"CUDADataTransferKind", "CUDA Fortran data transfer kind",
|
||||
[
|
||||
I32EnumAttrCase<"DeviceHost", 0, "device_host">,
|
||||
I32EnumAttrCase<"HostDevice", 1, "host_device">,
|
||||
I32EnumAttrCase<"DeviceDevice", 2, "device_device">,
|
||||
]> {
|
||||
let genSpecializedAttr = 0;
|
||||
let cppNamespace = "::fir";
|
||||
}
|
||||
|
||||
def fir_CUDADataTransferKindAttr :
|
||||
EnumAttr<FIROpsDialect, fir_CUDADataTransferKind, "cuda_transfer"> {
|
||||
let assemblyFormat = [{ ```<` $value `>` }];
|
||||
}
|
||||
|
||||
#endif // FIR_DIALECT_FIR_ATTRS
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#ifndef FORTRAN_OPTIMIZER_DIALECT_FIROPS_H
|
||||
#define FORTRAN_OPTIMIZER_DIALECT_FIROPS_H
|
||||
|
||||
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
|
||||
#include "flang/Optimizer/Dialect/FIRAttr.h"
|
||||
#include "flang/Optimizer/Dialect/FIRType.h"
|
||||
#include "flang/Optimizer/Dialect/FirAliasTagOpInterface.h"
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
include "mlir/Dialect/Arith/IR/ArithBase.td"
|
||||
include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td"
|
||||
include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td"
|
||||
include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
|
||||
include "flang/Optimizer/Dialect/FIRDialect.td"
|
||||
include "flang/Optimizer/Dialect/FIRTypes.td"
|
||||
include "flang/Optimizer/Dialect/FIRAttr.td"
|
||||
@@ -2436,66 +2437,6 @@ def fir_DispatchOp : fir_Op<"dispatch", []> {
|
||||
}];
|
||||
}
|
||||
|
||||
def fir_CUDAKernelLaunch : fir_Op<"cuda_kernel_launch", [CallOpInterface,
|
||||
AttrSizedOperandSegments]> {
|
||||
let summary = "call CUDA kernel";
|
||||
|
||||
let description = [{
|
||||
Launch a CUDA kernel from the host.
|
||||
|
||||
```
|
||||
// launch simple kernel with no arguments. bytes and stream value are
|
||||
// optional in the chevron notation.
|
||||
fir.cuda_kernel_launch @kernel<<<%gx, %gy, %bx, %by, %bz>>>()
|
||||
```
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
SymbolRefAttr:$callee,
|
||||
I32:$grid_x,
|
||||
I32:$grid_y,
|
||||
I32:$grid_z,
|
||||
I32:$block_x,
|
||||
I32:$block_y,
|
||||
I32:$block_z,
|
||||
Optional<I32>:$bytes,
|
||||
Optional<I32>:$stream,
|
||||
Variadic<AnyType>:$args
|
||||
);
|
||||
|
||||
let assemblyFormat = [{
|
||||
$callee `<` `<` `<` $grid_x `,` $grid_y `,` $grid_z `,`$block_x `,`
|
||||
$block_y `,` $block_z ( `,` $bytes^ ( `,` $stream^ )? )? `>` `>` `>`
|
||||
`` `(` $args `)` ( `:` `(` type($args)^ `)` )? attr-dict
|
||||
}];
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
mlir::CallInterfaceCallable getCallableForCallee() {
|
||||
return getCalleeAttr();
|
||||
}
|
||||
|
||||
void setCalleeFromCallable(mlir::CallInterfaceCallable callee) {
|
||||
(*this)->setAttr(getCalleeAttrName(), callee.get<mlir::SymbolRefAttr>());
|
||||
}
|
||||
mlir::FunctionType getFunctionType();
|
||||
|
||||
unsigned getNbNoArgOperand() {
|
||||
unsigned nbNoArgOperand = 5; // grids and blocks values are always present.
|
||||
if (getBytes()) ++nbNoArgOperand;
|
||||
if (getStream()) ++nbNoArgOperand;
|
||||
return nbNoArgOperand;
|
||||
}
|
||||
|
||||
operand_range getArgOperands() {
|
||||
return {operand_begin() + getNbNoArgOperand(), operand_end()};
|
||||
}
|
||||
mlir::MutableOperandRange getArgOperandsMutable() {
|
||||
return mlir::MutableOperandRange(
|
||||
*this, getNbNoArgOperand(), getArgs().size() - 1);
|
||||
}
|
||||
}];
|
||||
}
|
||||
|
||||
// Constant operations that support Fortran
|
||||
|
||||
def fir_StringLitOp : fir_Op<"string_lit", [NoMemoryEffect]> {
|
||||
@@ -2797,7 +2738,7 @@ def fir_GlobalOp : fir_Op<"global", [IsolatedFromAbove, Symbol]> {
|
||||
OptionalAttr<UnitAttr>:$constant,
|
||||
OptionalAttr<UnitAttr>:$target,
|
||||
OptionalAttr<StrAttr>:$linkName,
|
||||
OptionalAttr<fir_CUDADataAttributeAttr>:$cuda_attr
|
||||
OptionalAttr<cuf_DataAttributeAttr>:$data_attr
|
||||
);
|
||||
|
||||
let regions = (region AtMostRegion<1>:$region);
|
||||
@@ -3077,7 +3018,7 @@ def fir_DeclareOp : fir_Op<"declare", [AttrSizedOperandSegments,
|
||||
Optional<fir_DummyScopeType>:$dummy_scope,
|
||||
Builtin_StringAttr:$uniq_name,
|
||||
OptionalAttr<fir_FortranVariableFlagsAttr>:$fortran_attrs,
|
||||
OptionalAttr<fir_CUDADataAttributeAttr>:$cuda_attr
|
||||
OptionalAttr<cuf_DataAttributeAttr>:$data_attr
|
||||
);
|
||||
|
||||
let results = (outs AnyRefOrBox);
|
||||
@@ -3130,125 +3071,6 @@ def fir_BoxOffsetOp : fir_Op<"box_offset", [NoMemoryEffect]> {
|
||||
];
|
||||
}
|
||||
|
||||
def fir_CUDAKernelOp : fir_Op<"cuda_kernel", [AttrSizedOperandSegments,
|
||||
DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
|
||||
|
||||
let description = [{
|
||||
Represent the CUDA Fortran kernel directive. The operation is a loop like
|
||||
operation that represents the iteration range of the embedded loop nest.
|
||||
|
||||
When grid or block variadic operands are empty, a `*` only syntax was used
|
||||
in the Fortran code.
|
||||
If the `*` is mixed with values for either grid or block, these are
|
||||
represented by a 0 constant value.
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
Variadic<I32>:$grid, // empty means `*`
|
||||
Variadic<I32>:$block, // empty means `*`
|
||||
Optional<I32>:$stream,
|
||||
Variadic<Index>:$lowerbound,
|
||||
Variadic<Index>:$upperbound,
|
||||
Variadic<Index>:$step,
|
||||
OptionalAttr<I64Attr>:$n
|
||||
);
|
||||
|
||||
let regions = (region AnyRegion:$region);
|
||||
|
||||
let assemblyFormat = [{
|
||||
`<` `<` `<` custom<CUFKernelValues>($grid, type($grid)) `,`
|
||||
custom<CUFKernelValues>($block, type($block))
|
||||
( `,` `stream` `=` $stream^ )? `>` `>` `>`
|
||||
custom<CUFKernelLoopControl>($region, $lowerbound, type($lowerbound),
|
||||
$upperbound, type($upperbound), $step, type($step))
|
||||
attr-dict
|
||||
}];
|
||||
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def fir_CUDADataTransferOp : fir_Op<"cuda_data_transfer", []> {
|
||||
let summary = "Represent a data transfer between host and device memory";
|
||||
|
||||
let description = [{
|
||||
CUDA Fortran allows data transfer to be done via intrinsic assignment
|
||||
between a host and a device variable. This operation is used to materialized
|
||||
the data transfer between the lhs and rhs memory references.
|
||||
The kind of transfer is specified in the attribute.
|
||||
|
||||
```
|
||||
adev = a ! transfer host to device
|
||||
a = adev ! transfer device to host
|
||||
bdev = adev ! transfer device to device
|
||||
```
|
||||
}];
|
||||
|
||||
let arguments = (ins Arg<AnyReferenceLike, "", [MemWrite]>:$src,
|
||||
Arg<AnyReferenceLike, "", [MemRead]>:$dst,
|
||||
fir_CUDADataTransferKindAttr:$transfer_kind);
|
||||
|
||||
let assemblyFormat = [{
|
||||
$src `to` $dst attr-dict `:` type(operands)
|
||||
}];
|
||||
}
|
||||
|
||||
def fir_CUDAAllocateOp : fir_Op<"cuda_allocate", [AttrSizedOperandSegments,
|
||||
MemoryEffects<[MemAlloc<DefaultResource>]>]> {
|
||||
let summary = "Perform the device allocation of data of an allocatable";
|
||||
|
||||
let description = [{
|
||||
The fir.cuda_allocate operation performs the allocation on the device
|
||||
of the data of an allocatable. The descriptor passed to the operation
|
||||
is initialized before with the standard flang runtime calls.
|
||||
}];
|
||||
|
||||
let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
|
||||
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
|
||||
Optional<AnyIntegerType>:$stream,
|
||||
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
|
||||
Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
|
||||
fir_CUDADataAttributeAttr:$cuda_attr,
|
||||
UnitAttr:$hasStat);
|
||||
|
||||
let results = (outs AnyIntegerType:$stat);
|
||||
|
||||
let assemblyFormat = [{
|
||||
$box `:` qualified(type($box))
|
||||
( `source` `(` $source^ `:` qualified(type($source) )`)` )?
|
||||
( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
|
||||
( `stream` `(` $stream^ `:` type($stream) `)` )?
|
||||
( `pinned` `(` $pinned^ `:` type($pinned) `)` )?
|
||||
attr-dict `->` type($stat)
|
||||
}];
|
||||
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def fir_CUDADeallocateOp : fir_Op<"cuda_deallocate",
|
||||
[MemoryEffects<[MemFree<DefaultResource>]>]> {
|
||||
let summary = "Perform the device deallocation of data of an allocatable";
|
||||
|
||||
let description = [{
|
||||
The fir.cuda_deallocate operation performs the deallocation on the device
|
||||
of the data of an allocatable.
|
||||
}];
|
||||
|
||||
let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
|
||||
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
|
||||
fir_CUDADataAttributeAttr:$cuda_attr,
|
||||
UnitAttr:$hasStat);
|
||||
|
||||
let results = (outs AnyIntegerType:$stat);
|
||||
|
||||
let assemblyFormat = [{
|
||||
$box `:` qualified(type($box))
|
||||
( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
|
||||
attr-dict `->` type($stat)
|
||||
}];
|
||||
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def fir_DummyScopeOp : fir_Op<"dummy_scope",
|
||||
[MemoryEffects<[MemWrite<DebuggingResource>]>]> {
|
||||
let summary = "Define a scope for dummy arguments";
|
||||
@@ -3329,62 +3151,4 @@ def fir_DummyScopeOp : fir_Op<"dummy_scope",
|
||||
let assemblyFormat = "attr-dict `:` type(results)";
|
||||
}
|
||||
|
||||
def fir_CUDAAllocOp : fir_Op<"cuda_alloc", [AttrSizedOperandSegments,
|
||||
MemoryEffects<[MemAlloc]>]> {
|
||||
let summary = "Allocate an object on device";
|
||||
|
||||
let description = [{
|
||||
This is a drop in replacement for fir.alloca and fir.allocmem for device
|
||||
object. Any device, managed or unified object declared in an host
|
||||
subprogram needs to be allocated in the device memory through runtime calls.
|
||||
The fir.cuda_alloc is an abstraction to the runtime calls and works together
|
||||
with fir.cuda_free.
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
TypeAttr:$in_type,
|
||||
OptionalAttr<StrAttr>:$uniq_name,
|
||||
OptionalAttr<StrAttr>:$bindc_name,
|
||||
Variadic<AnyIntegerType>:$typeparams,
|
||||
Variadic<AnyIntegerType>:$shape,
|
||||
fir_CUDADataAttributeAttr:$cuda_attr
|
||||
);
|
||||
|
||||
let results = (outs fir_ReferenceType:$ptr);
|
||||
|
||||
let assemblyFormat = [{
|
||||
$in_type (`(` $typeparams^ `:` type($typeparams) `)`)?
|
||||
(`,` $shape^ `:` type($shape) )? attr-dict `->` qualified(type($ptr))
|
||||
}];
|
||||
|
||||
let builders = [
|
||||
OpBuilder<(ins "mlir::Type":$inType, "llvm::StringRef":$uniqName,
|
||||
"llvm::StringRef":$bindcName,
|
||||
"fir::CUDADataAttributeAttr":$cudaAttr,
|
||||
CArg<"mlir::ValueRange", "{}">:$typeparams,
|
||||
CArg<"mlir::ValueRange", "{}">:$shape,
|
||||
CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>];
|
||||
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def fir_CUDAFreeOp : fir_Op<"cuda_free", [MemoryEffects<[MemFree]>]> {
|
||||
let summary = "Free a device allocated object";
|
||||
|
||||
let description = [{
|
||||
The fir.cuda_free operation frees the memory allocated by fir.cuda_alloc.
|
||||
This is used for non-allocatable device, managed and unified device
|
||||
variables declare in host subprogram.
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
Arg<AnyReferenceLike, "", [MemFree]>:$devptr,
|
||||
fir_CUDADataAttributeAttr:$cuda_attr
|
||||
);
|
||||
|
||||
let assemblyFormat = "$devptr `:` qualified(type($devptr)) attr-dict";
|
||||
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -75,19 +75,6 @@ constexpr llvm::StringRef getOptionalAttrName() { return "fir.optional"; }
|
||||
/// Attribute to mark Fortran entities with the TARGET attribute.
|
||||
static constexpr llvm::StringRef getTargetAttrName() { return "fir.target"; }
|
||||
|
||||
/// Attribute to mark Fortran entities with the CUDA attribute.
|
||||
static constexpr llvm::StringRef getCUDAAttrName() { return "fir.cuda_attr"; }
|
||||
|
||||
/// Attribute to carry CUDA launch_bounds values.
|
||||
static constexpr llvm::StringRef getCUDALaunchBoundsAttrName() {
|
||||
return "fir.cuda_launch_bounds";
|
||||
}
|
||||
|
||||
/// Attribute to carry CUDA cluster_dims values.
|
||||
static constexpr llvm::StringRef getCUDAClusterDimsAttrName() {
|
||||
return "fir.cuda_cluster_dims";
|
||||
}
|
||||
|
||||
/// Attribute to mark that a function argument is a character dummy procedure.
|
||||
/// Character dummy procedure have special ABI constraints.
|
||||
static constexpr llvm::StringRef getCharacterProcedureDummyAttrName() {
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#define FORTRAN_DIALECT_HLFIR_OPS
|
||||
|
||||
include "flang/Optimizer/HLFIR/HLFIROpBase.td"
|
||||
include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
|
||||
include "flang/Optimizer/Dialect/FIRTypes.td"
|
||||
include "flang/Optimizer/Dialect/FIRAttr.td"
|
||||
include "flang/Optimizer/Dialect/FortranVariableInterface.td"
|
||||
@@ -90,7 +91,7 @@ def hlfir_DeclareOp : hlfir_Op<"declare", [AttrSizedOperandSegments,
|
||||
Optional<fir_DummyScopeType>:$dummy_scope,
|
||||
Builtin_StringAttr:$uniq_name,
|
||||
OptionalAttr<fir_FortranVariableFlagsAttr>:$fortran_attrs,
|
||||
OptionalAttr<fir_CUDADataAttributeAttr>:$cuda_attr
|
||||
OptionalAttr<cuf_DataAttributeAttr>:$data_attr
|
||||
);
|
||||
|
||||
let results = (outs AnyFortranVariable, AnyRefOrBoxLike);
|
||||
@@ -106,7 +107,7 @@ def hlfir_DeclareOp : hlfir_Op<"declare", [AttrSizedOperandSegments,
|
||||
CArg<"mlir::Value", "{}">:$shape, CArg<"mlir::ValueRange", "{}">:$typeparams,
|
||||
CArg<"mlir::Value", "{}">:$dummy_scope,
|
||||
CArg<"fir::FortranVariableFlagsAttr", "{}">:$fortran_attrs,
|
||||
CArg<"fir::CUDADataAttributeAttr", "{}">:$cuda_attr)>];
|
||||
CArg<"cuf::DataAttributeAttr", "{}">:$data_attr)>];
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
/// Get the variable original base (same as input). It lacks
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#ifndef FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
|
||||
#define FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
|
||||
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
|
||||
#include "flang/Optimizer/Dialect/FIRDialect.h"
|
||||
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
|
||||
#include "mlir/Conversion/Passes.h"
|
||||
@@ -34,7 +35,7 @@ namespace fir::support {
|
||||
mlir::scf::SCFDialect, mlir::arith::ArithDialect, \
|
||||
mlir::cf::ControlFlowDialect, mlir::func::FuncDialect, \
|
||||
mlir::vector::VectorDialect, mlir::math::MathDialect, \
|
||||
mlir::complex::ComplexDialect, mlir::DLTIDialect
|
||||
mlir::complex::ComplexDialect, mlir::DLTIDialect, cuf::CUFDialect
|
||||
|
||||
#define FLANG_CODEGEN_DIALECT_LIST FIRCodeGenDialect, mlir::LLVM::LLVMDialect
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "flang/Common/default-kinds.h"
|
||||
#include "flang/Optimizer/Builder/FIRBuilder.h"
|
||||
#include "flang/Optimizer/Builder/Todo.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
|
||||
#include "flang/Optimizer/Dialect/FIROps.h"
|
||||
#include "flang/Optimizer/Dialect/FIRType.h"
|
||||
#include "flang/Optimizer/Support/FatalError.h"
|
||||
@@ -136,66 +137,6 @@ inline void intrinsicTypeTODO(fir::FirOpBuilder &builder, mlir::Type type,
|
||||
" in " + intrinsicName);
|
||||
}
|
||||
|
||||
inline fir::CUDADataAttributeAttr
|
||||
getCUDADataAttribute(mlir::MLIRContext *mlirContext,
|
||||
std::optional<Fortran::common::CUDADataAttr> cudaAttr) {
|
||||
if (cudaAttr) {
|
||||
fir::CUDADataAttribute attr;
|
||||
switch (*cudaAttr) {
|
||||
case Fortran::common::CUDADataAttr::Constant:
|
||||
attr = fir::CUDADataAttribute::Constant;
|
||||
break;
|
||||
case Fortran::common::CUDADataAttr::Device:
|
||||
attr = fir::CUDADataAttribute::Device;
|
||||
break;
|
||||
case Fortran::common::CUDADataAttr::Managed:
|
||||
attr = fir::CUDADataAttribute::Managed;
|
||||
break;
|
||||
case Fortran::common::CUDADataAttr::Pinned:
|
||||
attr = fir::CUDADataAttribute::Pinned;
|
||||
break;
|
||||
case Fortran::common::CUDADataAttr::Shared:
|
||||
attr = fir::CUDADataAttribute::Shared;
|
||||
break;
|
||||
case Fortran::common::CUDADataAttr::Texture:
|
||||
// Obsolete attribute
|
||||
return {};
|
||||
case Fortran::common::CUDADataAttr::Unified:
|
||||
attr = fir::CUDADataAttribute::Unified;
|
||||
break;
|
||||
}
|
||||
return fir::CUDADataAttributeAttr::get(mlirContext, attr);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
inline fir::CUDAProcAttributeAttr getCUDAProcAttribute(
|
||||
mlir::MLIRContext *mlirContext,
|
||||
std::optional<Fortran::common::CUDASubprogramAttrs> cudaAttr) {
|
||||
if (cudaAttr) {
|
||||
fir::CUDAProcAttribute attr;
|
||||
switch (*cudaAttr) {
|
||||
case Fortran::common::CUDASubprogramAttrs::Host:
|
||||
attr = fir::CUDAProcAttribute::Host;
|
||||
break;
|
||||
case Fortran::common::CUDASubprogramAttrs::Device:
|
||||
attr = fir::CUDAProcAttribute::Device;
|
||||
break;
|
||||
case Fortran::common::CUDASubprogramAttrs::HostDevice:
|
||||
attr = fir::CUDAProcAttribute::HostDevice;
|
||||
break;
|
||||
case Fortran::common::CUDASubprogramAttrs::Global:
|
||||
attr = fir::CUDAProcAttribute::Global;
|
||||
break;
|
||||
case Fortran::common::CUDASubprogramAttrs::Grid_Global:
|
||||
attr = fir::CUDAProcAttribute::GridGlobal;
|
||||
break;
|
||||
}
|
||||
return fir::CUDAProcAttributeAttr::get(mlirContext, attr);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace fir
|
||||
|
||||
#endif // FORTRAN_OPTIMIZER_SUPPORT_UTILS_H
|
||||
|
||||
@@ -14,6 +14,7 @@ add_flang_library(flangFrontend
|
||||
TextDiagnostic.cpp
|
||||
|
||||
DEPENDS
|
||||
CUFDialect
|
||||
FIRDialect
|
||||
FIROptCodeGenPassIncGen
|
||||
FIROptTransformsPassIncGen
|
||||
@@ -23,6 +24,7 @@ add_flang_library(flangFrontend
|
||||
${extension_libs}
|
||||
|
||||
LINK_LIBS
|
||||
CUFDialect
|
||||
FortranParser
|
||||
FortranSemantics
|
||||
FortranEvaluate
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "flang/Optimizer/Builder/FIRBuilder.h"
|
||||
#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
|
||||
#include "flang/Optimizer/Builder/Todo.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
|
||||
#include "flang/Optimizer/Dialect/FIROps.h"
|
||||
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
|
||||
#include "flang/Optimizer/Support/FatalError.h"
|
||||
@@ -729,9 +730,9 @@ private:
|
||||
ErrorManager &errorManager,
|
||||
const Fortran::semantics::Symbol &sym) {
|
||||
Fortran::lower::StatementContext stmtCtx;
|
||||
fir::CUDADataAttributeAttr cudaAttr =
|
||||
Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
|
||||
sym);
|
||||
cuf::DataAttributeAttr cudaAttr =
|
||||
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
|
||||
sym);
|
||||
mlir::Value errmsg = errMsgExpr ? errorManager.errMsgAddr : nullptr;
|
||||
mlir::Value stream =
|
||||
streamExpr
|
||||
@@ -746,7 +747,7 @@ private:
|
||||
// Keep return type the same as a standard AllocatableAllocate call.
|
||||
mlir::Type retTy = fir::runtime::getModel<int>()(builder.getContext());
|
||||
return builder
|
||||
.create<fir::CUDAAllocateOp>(
|
||||
.create<cuf::AllocateOp>(
|
||||
loc, retTy, box.getAddr(), errmsg, stream, pinned, source, cudaAttr,
|
||||
errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr)
|
||||
.getResult();
|
||||
@@ -804,9 +805,9 @@ static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
|
||||
const fir::MutableBoxValue &box,
|
||||
ErrorManager &errorManager,
|
||||
const Fortran::semantics::Symbol &sym) {
|
||||
fir::CUDADataAttributeAttr cudaAttr =
|
||||
Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
|
||||
sym);
|
||||
cuf::DataAttributeAttr cudaAttr =
|
||||
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
|
||||
sym);
|
||||
mlir::Value errmsg =
|
||||
mlir::isa<fir::AbsentOp>(errorManager.errMsgAddr.getDefiningOp())
|
||||
? nullptr
|
||||
@@ -815,7 +816,7 @@ static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
|
||||
// Keep return type the same as a standard AllocatableAllocate call.
|
||||
mlir::Type retTy = fir::runtime::getModel<int>()(builder.getContext());
|
||||
return builder
|
||||
.create<fir::CUDADeallocateOp>(
|
||||
.create<cuf::DeallocateOp>(
|
||||
loc, retTy, box.getAddr(), errmsg, cudaAttr,
|
||||
errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr)
|
||||
.getResult();
|
||||
|
||||
@@ -40,6 +40,8 @@
|
||||
#include "flang/Optimizer/Builder/Runtime/Ragged.h"
|
||||
#include "flang/Optimizer/Builder/Runtime/Stop.h"
|
||||
#include "flang/Optimizer/Builder/Todo.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
|
||||
#include "flang/Optimizer/Dialect/FIRAttr.h"
|
||||
#include "flang/Optimizer/Dialect/FIRDialect.h"
|
||||
#include "flang/Optimizer/Dialect/FIROps.h"
|
||||
@@ -2648,8 +2650,8 @@ private:
|
||||
loopEval = &*std::next(loopEval->getNestedEvaluations().begin());
|
||||
}
|
||||
|
||||
auto op = builder->create<fir::CUDAKernelOp>(
|
||||
loc, gridValues, blockValues, streamValue, lbs, ubs, steps, n);
|
||||
auto op = builder->create<cuf::KernelOp>(loc, gridValues, blockValues,
|
||||
streamValue, lbs, ubs, steps, n);
|
||||
builder->createBlock(&op.getRegion(), op.getRegion().end(), ivTypes,
|
||||
ivLocs);
|
||||
mlir::Block &b = op.getRegion().back();
|
||||
@@ -3719,46 +3721,44 @@ private:
|
||||
|
||||
// device = host
|
||||
if (lhsIsDevice && !rhsIsDevice) {
|
||||
auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
|
||||
builder.getContext(), fir::CUDADataTransferKind::HostDevice);
|
||||
auto transferKindAttr = cuf::DataTransferKindAttr::get(
|
||||
builder.getContext(), cuf::DataTransferKind::HostDevice);
|
||||
if (!rhs.isVariable()) {
|
||||
auto associate = hlfir::genAssociateExpr(
|
||||
loc, builder, rhs, rhs.getType(), ".cuf_host_tmp");
|
||||
builder.create<fir::CUDADataTransferOp>(loc, associate.getBase(), lhs,
|
||||
transferKindAttr);
|
||||
builder.create<cuf::DataTransferOp>(loc, associate.getBase(), lhs,
|
||||
transferKindAttr);
|
||||
builder.create<hlfir::EndAssociateOp>(loc, associate);
|
||||
} else {
|
||||
builder.create<fir::CUDADataTransferOp>(loc, rhs, lhs,
|
||||
transferKindAttr);
|
||||
builder.create<cuf::DataTransferOp>(loc, rhs, lhs, transferKindAttr);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// host = device
|
||||
if (!lhsIsDevice && rhsIsDevice) {
|
||||
auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
|
||||
builder.getContext(), fir::CUDADataTransferKind::DeviceHost);
|
||||
auto transferKindAttr = cuf::DataTransferKindAttr::get(
|
||||
builder.getContext(), cuf::DataTransferKind::DeviceHost);
|
||||
if (!rhs.isVariable()) {
|
||||
// evaluateRhs loads scalar. Look for the memory reference to be used in
|
||||
// the transfer.
|
||||
if (mlir::isa_and_nonnull<fir::LoadOp>(rhs.getDefiningOp())) {
|
||||
auto loadOp = mlir::dyn_cast<fir::LoadOp>(rhs.getDefiningOp());
|
||||
builder.create<fir::CUDADataTransferOp>(loc, loadOp.getMemref(), lhs,
|
||||
transferKindAttr);
|
||||
builder.create<cuf::DataTransferOp>(loc, loadOp.getMemref(), lhs,
|
||||
transferKindAttr);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
builder.create<fir::CUDADataTransferOp>(loc, rhs, lhs,
|
||||
transferKindAttr);
|
||||
builder.create<cuf::DataTransferOp>(loc, rhs, lhs, transferKindAttr);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (lhsIsDevice && rhsIsDevice) {
|
||||
assert(rhs.isVariable() && "CUDA Fortran assignment rhs is not legal");
|
||||
auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
|
||||
builder.getContext(), fir::CUDADataTransferKind::DeviceDevice);
|
||||
builder.create<fir::CUDADataTransferOp>(loc, rhs, lhs, transferKindAttr);
|
||||
auto transferKindAttr = cuf::DataTransferKindAttr::get(
|
||||
builder.getContext(), cuf::DataTransferKind::DeviceDevice);
|
||||
builder.create<cuf::DataTransferOp>(loc, rhs, lhs, transferKindAttr);
|
||||
return;
|
||||
}
|
||||
llvm_unreachable("Unhandled CUDA data transfer");
|
||||
@@ -3769,8 +3769,8 @@ private:
|
||||
const Fortran::evaluate::Assignment &assign) {
|
||||
llvm::SmallVector<mlir::Value> temps;
|
||||
localSymbols.pushScope();
|
||||
auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
|
||||
builder.getContext(), fir::CUDADataTransferKind::DeviceHost);
|
||||
auto transferKindAttr = cuf::DataTransferKindAttr::get(
|
||||
builder.getContext(), cuf::DataTransferKind::DeviceHost);
|
||||
[[maybe_unused]] unsigned nbDeviceResidentObject = 0;
|
||||
for (const Fortran::semantics::Symbol &sym :
|
||||
Fortran::evaluate::CollectSymbols(assign.rhs)) {
|
||||
@@ -3795,8 +3795,8 @@ private:
|
||||
addSymbol(sym,
|
||||
hlfir::translateToExtendedValue(loc, builder, temp).first,
|
||||
/*forced=*/true);
|
||||
builder.create<fir::CUDADataTransferOp>(loc, addr, temp,
|
||||
transferKindAttr);
|
||||
builder.create<cuf::DataTransferOp>(loc, addr, temp,
|
||||
transferKindAttr);
|
||||
++nbDeviceResidentObject;
|
||||
}
|
||||
}
|
||||
@@ -3808,15 +3808,15 @@ private:
|
||||
// subprogram are not considered fully device context so it will return false
|
||||
// for it.
|
||||
static bool isDeviceContext(fir::FirOpBuilder &builder) {
|
||||
if (builder.getRegion().getParentOfType<fir::CUDAKernelOp>())
|
||||
if (builder.getRegion().getParentOfType<cuf::KernelOp>())
|
||||
return true;
|
||||
if (auto funcOp =
|
||||
builder.getRegion().getParentOfType<mlir::func::FuncOp>()) {
|
||||
if (auto cudaProcAttr =
|
||||
funcOp.getOperation()->getAttrOfType<fir::CUDAProcAttributeAttr>(
|
||||
fir::getCUDAAttrName())) {
|
||||
return cudaProcAttr.getValue() != fir::CUDAProcAttribute::Host &&
|
||||
cudaProcAttr.getValue() != fir::CUDAProcAttribute::HostDevice;
|
||||
funcOp.getOperation()->getAttrOfType<cuf::ProcAttributeAttr>(
|
||||
cuf::getProcAttrName())) {
|
||||
return cudaProcAttr.getValue() != cuf::ProcAttribute::Host &&
|
||||
cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
||||
@@ -37,6 +37,8 @@ add_flang_library(FortranLower
|
||||
VectorSubscripts.cpp
|
||||
|
||||
DEPENDS
|
||||
CUFAttrs
|
||||
CUFDialect
|
||||
FIRDialect
|
||||
FIRTransforms
|
||||
HLFIRDialect
|
||||
@@ -44,6 +46,8 @@ add_flang_library(FortranLower
|
||||
${extension_libs}
|
||||
|
||||
LINK_LIBS
|
||||
CUFAttrs
|
||||
CUFDialect
|
||||
FIRDialect
|
||||
FIRDialectSupport
|
||||
FIRBuilder
|
||||
|
||||
@@ -627,9 +627,9 @@ setCUDAAttributes(mlir::func::FuncOp func,
|
||||
characteristic) {
|
||||
if (characteristic && characteristic->cudaSubprogramAttrs) {
|
||||
func.getOperation()->setAttr(
|
||||
fir::getCUDAAttrName(),
|
||||
fir::getCUDAProcAttribute(func.getContext(),
|
||||
*characteristic->cudaSubprogramAttrs));
|
||||
cuf::getProcAttrName(),
|
||||
cuf::getProcAttribute(func.getContext(),
|
||||
*characteristic->cudaSubprogramAttrs));
|
||||
}
|
||||
|
||||
if (sym) {
|
||||
@@ -649,9 +649,9 @@ setCUDAAttributes(mlir::func::FuncOp func,
|
||||
ubAttr =
|
||||
mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[2]);
|
||||
func.getOperation()->setAttr(
|
||||
fir::getCUDALaunchBoundsAttrName(),
|
||||
fir::CUDALaunchBoundsAttr::get(func.getContext(), maxTPBAttr,
|
||||
minBPMAttr, ubAttr));
|
||||
cuf::getLaunchBoundsAttrName(),
|
||||
cuf::LaunchBoundsAttr::get(func.getContext(), maxTPBAttr,
|
||||
minBPMAttr, ubAttr));
|
||||
}
|
||||
|
||||
if (!details->cudaClusterDims().empty()) {
|
||||
@@ -663,9 +663,8 @@ setCUDAAttributes(mlir::func::FuncOp func,
|
||||
auto zAttr =
|
||||
mlir::IntegerAttr::get(i64Ty, details->cudaClusterDims()[2]);
|
||||
func.getOperation()->setAttr(
|
||||
fir::getCUDAClusterDimsAttrName(),
|
||||
fir::CUDAClusterDimsAttr::get(func.getContext(), xAttr, yAttr,
|
||||
zAttr));
|
||||
cuf::getClusterDimsAttrName(),
|
||||
cuf::ClusterDimsAttr::get(func.getContext(), xAttr, yAttr, zAttr));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1116,8 +1115,8 @@ private:
|
||||
addMLIRAttr(fir::getTargetAttrName());
|
||||
if (obj.cudaDataAttr)
|
||||
attrs.emplace_back(
|
||||
mlir::StringAttr::get(&mlirContext, fir::getCUDAAttrName()),
|
||||
fir::getCUDADataAttribute(&mlirContext, obj.cudaDataAttr));
|
||||
mlir::StringAttr::get(&mlirContext, cuf::getDataAttrName()),
|
||||
cuf::getDataAttribute(&mlirContext, obj.cudaDataAttr));
|
||||
|
||||
// TODO: intents that require special care (e.g finalization)
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "flang/Optimizer/Builder/MutableBox.h"
|
||||
#include "flang/Optimizer/Builder/Runtime/Derived.h"
|
||||
#include "flang/Optimizer/Builder/Todo.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
|
||||
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
|
||||
#include "flang/Optimizer/HLFIR/HLFIROps.h"
|
||||
#include "mlir/IR/IRMapping.h"
|
||||
@@ -589,7 +590,7 @@ std::pair<fir::ExtendedValue, bool> Fortran::lower::genCallOpAndResult(
|
||||
fir::getBase(converter.genExprValue(
|
||||
caller.getCallDescription().chevrons()[3], stmtCtx)));
|
||||
|
||||
builder.create<fir::CUDAKernelLaunch>(
|
||||
builder.create<cuf::KernelLaunchOp>(
|
||||
loc, funcType.getResults(), funcSymbolAttr, grid_x, grid_y, grid_z,
|
||||
block_x, block_y, block_z, bytes, stream, operands);
|
||||
callNumResults = 0;
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include "flang/Optimizer/Builder/IntrinsicCall.h"
|
||||
#include "flang/Optimizer/Builder/Runtime/Derived.h"
|
||||
#include "flang/Optimizer/Builder/Todo.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
|
||||
#include "flang/Optimizer/Dialect/FIRAttr.h"
|
||||
#include "flang/Optimizer/Dialect/FIRDialect.h"
|
||||
#include "flang/Optimizer/Dialect/FIROps.h"
|
||||
@@ -139,7 +140,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter,
|
||||
const Fortran::lower::pft::Variable &var,
|
||||
llvm::StringRef globalName,
|
||||
mlir::StringAttr linkage,
|
||||
fir::CUDADataAttributeAttr cudaAttr = {});
|
||||
cuf::DataAttributeAttr dataAttr = {});
|
||||
|
||||
static mlir::Location genLocation(Fortran::lower::AbstractConverter &converter,
|
||||
const Fortran::semantics::Symbol &sym) {
|
||||
@@ -172,12 +173,12 @@ static fir::GlobalOp declareGlobal(Fortran::lower::AbstractConverter &converter,
|
||||
!Fortran::semantics::IsProcedurePointer(ultimate))
|
||||
mlir::emitError(loc, "processing global declaration: symbol '")
|
||||
<< toStringRef(sym.name()) << "' has unexpected details\n";
|
||||
fir::CUDADataAttributeAttr cudaAttr =
|
||||
Fortran::lower::translateSymbolCUDADataAttribute(
|
||||
cuf::DataAttributeAttr dataAttr =
|
||||
Fortran::lower::translateSymbolCUFDataAttribute(
|
||||
converter.getFirOpBuilder().getContext(), sym);
|
||||
return builder.createGlobal(loc, converter.genType(var), globalName, linkage,
|
||||
mlir::Attribute{}, isConstant(ultimate),
|
||||
var.isTarget(), cudaAttr);
|
||||
var.isTarget(), dataAttr);
|
||||
}
|
||||
|
||||
/// Temporary helper to catch todos in initial data target lowering.
|
||||
@@ -474,7 +475,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter,
|
||||
const Fortran::lower::pft::Variable &var,
|
||||
llvm::StringRef globalName,
|
||||
mlir::StringAttr linkage,
|
||||
fir::CUDADataAttributeAttr cudaAttr) {
|
||||
cuf::DataAttributeAttr dataAttr) {
|
||||
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
|
||||
const Fortran::semantics::Symbol &sym = var.getSymbol();
|
||||
mlir::Location loc = genLocation(converter, sym);
|
||||
@@ -514,7 +515,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter,
|
||||
if (!global)
|
||||
global =
|
||||
builder.createGlobal(loc, symTy, globalName, linkage, mlir::Attribute{},
|
||||
isConst, var.isTarget(), cudaAttr);
|
||||
isConst, var.isTarget(), dataAttr);
|
||||
if (Fortran::semantics::IsAllocatableOrPointer(sym) &&
|
||||
!Fortran::semantics::IsProcedure(sym)) {
|
||||
const auto *details =
|
||||
@@ -694,9 +695,9 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
|
||||
return builder.create<fir::ZeroOp>(loc, fir::ReferenceType::get(ty));
|
||||
|
||||
if (Fortran::semantics::NeedCUDAAlloc(ultimateSymbol)) {
|
||||
fir::CUDADataAttributeAttr cudaAttr =
|
||||
Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
|
||||
ultimateSymbol);
|
||||
cuf::DataAttributeAttr dataAttr =
|
||||
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
|
||||
ultimateSymbol);
|
||||
llvm::SmallVector<mlir::Value> indices;
|
||||
llvm::SmallVector<mlir::Value> elidedShape =
|
||||
fir::factory::elideExtentsAlreadyInType(ty, shape);
|
||||
@@ -705,8 +706,8 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
|
||||
auto idxTy = builder.getIndexType();
|
||||
for (mlir::Value sh : elidedShape)
|
||||
indices.push_back(builder.createConvert(loc, idxTy, sh));
|
||||
return builder.create<fir::CUDAAllocOp>(loc, ty, nm, symNm, cudaAttr,
|
||||
lenParams, indices);
|
||||
return builder.create<cuf::AllocOp>(loc, ty, nm, symNm, dataAttr, lenParams,
|
||||
indices);
|
||||
}
|
||||
|
||||
// Let the builder do all the heavy lifting.
|
||||
@@ -950,10 +951,10 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
|
||||
converter.getSymbolExtendedValue(var.getSymbol(), &symMap);
|
||||
auto *sym = &var.getSymbol();
|
||||
converter.getFctCtx().attachCleanup([builder, loc, exv, sym]() {
|
||||
fir::CUDADataAttributeAttr cudaAttr =
|
||||
Fortran::lower::translateSymbolCUDADataAttribute(
|
||||
builder->getContext(), *sym);
|
||||
builder->create<fir::CUDAFreeOp>(loc, fir::getBase(exv), cudaAttr);
|
||||
cuf::DataAttributeAttr dataAttr =
|
||||
Fortran::lower::translateSymbolCUFDataAttribute(builder->getContext(),
|
||||
*sym);
|
||||
builder->create<cuf::FreeOp>(loc, fir::getBase(exv), dataAttr);
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1628,11 +1629,11 @@ fir::FortranVariableFlagsAttr Fortran::lower::translateSymbolAttributes(
|
||||
return fir::FortranVariableFlagsAttr::get(mlirContext, flags);
|
||||
}
|
||||
|
||||
fir::CUDADataAttributeAttr Fortran::lower::translateSymbolCUDADataAttribute(
|
||||
cuf::DataAttributeAttr Fortran::lower::translateSymbolCUFDataAttribute(
|
||||
mlir::MLIRContext *mlirContext, const Fortran::semantics::Symbol &sym) {
|
||||
std::optional<Fortran::common::CUDADataAttr> cudaAttr =
|
||||
Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
|
||||
return fir::getCUDADataAttribute(mlirContext, cudaAttr);
|
||||
return cuf::getDataAttribute(mlirContext, cudaAttr);
|
||||
}
|
||||
|
||||
/// Map a symbol to its FIR address and evaluated specification expressions.
|
||||
@@ -1672,9 +1673,9 @@ static void genDeclareSymbol(Fortran::lower::AbstractConverter &converter,
|
||||
auto name = converter.mangleName(sym);
|
||||
fir::FortranVariableFlagsAttr attributes =
|
||||
Fortran::lower::translateSymbolAttributes(builder.getContext(), sym);
|
||||
fir::CUDADataAttributeAttr cudaAttr =
|
||||
Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
|
||||
sym);
|
||||
cuf::DataAttributeAttr dataAttr =
|
||||
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
|
||||
sym);
|
||||
|
||||
if (sym.test(Fortran::semantics::Symbol::Flag::CrayPointee)) {
|
||||
mlir::Type ptrBoxType =
|
||||
@@ -1716,7 +1717,7 @@ static void genDeclareSymbol(Fortran::lower::AbstractConverter &converter,
|
||||
dummyScope = converter.dummyArgsScopeValue();
|
||||
auto newBase = builder.create<hlfir::DeclareOp>(
|
||||
loc, base, name, shapeOrShift, lenParams, dummyScope, attributes,
|
||||
cudaAttr);
|
||||
dataAttr);
|
||||
symMap.addVariableDefinition(sym, newBase, force);
|
||||
return;
|
||||
}
|
||||
@@ -1762,15 +1763,15 @@ void Fortran::lower::genDeclareSymbol(
|
||||
fir::FortranVariableFlagsAttr attributes =
|
||||
Fortran::lower::translateSymbolAttributes(
|
||||
builder.getContext(), sym.GetUltimate(), extraFlags);
|
||||
fir::CUDADataAttributeAttr cudaAttr =
|
||||
Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
|
||||
sym.GetUltimate());
|
||||
cuf::DataAttributeAttr dataAttr =
|
||||
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
|
||||
sym.GetUltimate());
|
||||
auto name = converter.mangleName(sym);
|
||||
mlir::Value dummyScope;
|
||||
if (converter.isRegisteredDummySymbol(sym))
|
||||
dummyScope = converter.dummyArgsScopeValue();
|
||||
hlfir::EntityWithAttributes declare = hlfir::genDeclare(
|
||||
loc, builder, exv, name, attributes, dummyScope, cudaAttr);
|
||||
loc, builder, exv, name, attributes, dummyScope, dataAttr);
|
||||
symMap.addVariableDefinition(sym, declare.getIfVariableInterface(), force);
|
||||
return;
|
||||
}
|
||||
@@ -2272,10 +2273,10 @@ void Fortran::lower::defineModuleVariable(
|
||||
// Do nothing. Mapping will be done on user side.
|
||||
} else {
|
||||
std::string globalName = converter.mangleName(sym);
|
||||
fir::CUDADataAttributeAttr cudaAttr =
|
||||
Fortran::lower::translateSymbolCUDADataAttribute(
|
||||
cuf::DataAttributeAttr dataAttr =
|
||||
Fortran::lower::translateSymbolCUFDataAttribute(
|
||||
converter.getFirOpBuilder().getContext(), sym);
|
||||
defineGlobal(converter, var, globalName, linkage, cudaAttr);
|
||||
defineGlobal(converter, var, globalName, linkage, dataAttr);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -322,18 +322,18 @@ mlir::Value fir::FirOpBuilder::createHeapTemporary(
|
||||
fir::GlobalOp fir::FirOpBuilder::createGlobal(
|
||||
mlir::Location loc, mlir::Type type, llvm::StringRef name,
|
||||
mlir::StringAttr linkage, mlir::Attribute value, bool isConst,
|
||||
bool isTarget, fir::CUDADataAttributeAttr cudaAttr) {
|
||||
bool isTarget, cuf::DataAttributeAttr dataAttr) {
|
||||
if (auto global = getNamedGlobal(name))
|
||||
return global;
|
||||
auto module = getModule();
|
||||
auto insertPt = saveInsertionPoint();
|
||||
setInsertionPoint(module.getBody(), module.getBody()->end());
|
||||
llvm::SmallVector<mlir::NamedAttribute> attrs;
|
||||
if (cudaAttr) {
|
||||
if (dataAttr) {
|
||||
auto globalOpName = mlir::OperationName(fir::GlobalOp::getOperationName(),
|
||||
module.getContext());
|
||||
attrs.push_back(mlir::NamedAttribute(
|
||||
fir::GlobalOp::getCudaAttrAttrName(globalOpName), cudaAttr));
|
||||
fir::GlobalOp::getDataAttrAttrName(globalOpName), dataAttr));
|
||||
}
|
||||
auto glob = create<fir::GlobalOp>(loc, name, isConst, isTarget, type, value,
|
||||
linkage, attrs);
|
||||
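Aside (not part of this change): a sketch of how a caller passes the new data attribute to this overload; `builder` and `loc` are assumed to exist and the symbol name is made up.

  // Create a device-resident global; the attribute lands on the fir.global op.
  cuf::DataAttributeAttr devAttr = cuf::DataAttributeAttr::get(
      builder.getContext(), cuf::DataAttribute::Device);
  fir::GlobalOp global = builder.createGlobal(
      loc, builder.getI32Type(), /*name=*/"_QMexampleEdev_counter",
      /*linkage=*/{}, /*value=*/{}, /*isConst=*/false, /*isTarget=*/false,
      devAttr);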
@@ -346,7 +346,7 @@ fir::GlobalOp fir::FirOpBuilder::createGlobal(
|
||||
fir::GlobalOp fir::FirOpBuilder::createGlobal(
|
||||
mlir::Location loc, mlir::Type type, llvm::StringRef name, bool isConst,
|
||||
bool isTarget, std::function<void(FirOpBuilder &)> bodyBuilder,
|
||||
mlir::StringAttr linkage, fir::CUDADataAttributeAttr cudaAttr) {
|
||||
mlir::StringAttr linkage, cuf::DataAttributeAttr dataAttr) {
|
||||
if (auto global = getNamedGlobal(name))
|
||||
return global;
|
||||
auto module = getModule();
|
||||
|
||||
@@ -199,7 +199,7 @@ fir::FortranVariableOpInterface
|
||||
hlfir::genDeclare(mlir::Location loc, fir::FirOpBuilder &builder,
|
||||
const fir::ExtendedValue &exv, llvm::StringRef name,
|
||||
fir::FortranVariableFlagsAttr flags, mlir::Value dummyScope,
|
||||
fir::CUDADataAttributeAttr cudaAttr) {
|
||||
cuf::DataAttributeAttr dataAttr) {
|
||||
|
||||
mlir::Value base = fir::getBase(exv);
|
||||
assert(fir::conformsWithPassByRef(base.getType()) &&
|
||||
@@ -229,7 +229,7 @@ hlfir::genDeclare(mlir::Location loc, fir::FirOpBuilder &builder,
|
||||
},
|
||||
[](const auto &) {});
|
||||
auto declareOp = builder.create<hlfir::DeclareOp>(
|
||||
loc, base, name, shapeOrShift, lenParams, dummyScope, flags, cudaAttr);
|
||||
loc, base, name, shapeOrShift, lenParams, dummyScope, flags, dataAttr);
|
||||
return mlir::cast<fir::FortranVariableOpInterface>(declareOp.getOperation());
|
||||
}
|
||||
|
||||
|
||||
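Aside (not part of this change): the same attribute threads through the hlfir::genDeclare helper shown above; a sketch assuming `builder`, `loc` and an existing fir::ExtendedValue `exv`, with an illustrative variable name.

  cuf::DataAttributeAttr devAttr = cuf::DataAttributeAttr::get(
      builder.getContext(), cuf::DataAttribute::Device);
  // Produces an hlfir.declare carrying data_attr = #cuf.cuda<device>.
  fir::FortranVariableOpInterface var =
      hlfir::genDeclare(loc, builder, exv, /*name=*/"x_device",
                        /*flags=*/fir::FortranVariableFlagsAttr{},
                        /*dummyScope=*/mlir::Value{}, devAttr);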
@@ -1,4 +1,5 @@
|
||||
add_subdirectory(Support)
|
||||
add_subdirectory(CUF)
|
||||
|
||||
add_flang_library(FIRDialect
|
||||
FIRAttr.cpp
|
||||
@@ -13,9 +14,11 @@ add_flang_library(FIRDialect
|
||||
CanonicalizationPatternsIncGen
|
||||
MLIRIR
|
||||
FIROpsIncGen
|
||||
CUFAttrsIncGen
|
||||
intrinsics_gen
|
||||
|
||||
LINK_LIBS
|
||||
CUFAttrs
|
||||
FIRDialectSupport
|
||||
MLIRArithDialect
|
||||
MLIRBuiltinToLLVMIRTranslation
|
||||
|
||||
16
flang/lib/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt
Normal file
@@ -0,0 +1,16 @@
|
||||
# Keep CUF attributes as a separate library as FIR and HLFIR depend on it.
|
||||
add_flang_library(CUFAttrs
|
||||
CUFAttr.cpp
|
||||
|
||||
DEPENDS
|
||||
MLIRIR
|
||||
CUFAttrsIncGen
|
||||
|
||||
LINK_LIBS
|
||||
MLIRTargetLLVMIRExport
|
||||
|
||||
LINK_COMPONENTS
|
||||
AsmParser
|
||||
AsmPrinter
|
||||
Remarks
|
||||
)
|
||||
32
flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp
Normal file
@@ -0,0 +1,32 @@
|
||||
//===-- CUFAttr.cpp -------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
#include "mlir/IR/BuiltinTypes.h"
|
||||
#include "mlir/IR/DialectImplementation.h"
|
||||
#include "mlir/IR/OpDefinition.h"
|
||||
#include "llvm/ADT/TypeSwitch.h"
|
||||
|
||||
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFEnumAttr.cpp.inc"
|
||||
#define GET_ATTRDEF_CLASSES
|
||||
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp.inc"
|
||||
|
||||
namespace cuf {
|
||||
|
||||
void CUFDialect::registerAttributes() {
|
||||
addAttributes<ClusterDimsAttr, DataAttributeAttr, DataTransferKindAttr,
|
||||
LaunchBoundsAttr, ProcAttributeAttr>();
|
||||
}
|
||||
|
||||
} // namespace cuf
|
||||
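Aside (not part of this change): the attributes registered above become usable once the CUF dialect is loaded into a context; a sketch with an illustrative standalone context.

  #include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
  #include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
  #include "mlir/IR/MLIRContext.h"

  static void loadCUFExample() {
    mlir::MLIRContext context;
    context.loadDialect<cuf::CUFDialect>(); // triggers registerAttributes()
    auto devAttr =
        cuf::DataAttributeAttr::get(&context, cuf::DataAttribute::Device);
    (void)devAttr; // prints as #cuf.cuda<device> in the textual IR
  }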
22
flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
Normal file
@@ -0,0 +1,22 @@
|
||||
add_subdirectory(Attributes)
|
||||
|
||||
add_flang_library(CUFDialect
|
||||
CUFDialect.cpp
|
||||
CUFOps.cpp
|
||||
|
||||
DEPENDS
|
||||
MLIRIR
|
||||
CUFOpsIncGen
|
||||
|
||||
LINK_LIBS
|
||||
CUFAttrs
|
||||
FIRDialect
|
||||
FIRDialectSupport
|
||||
MLIRIR
|
||||
MLIRTargetLLVMIRExport
|
||||
|
||||
LINK_COMPONENTS
|
||||
AsmParser
|
||||
AsmPrinter
|
||||
Remarks
|
||||
)
|
||||
25
flang/lib/Optimizer/Dialect/CUF/CUFDialect.cpp
Normal file
@@ -0,0 +1,25 @@
|
||||
//===-- CUFDialect.cpp ----------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
|
||||
#include "flang/Optimizer/Dialect/FIRDialect.h"
|
||||
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFDialect.cpp.inc"
|
||||
|
||||
void cuf::CUFDialect::initialize() {
|
||||
registerAttributes();
|
||||
addOperations<
|
||||
#define GET_OP_LIST
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFOps.cpp.inc"
|
||||
>();
|
||||
}
|
||||
219
flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
Normal file
@@ -0,0 +1,219 @@
|
||||
//===-- CUFOps.cpp --------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
|
||||
#include "flang/Optimizer/Dialect/FIRType.h"
|
||||
#include "mlir/IR/Attributes.h"
|
||||
#include "mlir/IR/BuiltinAttributes.h"
|
||||
#include "mlir/IR/BuiltinOps.h"
|
||||
#include "mlir/IR/Diagnostics.h"
|
||||
#include "mlir/IR/Matchers.h"
|
||||
#include "mlir/IR/OpDefinition.h"
|
||||
#include "mlir/IR/PatternMatch.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AllocOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
static mlir::Type wrapAllocaResultType(mlir::Type intype) {
|
||||
if (mlir::isa<fir::ReferenceType>(intype))
|
||||
return {};
|
||||
return fir::ReferenceType::get(intype);
|
||||
}
|
||||
|
||||
void cuf::AllocOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
|
||||
mlir::Type inType, llvm::StringRef uniqName,
|
||||
llvm::StringRef bindcName,
|
||||
cuf::DataAttributeAttr cudaAttr,
|
||||
mlir::ValueRange typeparams, mlir::ValueRange shape,
|
||||
llvm::ArrayRef<mlir::NamedAttribute> attributes) {
|
||||
mlir::StringAttr nameAttr =
|
||||
uniqName.empty() ? mlir::StringAttr{} : builder.getStringAttr(uniqName);
|
||||
mlir::StringAttr bindcAttr =
|
||||
bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName);
|
||||
build(builder, result, wrapAllocaResultType(inType),
|
||||
mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape,
|
||||
cudaAttr);
|
||||
result.addAttributes(attributes);
|
||||
}
|
||||
|
||||
template <typename Op>
|
||||
static mlir::LogicalResult checkCudaAttr(Op op) {
|
||||
if (op.getDataAttr() == cuf::DataAttribute::Device ||
|
||||
op.getDataAttr() == cuf::DataAttribute::Managed ||
|
||||
op.getDataAttr() == cuf::DataAttribute::Unified)
|
||||
return mlir::success();
|
||||
return op.emitOpError("expect device, managed or unified cuda attribute");
|
||||
}
|
||||
|
||||
mlir::LogicalResult cuf::AllocOp::verify() { return checkCudaAttr(*this); }
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FreeOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
mlir::LogicalResult cuf::FreeOp::verify() { return checkCudaAttr(*this); }
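Aside (not part of this change): a sketch of building a cuf.alloc / cuf.free pair that satisfies the two verifiers above; `builder` and `loc` are assumed to exist and the names are made up.

  auto devAttr = cuf::DataAttributeAttr::get(builder.getContext(),
                                             cuf::DataAttribute::Device);
  // Device allocation of a scalar f32; the result is a !fir.ref<f32> value.
  mlir::Value devPtr = builder.create<cuf::AllocOp>(
      loc, builder.getF32Type(), /*uniqName=*/"_QFexampleEr", /*bindcName=*/"r",
      devAttr, /*typeparams=*/mlir::ValueRange{}, /*shape=*/mlir::ValueRange{});
  // Matching free with the same (device) data attribute.
  builder.create<cuf::FreeOp>(loc, devPtr, devAttr);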
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AllocateOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
mlir::LogicalResult cuf::AllocateOp::verify() {
|
||||
if (getPinned() && getStream())
|
||||
return emitOpError("pinned and stream cannot appears at the same time");
|
||||
if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
|
||||
return emitOpError(
|
||||
"expect box to be a reference to a class or box type value");
|
||||
if (getSource() &&
|
||||
!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getSource().getType())))
|
||||
return emitOpError(
|
||||
"expect source to be a reference to/or a class or box type value");
|
||||
if (getErrmsg() &&
|
||||
!mlir::isa<fir::BoxType>(fir::unwrapRefType(getErrmsg().getType())))
|
||||
return emitOpError(
|
||||
"expect errmsg to be a reference to/or a box type value");
|
||||
if (getErrmsg() && !getHasStat())
|
||||
return emitOpError("expect stat attribute when errmsg is provided");
|
||||
return mlir::success();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// DeallocateOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
mlir::LogicalResult cuf::DeallocateOp::verify() {
|
||||
if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
|
||||
return emitOpError(
|
||||
"expect box to be a reference to class or box type value");
|
||||
if (getErrmsg() &&
|
||||
!mlir::isa<fir::BoxType>(fir::unwrapRefType(getErrmsg().getType())))
|
||||
return emitOpError(
|
||||
"expect errmsg to be a reference to/or a box type value");
|
||||
if (getErrmsg() && !getHasStat())
|
||||
return emitOpError("expect stat attribute when errmsg is provided");
|
||||
return mlir::success();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// KernelOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
llvm::SmallVector<mlir::Region *> cuf::KernelOp::getLoopRegions() {
|
||||
return {&getRegion()};
|
||||
}
|
||||
|
||||
mlir::ParseResult parseCUFKernelValues(
|
||||
mlir::OpAsmParser &parser,
|
||||
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &values,
|
||||
llvm::SmallVectorImpl<mlir::Type> &types) {
|
||||
if (mlir::succeeded(parser.parseOptionalStar()))
|
||||
return mlir::success();
|
||||
|
||||
if (mlir::succeeded(parser.parseOptionalLParen())) {
|
||||
if (mlir::failed(parser.parseCommaSeparatedList(
|
||||
mlir::AsmParser::Delimiter::None, [&]() {
|
||||
if (parser.parseOperand(values.emplace_back()))
|
||||
return mlir::failure();
|
||||
return mlir::success();
|
||||
})))
|
||||
return mlir::failure();
|
||||
auto builder = parser.getBuilder();
|
||||
for (size_t i = 0; i < values.size(); i++) {
|
||||
types.emplace_back(builder.getI32Type());
|
||||
}
|
||||
if (parser.parseRParen())
|
||||
return mlir::failure();
|
||||
} else {
|
||||
if (parser.parseOperand(values.emplace_back()))
|
||||
return mlir::failure();
|
||||
auto builder = parser.getBuilder();
|
||||
types.emplace_back(builder.getI32Type());
|
||||
return mlir::success();
|
||||
}
|
||||
return mlir::success();
|
||||
}
|
||||
|
||||
void printCUFKernelValues(mlir::OpAsmPrinter &p, mlir::Operation *op,
|
||||
mlir::ValueRange values, mlir::TypeRange types) {
|
||||
if (values.empty())
|
||||
p << "*";
|
||||
|
||||
if (values.size() > 1)
|
||||
p << "(";
|
||||
llvm::interleaveComma(values, p, [&p](mlir::Value v) { p << v; });
|
||||
if (values.size() > 1)
|
||||
p << ")";
|
||||
}
|
||||
|
||||
mlir::ParseResult parseCUFKernelLoopControl(
|
||||
mlir::OpAsmParser &parser, mlir::Region ®ion,
|
||||
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &lowerbound,
|
||||
llvm::SmallVectorImpl<mlir::Type> &lowerboundType,
|
||||
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &upperbound,
|
||||
llvm::SmallVectorImpl<mlir::Type> &upperboundType,
|
||||
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &step,
|
||||
llvm::SmallVectorImpl<mlir::Type> &stepType) {
|
||||
|
||||
llvm::SmallVector<mlir::OpAsmParser::Argument> inductionVars;
|
||||
if (parser.parseLParen() ||
|
||||
parser.parseArgumentList(inductionVars,
|
||||
mlir::OpAsmParser::Delimiter::None,
|
||||
/*allowType=*/true) ||
|
||||
parser.parseRParen() || parser.parseEqual() || parser.parseLParen() ||
|
||||
parser.parseOperandList(lowerbound, inductionVars.size(),
|
||||
mlir::OpAsmParser::Delimiter::None) ||
|
||||
parser.parseColonTypeList(lowerboundType) || parser.parseRParen() ||
|
||||
parser.parseKeyword("to") || parser.parseLParen() ||
|
||||
parser.parseOperandList(upperbound, inductionVars.size(),
|
||||
mlir::OpAsmParser::Delimiter::None) ||
|
||||
parser.parseColonTypeList(upperboundType) || parser.parseRParen() ||
|
||||
parser.parseKeyword("step") || parser.parseLParen() ||
|
||||
parser.parseOperandList(step, inductionVars.size(),
|
||||
mlir::OpAsmParser::Delimiter::None) ||
|
||||
parser.parseColonTypeList(stepType) || parser.parseRParen())
|
||||
return mlir::failure();
|
||||
return parser.parseRegion(region, inductionVars);
|
||||
}
|
||||
|
||||
void printCUFKernelLoopControl(
|
||||
mlir::OpAsmPrinter &p, mlir::Operation *op, mlir::Region ®ion,
|
||||
mlir::ValueRange lowerbound, mlir::TypeRange lowerboundType,
|
||||
mlir::ValueRange upperbound, mlir::TypeRange upperboundType,
|
||||
mlir::ValueRange steps, mlir::TypeRange stepType) {
|
||||
mlir::ValueRange regionArgs = region.front().getArguments();
|
||||
if (!regionArgs.empty()) {
|
||||
p << "(";
|
||||
llvm::interleaveComma(
|
||||
regionArgs, p, [&p](mlir::Value v) { p << v << " : " << v.getType(); });
|
||||
p << ") = (" << lowerbound << " : " << lowerboundType << ") to ("
|
||||
<< upperbound << " : " << upperboundType << ") "
|
||||
<< " step (" << steps << " : " << stepType << ") ";
|
||||
}
|
||||
p.printRegion(region, /*printEntryBlockArgs=*/false);
|
||||
}
|
||||
|
||||
mlir::LogicalResult cuf::KernelOp::verify() {
|
||||
if (getLowerbound().size() != getUpperbound().size() ||
|
||||
getLowerbound().size() != getStep().size())
|
||||
return emitOpError(
|
||||
"expect same number of values in lowerbound, upperbound and step");
|
||||
|
||||
return mlir::success();
|
||||
}
|
||||
|
||||
// Tablegen operators
|
||||
|
||||
#define GET_OP_CLASSES
|
||||
#include "flang/Optimizer/Dialect/CUF/CUFOps.cpp.inc"
|
||||
@@ -298,7 +298,5 @@ void fir::printFirAttribute(FIROpsDialect *dialect, mlir::Attribute attr,
|
||||
void FIROpsDialect::registerAttributes() {
|
||||
addAttributes<ClosedIntervalAttr, ExactTypeAttr, FortranVariableFlagsAttr,
|
||||
LowerBoundAttr, PointIntervalAttr, RealAttr, SubclassAttr,
|
||||
UpperBoundAttr, CUDADataAttributeAttr, CUDAProcAttributeAttr,
|
||||
CUDALaunchBoundsAttr, CUDAClusterDimsAttr,
|
||||
CUDADataTransferKindAttr>();
|
||||
UpperBoundAttr>();
|
||||
}
|
||||
|
||||
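Aside (not part of this change): with the CUDA attributes removed from the FIR registration above, a tool that works with both dialects now registers CUF explicitly alongside FIR; a sketch with illustrative surrounding code.

  #include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
  #include "flang/Optimizer/Dialect/FIRDialect.h"
  #include "mlir/IR/DialectRegistry.h"
  #include "mlir/IR/MLIRContext.h"

  mlir::DialectRegistry registry;
  registry.insert<fir::FIROpsDialect, cuf::CUFDialect>();
  mlir::MLIRContext context(registry); // dialects load lazily when first used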
@@ -11,6 +11,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "flang/Optimizer/Dialect/FIRDialect.h"
|
||||
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
|
||||
#include "flang/Optimizer/Dialect/FIRAttr.h"
|
||||
#include "flang/Optimizer/Dialect/FIROps.h"
|
||||
#include "flang/Optimizer/Dialect/FIRType.h"
|
||||
|
||||
@@ -3898,169 +3898,6 @@ mlir::LogicalResult fir::DeclareOp::verify() {
|
||||
return fortranVar.verifyDeclareLikeOpImpl(getMemref());
|
||||
}
|
||||
|
||||
llvm::SmallVector<mlir::Region *> fir::CUDAKernelOp::getLoopRegions() {
|
||||
return {&getRegion()};
|
||||
}
|
||||
|
||||
mlir::ParseResult parseCUFKernelValues(
|
||||
mlir::OpAsmParser &parser,
|
||||
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &values,
|
||||
llvm::SmallVectorImpl<mlir::Type> &types) {
|
||||
if (mlir::succeeded(parser.parseOptionalStar()))
|
||||
return mlir::success();
|
||||
|
||||
if (mlir::succeeded(parser.parseOptionalLParen())) {
|
||||
if (mlir::failed(parser.parseCommaSeparatedList(
|
||||
mlir::AsmParser::Delimiter::None, [&]() {
|
||||
if (parser.parseOperand(values.emplace_back()))
|
||||
return mlir::failure();
|
||||
return mlir::success();
|
||||
})))
|
||||
return mlir::failure();
|
||||
auto builder = parser.getBuilder();
|
||||
for (size_t i = 0; i < values.size(); i++) {
|
||||
types.emplace_back(builder.getI32Type());
|
||||
}
|
||||
if (parser.parseRParen())
|
||||
return mlir::failure();
|
||||
} else {
|
||||
if (parser.parseOperand(values.emplace_back()))
|
||||
return mlir::failure();
|
||||
auto builder = parser.getBuilder();
|
||||
types.emplace_back(builder.getI32Type());
|
||||
return mlir::success();
|
||||
}
|
||||
return mlir::success();
|
||||
}
|
||||
|
||||
void printCUFKernelValues(mlir::OpAsmPrinter &p, mlir::Operation *op,
|
||||
mlir::ValueRange values, mlir::TypeRange types) {
|
||||
if (values.empty())
|
||||
p << "*";
|
||||
|
||||
if (values.size() > 1)
|
||||
p << "(";
|
||||
llvm::interleaveComma(values, p, [&p](mlir::Value v) { p << v; });
|
||||
if (values.size() > 1)
|
||||
p << ")";
|
||||
}
|
||||
|
||||
mlir::ParseResult parseCUFKernelLoopControl(
|
||||
mlir::OpAsmParser &parser, mlir::Region ®ion,
|
||||
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &lowerbound,
|
||||
llvm::SmallVectorImpl<mlir::Type> &lowerboundType,
|
||||
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &upperbound,
|
||||
llvm::SmallVectorImpl<mlir::Type> &upperboundType,
|
||||
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &step,
|
||||
llvm::SmallVectorImpl<mlir::Type> &stepType) {
|
||||
|
||||
llvm::SmallVector<mlir::OpAsmParser::Argument> inductionVars;
|
||||
if (parser.parseLParen() ||
|
||||
parser.parseArgumentList(inductionVars,
|
||||
mlir::OpAsmParser::Delimiter::None,
|
||||
/*allowType=*/true) ||
|
||||
parser.parseRParen() || parser.parseEqual() || parser.parseLParen() ||
|
||||
parser.parseOperandList(lowerbound, inductionVars.size(),
|
||||
mlir::OpAsmParser::Delimiter::None) ||
|
||||
parser.parseColonTypeList(lowerboundType) || parser.parseRParen() ||
|
||||
parser.parseKeyword("to") || parser.parseLParen() ||
|
||||
parser.parseOperandList(upperbound, inductionVars.size(),
|
||||
mlir::OpAsmParser::Delimiter::None) ||
|
||||
parser.parseColonTypeList(upperboundType) || parser.parseRParen() ||
|
||||
parser.parseKeyword("step") || parser.parseLParen() ||
|
||||
parser.parseOperandList(step, inductionVars.size(),
|
||||
mlir::OpAsmParser::Delimiter::None) ||
|
||||
parser.parseColonTypeList(stepType) || parser.parseRParen())
|
||||
return mlir::failure();
|
||||
return parser.parseRegion(region, inductionVars);
|
||||
}
|
||||
|
||||
void printCUFKernelLoopControl(
|
||||
mlir::OpAsmPrinter &p, mlir::Operation *op, mlir::Region ®ion,
|
||||
mlir::ValueRange lowerbound, mlir::TypeRange lowerboundType,
|
||||
mlir::ValueRange upperbound, mlir::TypeRange upperboundType,
|
||||
mlir::ValueRange steps, mlir::TypeRange stepType) {
|
||||
mlir::ValueRange regionArgs = region.front().getArguments();
|
||||
if (!regionArgs.empty()) {
|
||||
p << "(";
|
||||
llvm::interleaveComma(
|
||||
regionArgs, p, [&p](mlir::Value v) { p << v << " : " << v.getType(); });
|
||||
p << ") = (" << lowerbound << " : " << lowerboundType << ") to ("
|
||||
<< upperbound << " : " << upperboundType << ") "
|
||||
<< " step (" << steps << " : " << stepType << ") ";
|
||||
}
|
||||
p.printRegion(region, /*printEntryBlockArgs=*/false);
|
||||
}
|
||||
|
||||
mlir::LogicalResult fir::CUDAKernelOp::verify() {
|
||||
if (getLowerbound().size() != getUpperbound().size() ||
|
||||
getLowerbound().size() != getStep().size())
|
||||
return emitOpError(
|
||||
"expect same number of values in lowerbound, upperbound and step");
|
||||
|
||||
return mlir::success();
|
||||
}
|
||||
|
||||
mlir::LogicalResult fir::CUDAAllocateOp::verify() {
|
||||
if (getPinned() && getStream())
|
||||
return emitOpError("pinned and stream cannot appears at the same time");
|
||||
if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
|
||||
return emitOpError(
|
||||
"expect box to be a reference to a class or box type value");
|
||||
if (getSource() &&
|
||||
!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getSource().getType())))
|
||||
return emitOpError(
|
||||
"expect source to be a reference to/or a class or box type value");
|
||||
if (getErrmsg() &&
|
||||
!mlir::isa<fir::BoxType>(fir::unwrapRefType(getErrmsg().getType())))
|
||||
return emitOpError(
|
||||
"expect errmsg to be a reference to/or a box type value");
|
||||
if (getErrmsg() && !getHasStat())
|
||||
return emitOpError("expect stat attribute when errmsg is provided");
|
||||
return mlir::success();
|
||||
}
|
||||
|
||||
mlir::LogicalResult fir::CUDADeallocateOp::verify() {
|
||||
if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
|
||||
return emitOpError(
|
||||
"expect box to be a reference to class or box type value");
|
||||
if (getErrmsg() &&
|
||||
!mlir::isa<fir::BoxType>(fir::unwrapRefType(getErrmsg().getType())))
|
||||
return emitOpError(
|
||||
"expect errmsg to be a reference to/or a box type value");
|
||||
if (getErrmsg() && !getHasStat())
|
||||
return emitOpError("expect stat attribute when errmsg is provided");
|
||||
return mlir::success();
|
||||
}
|
||||
|
||||
void fir::CUDAAllocOp::build(
|
||||
mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Type inType,
|
||||
llvm::StringRef uniqName, llvm::StringRef bindcName,
|
||||
fir::CUDADataAttributeAttr cudaAttr, mlir::ValueRange typeparams,
|
||||
mlir::ValueRange shape, llvm::ArrayRef<mlir::NamedAttribute> attributes) {
|
||||
mlir::StringAttr nameAttr =
|
||||
uniqName.empty() ? mlir::StringAttr{} : builder.getStringAttr(uniqName);
|
||||
mlir::StringAttr bindcAttr =
|
||||
bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName);
|
||||
build(builder, result, wrapAllocaResultType(inType),
|
||||
mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape,
|
||||
cudaAttr);
|
||||
result.addAttributes(attributes);
|
||||
}
|
||||
|
||||
template <typename Op>
|
||||
static mlir::LogicalResult checkCudaAttr(Op op) {
|
||||
if (op.getCudaAttr() == fir::CUDADataAttribute::Device ||
|
||||
op.getCudaAttr() == fir::CUDADataAttribute::Managed ||
|
||||
op.getCudaAttr() == fir::CUDADataAttribute::Unified)
|
||||
return mlir::success();
|
||||
return op.emitOpError("expect device, managed or unified cuda attribute");
|
||||
}
|
||||
|
||||
mlir::LogicalResult fir::CUDAAllocOp::verify() { return checkCudaAttr(*this); }
|
||||
|
||||
mlir::LogicalResult fir::CUDAFreeOp::verify() { return checkCudaAttr(*this); }
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FIROpsDialect
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@@ -5,11 +5,13 @@ add_flang_library(HLFIRDialect
|
||||
HLFIROps.cpp
|
||||
|
||||
DEPENDS
|
||||
CUFAttrsIncGen
|
||||
FIRDialect
|
||||
HLFIROpsIncGen
|
||||
${dialect_libs}
|
||||
|
||||
LINK_LIBS
|
||||
CUFAttrs
|
||||
FIRDialect
|
||||
MLIRIR
|
||||
${dialect_libs}
|
||||
|
||||
@@ -127,14 +127,14 @@ void hlfir::DeclareOp::build(mlir::OpBuilder &builder,
|
||||
mlir::ValueRange typeparams,
|
||||
mlir::Value dummy_scope,
|
||||
fir::FortranVariableFlagsAttr fortran_attrs,
|
||||
fir::CUDADataAttributeAttr cuda_attr) {
|
||||
cuf::DataAttributeAttr data_attr) {
|
||||
auto nameAttr = builder.getStringAttr(uniq_name);
|
||||
mlir::Type inputType = memref.getType();
|
||||
bool hasExplicitLbs = hasExplicitLowerBounds(shape);
|
||||
mlir::Type hlfirVariableType =
|
||||
getHLFIRVariableType(inputType, hasExplicitLbs);
|
||||
build(builder, result, {hlfirVariableType, inputType}, memref, shape,
|
||||
typeparams, dummy_scope, nameAttr, fortran_attrs, cuda_attr);
|
||||
typeparams, dummy_scope, nameAttr, fortran_attrs, data_attr);
|
||||
}
|
||||
|
||||
mlir::LogicalResult hlfir::DeclareOp::verify() {
|
||||
|
||||
@@ -11,11 +11,13 @@ add_flang_library(HLFIRTransforms
|
||||
OptimizedBufferization.cpp
|
||||
|
||||
DEPENDS
|
||||
CUFAttrsIncGen
|
||||
FIRDialect
|
||||
HLFIROpsIncGen
|
||||
${dialect_libs}
|
||||
|
||||
LINK_LIBS
|
||||
CUFAttrs
|
||||
FIRAnalysis
|
||||
FIRDialect
|
||||
FIRBuilder
|
||||
|
||||
@@ -320,16 +320,16 @@ public:
|
||||
mlir::Location loc = declareOp->getLoc();
|
||||
mlir::Value memref = declareOp.getMemref();
|
||||
fir::FortranVariableFlagsAttr fortranAttrs;
|
||||
fir::CUDADataAttributeAttr cudaAttr;
|
||||
cuf::DataAttributeAttr dataAttr;
|
||||
if (auto attrs = declareOp.getFortranAttrs())
|
||||
fortranAttrs =
|
||||
fir::FortranVariableFlagsAttr::get(rewriter.getContext(), *attrs);
|
||||
if (auto attr = declareOp.getCudaAttr())
|
||||
cudaAttr = fir::CUDADataAttributeAttr::get(rewriter.getContext(), *attr);
|
||||
if (auto attr = declareOp.getDataAttr())
|
||||
dataAttr = cuf::DataAttributeAttr::get(rewriter.getContext(), *attr);
|
||||
auto firDeclareOp = rewriter.create<fir::DeclareOp>(
|
||||
loc, memref.getType(), memref, declareOp.getShape(),
|
||||
declareOp.getTypeparams(), declareOp.getDummyScope(),
|
||||
declareOp.getUniqName(), fortranAttrs, cudaAttr);
|
||||
declareOp.getUniqName(), fortranAttrs, dataAttr);
|
||||
|
||||
// Propagate other attributes from hlfir.declare to fir.declare.
|
||||
// OpenACC's acc.declare is one example. Right now, the propagation
|
||||
|
||||
@@ -4,11 +4,11 @@ func.func @_QPsub1() {
|
||||
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
|
||||
%1 = fir.alloca i32
|
||||
%pinned = fir.alloca i1
|
||||
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
|
||||
%s = fir.load %1 : !fir.ref<i32>
|
||||
// expected-error@+1{{'fir.cuda_allocate' op pinned and stream cannot appears at the same time}}
|
||||
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) pinned(%pinned : !fir.ref<i1>) {cuda_attr = #fir.cuda<device>} -> i32
|
||||
// expected-error@+1{{'cuf.allocate' op pinned and stream cannot appears at the same time}}
|
||||
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) pinned(%pinned : !fir.ref<i1>) {data_attr = #cuf.cuda<device>} -> i32
|
||||
return
|
||||
}
|
||||
|
||||
@@ -16,8 +16,8 @@ func.func @_QPsub1() {
|
||||
|
||||
func.func @_QPsub1() {
|
||||
%1 = fir.alloca i32
|
||||
// expected-error@+1{{'fir.cuda_allocate' op expect box to be a reference to a class or box type value}}
|
||||
%2 = fir.cuda_allocate %1 : !fir.ref<i32> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
// expected-error@+1{{'cuf.allocate' op expect box to be a reference to a class or box type value}}
|
||||
%2 = cuf.allocate %1 : !fir.ref<i32> {data_attr = #cuf.cuda<device>} -> i32
|
||||
return
|
||||
}
|
||||
|
||||
@@ -25,15 +25,15 @@ func.func @_QPsub1() {
|
||||
|
||||
func.func @_QPsub1() {
|
||||
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
|
||||
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%c100 = arith.constant 100 : index
|
||||
%7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"}
|
||||
%8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref<!fir.char<1,100>>, index) -> (!fir.ref<!fir.char<1,100>>, !fir.ref<!fir.char<1,100>>)
|
||||
%9 = fir.embox %8#1 : (!fir.ref<!fir.char<1,100>>) -> !fir.box<!fir.char<1,100>>
|
||||
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
|
||||
%16 = fir.convert %9 : (!fir.box<!fir.char<1,100>>) -> !fir.box<none>
|
||||
// expected-error@+1{{'fir.cuda_allocate' op expect stat attribute when errmsg is provided}}
|
||||
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>} -> i32
|
||||
// expected-error@+1{{'cuf.allocate' op expect stat attribute when errmsg is provided}}
|
||||
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {data_attr = #cuf.cuda<device>} -> i32
|
||||
return
|
||||
}
|
||||
|
||||
@@ -41,11 +41,11 @@ func.func @_QPsub1() {
|
||||
|
||||
func.func @_QPsub1() {
|
||||
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
|
||||
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%1 = fir.alloca i32
|
||||
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
|
||||
// expected-error@+1{{'fir.cuda_allocate' op expect errmsg to be a reference to/or a box type value}}
|
||||
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
|
||||
// expected-error@+1{{'cuf.allocate' op expect errmsg to be a reference to/or a box type value}}
|
||||
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
|
||||
return
|
||||
}
|
||||
|
||||
@@ -53,8 +53,8 @@ func.func @_QPsub1() {
|
||||
|
||||
func.func @_QPsub1() {
|
||||
%1 = fir.alloca i32
|
||||
// expected-error@+1{{'fir.cuda_deallocate' op expect box to be a reference to class or box type value}}
|
||||
%2 = fir.cuda_deallocate %1 : !fir.ref<i32> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
// expected-error@+1{{'cuf.deallocate' op expect box to be a reference to class or box type value}}
|
||||
%2 = cuf.deallocate %1 : !fir.ref<i32> {data_attr = #cuf.cuda<device>} -> i32
|
||||
return
|
||||
}
|
||||
|
||||
@@ -62,11 +62,11 @@ func.func @_QPsub1() {
|
||||
|
||||
func.func @_QPsub1() {
|
||||
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
|
||||
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%1 = fir.alloca i32
|
||||
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
|
||||
// expected-error@+1{{'fir.cuda_deallocate' op expect errmsg to be a reference to/or a box type value}}
|
||||
%13 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
|
||||
// expected-error@+1{{'cuf.deallocate' op expect errmsg to be a reference to/or a box type value}}
|
||||
%13 = cuf.deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
|
||||
return
|
||||
}
|
||||
|
||||
@@ -74,32 +74,32 @@ func.func @_QPsub1() {
|
||||
|
||||
func.func @_QPsub1() {
|
||||
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
|
||||
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%c100 = arith.constant 100 : index
|
||||
%7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"}
|
||||
%8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref<!fir.char<1,100>>, index) -> (!fir.ref<!fir.char<1,100>>, !fir.ref<!fir.char<1,100>>)
|
||||
%9 = fir.embox %8#1 : (!fir.ref<!fir.char<1,100>>) -> !fir.box<!fir.char<1,100>>
|
||||
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
|
||||
%16 = fir.convert %9 : (!fir.box<!fir.char<1,100>>) -> !fir.box<none>
|
||||
// expected-error@+1{{'fir.cuda_deallocate' op expect stat attribute when errmsg is provided}}
|
||||
%13 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>} -> i32
|
||||
// expected-error@+1{{'cuf.deallocate' op expect stat attribute when errmsg is provided}}
|
||||
%13 = cuf.deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {data_attr = #cuf.cuda<device>} -> i32
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @_QPsub1() {
|
||||
// expected-error@+1{{'fir.cuda_alloc' op expect device, managed or unified cuda attribute}}
|
||||
%0 = fir.cuda_alloc f32 {bindc_name = "r", cuda_attr = #fir.cuda<pinned>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
|
||||
fir.cuda_free %0 : !fir.ref<f32> {cuda_attr = #fir.cuda<constant>}
|
||||
// expected-error@+1{{'cuf.alloc' op expect device, managed or unified cuda attribute}}
|
||||
%0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
|
||||
cuf.free %0 : !fir.ref<f32> {data_attr = #cuf.cuda<constant>}
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @_QPsub1() {
|
||||
%0 = fir.cuda_alloc f32 {bindc_name = "r", cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
|
||||
// expected-error@+1{{'fir.cuda_free' op expect device, managed or unified cuda attribute}}
|
||||
fir.cuda_free %0 : !fir.ref<f32> {cuda_attr = #fir.cuda<constant>}
|
||||
%0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
|
||||
// expected-error@+1{{'cuf.free' op expect device, managed or unified cuda attribute}}
|
||||
cuf.free %0 : !fir.ref<f32> {data_attr = #cuf.cuda<constant>}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -4,85 +4,85 @@
|
||||
|
||||
func.func @_QPsub1() {
|
||||
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
|
||||
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
|
||||
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
%14 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> {data_attr = #cuf.cuda<device>} -> i32
|
||||
%14 = cuf.deallocate %11 : !fir.ref<!fir.box<none>> {data_attr = #cuf.cuda<device>} -> i32
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
// CHECK: fir.cuda_deallocate %{{.*}} : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> {data_attr = #cuf.cuda<device>} -> i32
|
||||
// CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<none>> {data_attr = #cuf.cuda<device>} -> i32
|
||||
|
||||
// -----
|
||||
|
||||
func.func @_QPsub1() {
|
||||
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
|
||||
%1 = fir.alloca i32
|
||||
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
|
||||
%s = fir.load %1 : !fir.ref<i32>
|
||||
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) {cuda_attr = #fir.cuda<device>} -> i32
|
||||
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) {data_attr = #cuf.cuda<device>} -> i32
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> stream(%{{.*}} : i32) {cuda_attr = #fir.cuda<device>} -> i32
|
||||
// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> stream(%{{.*}} : i32) {data_attr = #cuf.cuda<device>} -> i32
|
||||
|
||||
// -----
|
||||
|
||||
func.func @_QPsub1() {
|
||||
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
|
||||
%1 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub1Eb"}
|
||||
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%5:2 = hlfir.declare %1 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
|
||||
%12 = fir.convert %5#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
|
||||
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> source(%12 : !fir.ref<!fir.box<none>>) {cuda_attr = #fir.cuda<device>} -> i32
|
||||
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> source(%12 : !fir.ref<!fir.box<none>>) {data_attr = #cuf.cuda<device>} -> i32
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> source(%{{.*}} : !fir.ref<!fir.box<none>>) {cuda_attr = #fir.cuda<device>} -> i32
|
||||
// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> source(%{{.*}} : !fir.ref<!fir.box<none>>) {data_attr = #cuf.cuda<device>} -> i32
|
||||
|
||||
// -----
|
||||
|
||||
func.func @_QPsub1() {
|
||||
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
|
||||
%pinned = fir.alloca i1
|
||||
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
|
||||
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> pinned(%pinned : !fir.ref<i1>) {cuda_attr = #fir.cuda<device>} -> i32
|
||||
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> pinned(%pinned : !fir.ref<i1>) {data_attr = #cuf.cuda<device>} -> i32
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> pinned(%{{.*}} : !fir.ref<i1>) {cuda_attr = #fir.cuda<device>} -> i32
|
||||
// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> pinned(%{{.*}} : !fir.ref<i1>) {data_attr = #cuf.cuda<device>} -> i32
|
||||
|
||||
// -----
|
||||
|
||||
func.func @_QPsub1() {
|
||||
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
|
||||
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
%c100 = arith.constant 100 : index
|
||||
%7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"}
|
||||
%8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref<!fir.char<1,100>>, index) -> (!fir.ref<!fir.char<1,100>>, !fir.ref<!fir.char<1,100>>)
|
||||
%9 = fir.embox %8#1 : (!fir.ref<!fir.char<1,100>>) -> !fir.box<!fir.char<1,100>>
|
||||
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
|
||||
%16 = fir.convert %9 : (!fir.box<!fir.char<1,100>>) -> !fir.box<none>
|
||||
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
|
||||
%14 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
|
||||
%13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
|
||||
%14 = cuf.deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
|
||||
// CHECK: fir.cuda_deallocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
|
||||
// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
|
||||
// CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
|
||||
|
||||
// -----
|
||||
|
||||
func.func @_QPsub1() {
|
||||
%0 = fir.cuda_alloc f32 {bindc_name = "r", cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
|
||||
fir.cuda_free %0 : !fir.ref<f32> {cuda_attr = #fir.cuda<device>}
|
||||
%0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
|
||||
cuf.free %0 : !fir.ref<f32> {data_attr = #cuf.cuda<device>}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK: fir.cuda_alloc
|
||||
// CHECK: fir.cuda_free
|
||||
// CHECK: cuf.alloc
|
||||
// CHECK: cuf.free
|
||||
|
||||
|
||||
@@ -11,11 +11,11 @@ end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub1()
|
||||
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
|
||||
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: fir.call @_FortranAAllocatableSetBounds
|
||||
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
|
||||
|
||||
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
|
||||
|
||||
! CHECK: %[[BOX_LOAD:.*]] = fir.load %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
|
||||
! CHECK: %[[ADDR:.*]] = fir.box_addr %[[BOX_LOAD]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
|
||||
@@ -23,7 +23,7 @@ end subroutine
|
||||
! CHECK: %[[C0:.*]] = arith.constant 0 : i64
|
||||
! CHECK: %[[NE_C0:.*]] = arith.cmpi ne, %[[ADDR_I64]], %[[C0]] : i64
|
||||
! CHECK: fir.if %[[NE_C0]] {
|
||||
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
|
||||
! CHECK: }
|
||||
|
||||
subroutine sub2()
|
||||
@@ -36,18 +36,18 @@ end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub2()
|
||||
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub2Ea"}
|
||||
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub2Eistat"}
|
||||
! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub2Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: fir.call @_FortranAAllocatableSetBounds
|
||||
! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
|
||||
! CHECK: %[[STAT:.*]] = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>, hasStat} -> i32
|
||||
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
|
||||
|
||||
! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
|
||||
! CHECK: %[[STAT:.*]] = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>, hasStat} -> i32
|
||||
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
|
||||
|
||||
! CHECK: fir.if %{{.*}} {
|
||||
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>} -> i32
|
||||
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>} -> i32
|
||||
! CHECK: }
|
||||
|
||||
subroutine sub3()
|
||||
@@ -58,13 +58,13 @@ end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub3()
|
||||
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xi32>>> {bindc_name = "a", uniq_name = "_QFsub3Ea"}
|
||||
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
|
||||
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
|
||||
! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsub3Eplog"}
|
||||
! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
|
||||
! CHECK-2: fir.call @_FortranAAllocatableSetBounds
|
||||
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> pinned(%[[PLOG_DECL]]#1 : !fir.ref<!fir.logical<4>>) {cuda_attr = #fir.cuda<pinned>} -> i32
|
||||
! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> pinned(%[[PLOG_DECL]]#1 : !fir.ref<!fir.logical<4>>) {data_attr = #cuf.cuda<pinned>} -> i32
|
||||
! CHECK: fir.if %{{.*}} {
|
||||
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> {cuda_attr = #fir.cuda<pinned>} -> i32
|
||||
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> {data_attr = #cuf.cuda<pinned>} -> i32
|
||||
! CHECK: }
|
||||
|
||||
subroutine sub4()
|
||||
@@ -75,14 +75,14 @@ end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub4()
|
||||
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub4Ea"}
|
||||
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
|
||||
! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: fir.call @_FortranAAllocatableSetBounds
|
||||
! CHECK: %[[STREAM:.*]] = fir.load %[[ISTREAM_DECL]]#0 : !fir.ref<i32>
|
||||
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> stream(%[[STREAM]] : i32) {cuda_attr = #fir.cuda<device>} -> i32
|
||||
! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> stream(%[[STREAM]] : i32) {data_attr = #cuf.cuda<device>} -> i32
|
||||
! CHECK: fir.if %{{.*}} {
|
||||
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
|
||||
! CHECK: }
|
||||
|
||||
subroutine sub5()
|
||||
@@ -93,16 +93,16 @@ end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub5()
|
||||
! CHECK: %[[BOX_A:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub5Ea"}
|
||||
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub5Eb"}
|
||||
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
|
||||
! CHECK: fir.call @_FortranAAllocatableSetBounds
|
||||
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> source(%[[LOAD_B]] : !fir.box<!fir.heap<!fir.array<?xf32>>>) {cuda_attr = #fir.cuda<device>} -> i32
|
||||
! CHECK: %{{.*}} = cuf.allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> source(%[[LOAD_B]] : !fir.box<!fir.heap<!fir.array<?xf32>>>) {data_attr = #cuf.cuda<device>} -> i32
|
||||
! CHECK: fir.if
|
||||
! CHECK: fir.freemem
|
||||
! CHECK: fir.if %{{.*}} {
|
||||
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
|
||||
! CHECK: }
|
||||
|
||||
subroutine sub6()
|
||||
@@ -113,14 +113,14 @@ end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub6()
|
||||
! CHECK: %[[BOX_A:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub6Ea"}
|
||||
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub6Eb"}
|
||||
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
|
||||
! CHECK: fir.call @_FortranAAllocatableApplyMold
|
||||
! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
! CHECK: %{{.*}} = cuf.allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
|
||||
! CHECK: fir.if %{{.*}} {
|
||||
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
|
||||
! CHECK: }
|
||||
|
||||
subroutine sub7()
|
||||
@@ -134,19 +134,19 @@ end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub7()
|
||||
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub7Ea"}
|
||||
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
|
||||
! CHECK: %[[ERR:.*]] = fir.alloca !fir.char<1,50> {bindc_name = "err", uniq_name = "_QFsub7Eerr"}
|
||||
! CHECK: %[[ERR_DECL:.*]]:2 = hlfir.declare %[[ERR]] typeparams %{{.*}} {uniq_name = "_QFsub7Eerr"} : (!fir.ref<!fir.char<1,50>>, index) -> (!fir.ref<!fir.char<1,50>>, !fir.ref<!fir.char<1,50>>)
|
||||
! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub7Eistat"}
|
||||
! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub7Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
|
||||
! CHECK: fir.call @_FortranAAllocatableSetBounds
|
||||
! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%[[ERR_BOX]] : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
|
||||
! CHECK: %[[STAT:.*]] = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%[[ERR_BOX]] : !fir.box<!fir.char<1,50>>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
|
||||
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
|
||||
|
||||
! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
|
||||
! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%15 : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
|
||||
! CHECK: %[[STAT:.*]] = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%15 : !fir.box<!fir.char<1,50>>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
|
||||
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
|
||||
! CHECK: fir.if %{{.*}} {
|
||||
! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
|
||||
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
|
||||
! CHECK: }
|
||||
|
||||
@@ -5,13 +5,13 @@
|
||||
|
||||
module cuda_var
|
||||
real, constant :: mod_a_rc
|
||||
! CHECK: fir.global @_QMcuda_varEmod_a_rc {cuda_attr = #fir.cuda<constant>} : f32
|
||||
! CHECK: fir.global @_QMcuda_varEmod_a_rc {data_attr = #cuf.cuda<constant>} : f32
|
||||
real, device :: mod_b_ra
|
||||
! CHECK: fir.global @_QMcuda_varEmod_b_ra {cuda_attr = #fir.cuda<device>} : f32
|
||||
! CHECK: fir.global @_QMcuda_varEmod_b_ra {data_attr = #cuf.cuda<device>} : f32
|
||||
real, allocatable, managed :: mod_c_rm
|
||||
! CHECK: fir.global @_QMcuda_varEmod_c_rm {cuda_attr = #fir.cuda<managed>} : !fir.box<!fir.heap<f32>>
|
||||
! CHECK: fir.global @_QMcuda_varEmod_c_rm {data_attr = #cuf.cuda<managed>} : !fir.box<!fir.heap<f32>>
|
||||
real, allocatable, pinned :: mod_d_rp
|
||||
! CHECK: fir.global @_QMcuda_varEmod_d_rp {cuda_attr = #fir.cuda<pinned>} : !fir.box<!fir.heap<f32>>
|
||||
! CHECK: fir.global @_QMcuda_varEmod_d_rp {data_attr = #cuf.cuda<pinned>} : !fir.box<!fir.heap<f32>>
|
||||
|
||||
contains
|
||||
|
||||
@@ -23,44 +23,44 @@ subroutine local_var_attrs
|
||||
end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QMcuda_varPlocal_var_attrs()
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
|
||||
|
||||
|
||||
! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref<f32>) -> !fir.ref<f32>
|
||||
! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> !fir.ref<!fir.box<!fir.heap<f32>>>
|
||||
! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> !fir.ref<!fir.box<!fir.heap<f32>>>
|
||||
! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref<f32>) -> !fir.ref<f32>
|
||||
! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref<f32>) -> !fir.ref<f32>
|
||||
! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> !fir.ref<!fir.box<!fir.heap<f32>>>
|
||||
! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> !fir.ref<!fir.box<!fir.heap<f32>>>
|
||||
! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref<f32>) -> !fir.ref<f32>
|
||||
|
||||
subroutine dummy_arg_device(dd)
|
||||
real, device :: dd
|
||||
end subroutine
|
||||
! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_device(
|
||||
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {fir.bindc_name = "dd", fir.cuda_attr = #fir.cuda<device>}) {
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFdummy_arg_deviceEdd"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
|
||||
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "dd"}) {
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFdummy_arg_deviceEdd"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
|
||||
|
||||
subroutine dummy_arg_managed(dm)
|
||||
real, allocatable, managed :: dm
|
||||
end subroutine
|
||||
! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_managed(
|
||||
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<f32>>> {fir.bindc_name = "dm", fir.cuda_attr = #fir.cuda<managed>}) {
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFdummy_arg_managedEdm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
|
||||
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<f32>>> {cuf.data_attr = #cuf.cuda<managed>, fir.bindc_name = "dm"}) {
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFdummy_arg_managedEdm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
|
||||
|
||||
subroutine dummy_arg_pinned(dp)
|
||||
real, allocatable, pinned :: dp
|
||||
end subroutine
|
||||
! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_pinned(
|
||||
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<f32>>> {fir.bindc_name = "dp", fir.cuda_attr = #fir.cuda<pinned>}) {
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFdummy_arg_pinnedEdp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
|
||||
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<f32>>> {cuf.data_attr = #cuf.cuda<pinned>, fir.bindc_name = "dp"}) {
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFdummy_arg_pinnedEdp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
|
||||
|
||||
subroutine dummy_arg_unified(du)
|
||||
real, unified :: du
|
||||
end subroutine
|
||||
! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_unified(
|
||||
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {fir.bindc_name = "du", fir.cuda_attr = #fir.cuda<unified>})
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFdummy_arg_unifiedEdu"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
|
||||
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {cuf.data_attr = #cuf.cuda<unified>, fir.bindc_name = "du"})
|
||||
! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFdummy_arg_unifiedEdu"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
|
||||
|
||||
subroutine cuda_alloc_free(n)
|
||||
integer :: n
|
||||
@@ -70,27 +70,27 @@ subroutine cuda_alloc_free(n)
|
||||
end
|
||||
|
||||
! CHECK-LABEL: func.func @_QMcuda_varPcuda_alloc_free
|
||||
! CHECK: %[[ALLOC_A:.*]] = fir.cuda_alloc !fir.array<10xf32> {bindc_name = "a", cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} -> !fir.ref<!fir.array<10xf32>>
|
||||
! CHECK: %[[ALLOC_A:.*]] = cuf.alloc !fir.array<10xf32> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} -> !fir.ref<!fir.array<10xf32>>
|
||||
! CHECK: %[[SHAPE:.*]] = fir.shape %c10 : (index) -> !fir.shape<1>
|
||||
! CHECK: %[[DECL_A:.*]]:2 = hlfir.declare %[[ALLOC_A]](%[[SHAPE]]) {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
|
||||
! CHECK: %[[DECL_A:.*]]:2 = hlfir.declare %[[ALLOC_A]](%[[SHAPE]]) {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
|
||||
|
||||
! CHECK: %[[ALLOC_U:.*]] = fir.cuda_alloc i32 {bindc_name = "u", cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} -> !fir.ref<i32>
|
||||
! CHECK: %[[DECL_U:.*]]:2 = hlfir.declare %[[ALLOC_U]] {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[ALLOC_U:.*]] = cuf.alloc i32 {bindc_name = "u", data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} -> !fir.ref<i32>
|
||||
! CHECK: %[[DECL_U:.*]]:2 = hlfir.declare %[[ALLOC_U]] {data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
|
||||
! CHECK: %[[ALLOC_B:.*]] = fir.cuda_alloc !fir.array<?xf32>, %{{.*}} : index {bindc_name = "b", cuda_attr = #fir.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} -> !fir.ref<!fir.array<?xf32>>
|
||||
! CHECK: %[[ALLOC_B:.*]] = cuf.alloc !fir.array<?xf32>, %{{.*}} : index {bindc_name = "b", data_attr = #cuf.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} -> !fir.ref<!fir.array<?xf32>>
|
||||
! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
|
||||
! CHECK: %[[DECL_B:.*]]:2 = hlfir.declare %[[ALLOC_B]](%[[SHAPE]]) {cuda_attr = #fir.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
|
||||
! CHECK: %[[DECL_B:.*]]:2 = hlfir.declare %[[ALLOC_B]](%[[SHAPE]]) {data_attr = #cuf.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
|
||||
|
||||
! CHECK: fir.cuda_free %[[DECL_B]]#1 : !fir.ref<!fir.array<?xf32>> {cuda_attr = #fir.cuda<managed>}
|
||||
! CHECK: fir.cuda_free %[[DECL_U]]#1 : !fir.ref<i32> {cuda_attr = #fir.cuda<unified>}
|
||||
! CHECK: fir.cuda_free %[[DECL_A]]#1 : !fir.ref<!fir.array<10xf32>> {cuda_attr = #fir.cuda<device>}
|
||||
! CHECK: cuf.free %[[DECL_B]]#1 : !fir.ref<!fir.array<?xf32>> {data_attr = #cuf.cuda<managed>}
|
||||
! CHECK: cuf.free %[[DECL_U]]#1 : !fir.ref<i32> {data_attr = #cuf.cuda<unified>}
|
||||
! CHECK: cuf.free %[[DECL_A]]#1 : !fir.ref<!fir.array<10xf32>> {data_attr = #cuf.cuda<device>}
|
||||
|
||||
subroutine dummy(x)
|
||||
real, target, device :: x
|
||||
end subroutine
|
||||
|
||||
! CHECK: func.func @_QMcuda_varPdummy
|
||||
! CHECK-NOT: fir.cuda_free
|
||||
! CHECK-NOT: cuf.free
|
||||
|
||||
end module
|
||||
|
||||
|
||||
@@ -29,37 +29,37 @@ end
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub1()
|
||||
|
||||
! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub1Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
|
||||
! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
|
||||
! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub1Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
|
||||
! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub1Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
|
||||
! CHECK: %[[C1:.*]] = arith.constant 1 : i32
|
||||
! CHECK: %[[LOADED_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
|
||||
! CHECK: %[[ADD:.*]] = arith.addi %[[C1]], %[[LOADED_I]] : i32
|
||||
! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ADD]] {uniq_name = ".cuf_host_tmp"} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
|
||||
! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
|
||||
! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
|
||||
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<i32>, i1
|
||||
|
||||
! CHECK: %[[C1:.*]] = arith.constant 1 : i32
|
||||
! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[C1]] {uniq_name = ".cuf_host_tmp"} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
|
||||
! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
|
||||
! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
|
||||
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<i32>, i1
|
||||
|
||||
! CHECK: fir.cuda_data_transfer %[[AHOST]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
|
||||
! CHECK: cuf.data_transfer %[[AHOST]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
|
||||
|
||||
! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> {
|
||||
! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ELEMENTAL]](%{{.*}}) {uniq_name = ".cuf_host_tmp"} : (!hlfir.expr<10xi32>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, i1)
|
||||
! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
|
||||
! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
|
||||
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<!fir.array<10xi32>>, i1
|
||||
|
||||
! CHECK: %[[DES_AHOST:.*]] = hlfir.designate %[[AHOST]]#0 (%c1{{.*}}:%c5{{.*}}:%c1{{.*}}) shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
|
||||
! CHECK: %[[DES_ADEV:.*]] = hlfir.designate %[[ADEV]]#0 (%c1{{.*}}:%c5{{.*}}:%c1{{.*}}) shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
|
||||
! CHECK: fir.cuda_data_transfer %[[DES_AHOST]] to %[[DES_ADEV]] {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
|
||||
! CHECK: cuf.data_transfer %[[DES_AHOST]] to %[[DES_ADEV]] {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
|
||||
|
||||
! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32>
|
||||
! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ELEMENTAL]](%{{.*}}) {uniq_name = ".cuf_host_tmp"} : (!hlfir.expr<10xi32>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, i1)
|
||||
! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
|
||||
! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
|
||||
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<!fir.array<10xi32>>, i1
|
||||
|
||||
subroutine sub2()
|
||||
@@ -81,25 +81,25 @@ subroutine sub2()
|
||||
end
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub2()
|
||||
! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
|
||||
! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
|
||||
! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub2Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
|
||||
! CHECK: %[[BDEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub2Ebdev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
|
||||
! CHECK: %[[BDEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Ebdev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
|
||||
! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub2Ebhost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
|
||||
! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub2Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: fir.cuda_data_transfer %[[ADEV]]#0 to %[[AHOST]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
|
||||
! CHECK: fir.cuda_data_transfer %[[M]]#0 to %[[I]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<i32>, !fir.ref<i32>
|
||||
! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
|
||||
! CHECK: cuf.data_transfer %[[ADEV]]#0 to %[[AHOST]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
|
||||
! CHECK: cuf.data_transfer %[[M]]#0 to %[[I]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<i32>, !fir.ref<i32>
|
||||
|
||||
! CHECK: %[[DES_ADEV:.*]] = hlfir.designate %[[ADEV]]#0 (%{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
|
||||
! CHECK: %[[DES_AHOST:.*]] = hlfir.designate %[[AHOST]]#0 (%{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
|
||||
! CHECK: fir.cuda_data_transfer %[[DES_ADEV]] to %[[DES_AHOST]] {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
|
||||
! CHECK: cuf.data_transfer %[[DES_ADEV]] to %[[DES_AHOST]] {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
|
||||
|
||||
! CHECK: fir.cuda_data_transfer %[[ADEV]]#0 to %[[BDEV]]#0 {transfer_kind = #fir.cuda_transfer<device_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
|
||||
! CHECK: cuf.data_transfer %[[ADEV]]#0 to %[[BDEV]]#0 {transfer_kind = #cuf.cuda_transfer<device_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
|
||||
|
||||
! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10xi32> {bindc_name = ".tmp", uniq_name = ""}
|
||||
! CHECK: %[[DECL_TEMP:.*]]:2 = hlfir.declare %[[TEMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>)
|
||||
! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %[[DECL_TEMP]]#1(%{{.*}}) {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.heap<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>)
|
||||
! CHECK: fir.cuda_data_transfer %[[ADEV]]#1 to %[[DECL_TEMP]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>
|
||||
! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %[[DECL_TEMP]]#1(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.heap<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>)
|
||||
! CHECK: cuf.data_transfer %[[ADEV]]#1 to %[[DECL_TEMP]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>
|
||||
! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32>
|
||||
! CHECK: hlfir.assign %[[ELEMENTAL]] to %[[BHOST]]#0 : !hlfir.expr<10xi32>, !fir.ref<!fir.array<10xi32>>
|
||||
! CHECK: fir.freemem %[[DECL_TEMP]]#0 : !fir.heap<!fir.array<10xi32>>
|
||||
@@ -116,12 +116,12 @@ end
|
||||
! CHECK: %[[TMP:.*]] = fir.alloca !fir.type<_QMmod1Tt1{i:i32}> {bindc_name = ".tmp"}
|
||||
! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
|
||||
! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Ebhost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
|
||||
! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub3Et"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
|
||||
! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub3Et"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
|
||||
! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = ".tmp"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
|
||||
! CHECK: fir.cuda_data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>
|
||||
! CHECK: cuf.data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>
|
||||
|
||||
|
||||
! Check that fir.cuda_data_transfer are not generated within cuf kernel
|
||||
! Check that cuf.data_transfer are not generated within cuf kernel
|
||||
subroutine sub4()
|
||||
integer, parameter :: n = 10
|
||||
real, device :: adev(n)
|
||||
@@ -137,9 +137,9 @@ subroutine sub4()
|
||||
end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub4()
|
||||
! CHECK: fir.cuda_data_transfer
|
||||
! CHECK: fir.cuda_kernel<<<*, *>>>
|
||||
! CHECK-NOT: fir.cuda_data_transfer
|
||||
! CHECK: cuf.data_transfer
|
||||
! CHECK: cuf.kernel<<<*, *>>>
|
||||
! CHECK-NOT: cuf.data_transfer
|
||||
! CHECK: hlfir.assign
|
||||
|
||||
attributes(global) subroutine sub5(a)
|
||||
@@ -149,7 +149,7 @@ attributes(global) subroutine sub5(a)
|
||||
end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub5
|
||||
! CHECK-NOT: fir.cuda_data_transfer
|
||||
! CHECK-NOT: cuf.data_transfer
|
||||
|
||||
attributes(host,device) subroutine sub6(a)
|
||||
integer, device :: a
|
||||
@@ -158,4 +158,4 @@ attributes(host,device) subroutine sub6(a)
|
||||
end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPsub6
|
||||
! CHECK: fir.cuda_data_transfer
|
||||
! CHECK: cuf.data_transfer
|
||||
|
||||
@@ -16,10 +16,10 @@ contains
subroutine host()
real, device :: a
! CHECK-LABEL: func.func @_QMtest_callPhost()
! CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QMtest_callFhostEa"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
! CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMtest_callFhostEa"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
call dev_kernel0<<<10, 20>>>()
! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}>>>()
! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}>>>()
call dev_kernel0<<< __builtin_dim3(1,1,4), __builtin_dim3(32,1,1) >>>
! CHECK: %[[ADDR_DIM3_GRID:.*]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_dim3.{{.*}}) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
@@ -38,16 +38,16 @@ contains
! CHECK: %[[BLOCK_Y_LOAD:.*]] = fir.load %[[BLOCK_Y]] : !fir.ref<i32>
! CHECK: %[[BLOCK_Z:.*]] = hlfir.designate %[[DIM3_BLOCK]]#1{"z"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<i32>
! CHECK: %[[BLOCK_Z_LOAD:.*]] = fir.load %[[BLOCK_Z]] : !fir.ref<i32>
! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%[[GRID_X_LOAD]], %[[GRID_Y_LOAD]], %[[GRID_Z_LOAD]], %[[BLOCK_X_LOAD]], %[[BLOCK_Y_LOAD]], %[[BLOCK_Z_LOAD]]>>>()
! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%[[GRID_X_LOAD]], %[[GRID_Y_LOAD]], %[[GRID_Z_LOAD]], %[[BLOCK_X_LOAD]], %[[BLOCK_Y_LOAD]], %[[BLOCK_Z_LOAD]]>>>()
call dev_kernel0<<<10, 20, 2>>>()
! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}>>>()
! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}>>>()
call dev_kernel0<<<10, 20, 2, 0>>>()
! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}, %c0{{.*}}>>>()
! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}, %c0{{.*}}>>>()
call dev_kernel1<<<1, 32>>>(a)
! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel1<<<%c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c32{{.*}}, %c1{{.*}}, %c1{{.*}}>>>(%1#1) : (!fir.ref<f32>)
! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel1<<<%c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c32{{.*}}, %c1{{.*}}, %c1{{.*}}>>>(%1#1) : (!fir.ref<f32>)
end
end
@@ -20,7 +20,7 @@ subroutine sub1()
! CHECK: %[[LB:.*]] = fir.convert %c1{{.*}} : (i32) -> index
! CHECK: %[[UB:.*]] = fir.convert %c100{{.*}} : (i32) -> index
! CHECK: %[[STEP:.*]] = arith.constant 1 : index
! CHECK: fir.cuda_kernel<<<%c1_i32, %c2_i32>>> (%[[ARG0:.*]] : index) = (%[[LB]] : index) to (%[[UB]] : index) step (%[[STEP]] : index)
! CHECK: cuf.kernel<<<%c1_i32, %c2_i32>>> (%[[ARG0:.*]] : index) = (%[[LB]] : index) to (%[[UB]] : index) step (%[[STEP]] : index)
! CHECK-NOT: fir.do_loop
! CHECK: %[[ARG0_I32:.*]] = fir.convert %[[ARG0]] : (index) -> i32
! CHECK: fir.store %[[ARG0_I32]] to %[[IV]]#1 : !fir.ref<i32>
@@ -32,7 +32,7 @@ subroutine sub1()
a(i) = a(i) * b(i)
end do
! CHECK: fir.cuda_kernel<<<*, *>>> (%{{.*}} : index) = (%{{.*}} : index) to (%{{.*}} : index) step (%{{.*}} : index)
! CHECK: cuf.kernel<<<*, *>>> (%{{.*}} : index) = (%{{.*}} : index) to (%{{.*}} : index) step (%{{.*}} : index)
!$cuf kernel do(2) <<< 1, (256,1) >>>
do i = 1, n
@@ -41,7 +41,7 @@ subroutine sub1()
end do
end do
! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%[[ARG0:.*]] : index, %[[ARG1:.*]] : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
! CHECK: cuf.kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%[[ARG0:.*]] : index, %[[ARG1:.*]] : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
! CHECK: %[[ARG0_I32:.*]] = fir.convert %[[ARG0]] : (index) -> i32
! CHECK: fir.store %[[ARG0_I32]] to %[[IV]]#1 : !fir.ref<i32>
! CHECK: %[[ARG1_I32:.*]] = fir.convert %[[ARG1]] : (index) -> i32
@@ -54,7 +54,7 @@ subroutine sub1()
c(i,j) = c(i,j) * d(i,j)
end do
end do
! CHECK: fir.cuda_kernel<<<(%c1{{.*}}, %c0{{.*}}), (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
! CHECK: cuf.kernel<<<(%c1{{.*}}, %c0{{.*}}), (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
!$cuf kernel do(2) <<< (*,*), (32,4) >>>
do i = 1, n
@@ -63,5 +63,5 @@ subroutine sub1()
end do
end do
! CHECK: fir.cuda_kernel<<<*, (%c32{{.*}}, %c4{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
! CHECK: cuf.kernel<<<*, (%c32{{.*}}, %c4{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
end
@@ -10,6 +10,6 @@ contains
end
end module
! CHECK: fir.global @_QMcuf_modEmd {cuda_attr = #fir.cuda<device>} : f32
! CHECK: fir.global @_QMcuf_modEmd {data_attr = #cuf.cuda<device>} : f32
! CHECK: func.func @_QMcuf_modPdevsub() attributes {fir.cuda_attr = #fir.cuda_proc<device>}
! CHECK: func.func @_QMcuf_modPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
@@ -10,16 +10,16 @@ end
! CHECK-LABEL: func.func @_QPsub1()
! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMcuf_modEmd) : !fir.ref<f32>
! CHECK: %{{.*}}:2 = hlfir.declare %[[ADDR]] {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuf_modEmd"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
! CHECK: %{{.*}}:2 = hlfir.declare %[[ADDR]] {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuf_modEmd"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
attributes(device) subroutine sub2()
use cuf_mod
call devsub()
end
! CHECK-LABEL: func.func @_QPsub2() attributes {fir.cuda_attr = #fir.cuda_proc<device>}
! CHECK-LABEL: func.func @_QPsub2() attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
! CHECK: fir.call @_QMcuf_modPdevsub()
! CHECK-LABEL: fir.global @_QMcuf_modEmd {cuda_attr = #fir.cuda<device>} : f32
! CHECK-LABEL: fir.global @_QMcuf_modEmd {data_attr = #cuf.cuda<device>} : f32
! CHECK-LABEL: func.func private @_QMcuf_modPdevsub() attributes {fir.cuda_attr = #fir.cuda_proc<device>}
! CHECK-LABEL: func.func private @_QMcuf_modPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
@@ -4,40 +4,40 @@
! Test lowering of CUDA attribute on procedures.
attributes(host) subroutine sub_host(); end
! CHECK: func.func @_QPsub_host() attributes {fir.cuda_attr = #fir.cuda_proc<host>}
! CHECK: func.func @_QPsub_host() attributes {cuf.proc_attr = #cuf.cuda_proc<host>}
attributes(device) subroutine sub_device(); end
! CHECK: func.func @_QPsub_device() attributes {fir.cuda_attr = #fir.cuda_proc<device>}
! CHECK: func.func @_QPsub_device() attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
attributes(host) attributes(device) subroutine sub_host_device; end
! CHECK: func.func @_QPsub_host_device() attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
! CHECK: func.func @_QPsub_host_device() attributes {cuf.proc_attr = #cuf.cuda_proc<host_device>}
attributes(device) attributes(host) subroutine sub_device_host; end
! CHECK: func.func @_QPsub_device_host() attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
! CHECK: func.func @_QPsub_device_host() attributes {cuf.proc_attr = #cuf.cuda_proc<host_device>}
attributes(global) subroutine sub_global(); end
! CHECK: func.func @_QPsub_global() attributes {fir.cuda_attr = #fir.cuda_proc<global>}
! CHECK: func.func @_QPsub_global() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
attributes(grid_global) subroutine sub_grid_global(); end
! CHECK: func.func @_QPsub_grid_global() attributes {fir.cuda_attr = #fir.cuda_proc<grid_global>}
! CHECK: func.func @_QPsub_grid_global() attributes {cuf.proc_attr = #cuf.cuda_proc<grid_global>}
attributes(host) integer function fct_host(); end
! CHECK: func.func @_QPfct_host() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<host>}
! CHECK: func.func @_QPfct_host() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<host>}
attributes(device) integer function fct_device(); end
! CHECK: func.func @_QPfct_device() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<device>}
! CHECK: func.func @_QPfct_device() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
attributes(host) attributes(device) integer function fct_host_device; end
! CHECK: func.func @_QPfct_host_device() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
! CHECK: func.func @_QPfct_host_device() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<host_device>}
attributes(device) attributes(host) integer function fct_device_host; end
! CHECK: func.func @_QPfct_device_host() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
! CHECK: func.func @_QPfct_device_host() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<host_device>}
attributes(global) launch_bounds(1, 2) subroutine sub_lbounds1(); end
! CHECK: func.func @_QPsub_lbounds1() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_launch_bounds = #fir.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64>}
! CHECK: func.func @_QPsub_lbounds1() attributes {cuf.launch_bounds = #cuf.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64>, cuf.proc_attr = #cuf.cuda_proc<global>}
attributes(global) launch_bounds(1, 2, 3) subroutine sub_lbounds2(); end
! CHECK: func.func @_QPsub_lbounds2() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_launch_bounds = #fir.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64, upperBoundClusterSize = 3 : i64>}
! CHECK: func.func @_QPsub_lbounds2() attributes {cuf.launch_bounds = #cuf.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64, upperBoundClusterSize = 3 : i64>, cuf.proc_attr = #cuf.cuda_proc<global>}
attributes(global) cluster_dims(1, 2, 3) subroutine sub_clusterdims1(); end
! CHECK: func.func @_QPsub_clusterdims1() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_cluster_dims = #fir.cluster_dims<x = 1 : i64, y = 2 : i64, z = 3 : i64>}
! CHECK: func.func @_QPsub_clusterdims1() attributes {cuf.cluster_dims = #cuf.cluster_dims<x = 1 : i64, y = 2 : i64, z = 3 : i64>, cuf.proc_attr = #cuf.cuda_proc<global>}
@@ -16,6 +16,8 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS)

target_link_libraries(bbc PRIVATE
CUFAttrs
CUFDialect
FIRDialect
FIRDialectSupport
FIRSupport

@@ -11,6 +11,8 @@ if(FLANG_INCLUDE_TESTS)
endif()

target_link_libraries(fir-opt PRIVATE
CUFAttrs
CUFDialect
FIRDialect
FIRSupport
FIRTransforms

@@ -7,6 +7,8 @@ llvm_update_compile_flags(tco)
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS)
target_link_libraries(tco PRIVATE
CUFAttrs
CUFDialect
FIRCodeGen
FIRDialect
FIRDialectSupport

@@ -2,6 +2,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS)

set(LIBS
CUFDialect
FIRBuilder
FIRCodeGen
FIRDialect
@@ -36,6 +37,7 @@ add_flang_unittest(FlangOptimizerTests
KindMappingTest.cpp
RTBuilder.cpp
DEPENDS
CUFDialect
FIRDialect
FIRSupport
HLFIRDialect
@@ -51,7 +51,7 @@ TEST_F(FortranVariableTest, SimpleScalar) {
/*shape=*/mlir::Value{}, /*typeParams=*/std::nullopt,
/*dummy_scope=*/nullptr, name,
/*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
/*cuda_attr=*/fir::CUDADataAttributeAttr{});
/*data_attr=*/cuf::DataAttributeAttr{});
fir::FortranVariableOpInterface fortranVariable = declare;
EXPECT_FALSE(fortranVariable.isArray());
@@ -77,7 +77,7 @@ TEST_F(FortranVariableTest, CharacterScalar) {
auto declare = builder->create<fir::DeclareOp>(loc, addr.getType(), addr,
/*shape=*/mlir::Value{}, typeParams, /*dummy_scope=*/nullptr, name,
/*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
/*cuda_attr=*/fir::CUDADataAttributeAttr{});
/*data_attr=*/cuf::DataAttributeAttr{});
fir::FortranVariableOpInterface fortranVariable = declare;
EXPECT_FALSE(fortranVariable.isArray());
@@ -108,7 +108,7 @@ TEST_F(FortranVariableTest, SimpleArray) {
auto declare = builder->create<fir::DeclareOp>(loc, addr.getType(), addr,
shape, /*typeParams*/ std::nullopt, /*dummy_scope=*/nullptr, name,
/*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
/*cuda_attr=*/fir::CUDADataAttributeAttr{});
/*data_attr=*/cuf::DataAttributeAttr{});
fir::FortranVariableOpInterface fortranVariable = declare;
EXPECT_TRUE(fortranVariable.isArray());
@@ -139,7 +139,7 @@ TEST_F(FortranVariableTest, CharacterArray) {
auto declare = builder->create<fir::DeclareOp>(loc, addr.getType(), addr,
shape, typeParams, /*dummy_scope=*/nullptr, name,
/*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
/*cuda_attr=*/fir::CUDADataAttributeAttr{});
/*data_attr=*/cuf::DataAttributeAttr{});
fir::FortranVariableOpInterface fortranVariable = declare;
EXPECT_TRUE(fortranVariable.isArray());