From 45daa4fdc68f5faa5bd5c33da052d2415cd88540 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Fri, 17 May 2024 09:37:53 -0700 Subject: [PATCH] [flang][cuda] Move CUDA Fortran operations to a CUF dialect (#92317) The number of operations dedicated to CUF grew and where all still in FIR. In order to have a better organization, the CUF operations, attributes and code is moved into their specific dialect and files. CUF dialect is tightly coupled with HLFIR/FIR and their types. The CUF attributes are bundled into their own library since some HLFIR/FIR operations depend on them and the CUF dialect depends on the FIR types. Without having the attributes into a separate library there would be a dependency cycle. --- flang/include/flang/Lower/ConvertVariable.h | 10 +- .../flang/Optimizer/Builder/FIRBuilder.h | 4 +- .../flang/Optimizer/Builder/HLFIRTools.h | 2 +- .../flang/Optimizer/Dialect/CMakeLists.txt | 2 + .../Dialect/CUF/Attributes/CMakeLists.txt | 7 + .../Dialect/CUF/Attributes/CUFAttr.h | 106 +++++++ .../Dialect/CUF/Attributes/CUFAttr.td | 100 +++++++ .../Optimizer/Dialect/CUF/CMakeLists.txt | 11 + .../flang/Optimizer/Dialect/CUF/CUFDialect.h | 26 ++ .../flang/Optimizer/Dialect/CUF/CUFDialect.td | 43 +++ .../flang/Optimizer/Dialect/CUF/CUFOps.h | 20 ++ .../flang/Optimizer/Dialect/CUF/CUFOps.td | 263 ++++++++++++++++++ .../flang/Optimizer/Dialect/FIRAttr.td | 83 ------ .../include/flang/Optimizer/Dialect/FIROps.h | 1 + .../include/flang/Optimizer/Dialect/FIROps.td | 242 +--------------- .../flang/Optimizer/Dialect/FIROpsSupport.h | 13 - .../include/flang/Optimizer/HLFIR/HLFIROps.td | 5 +- .../include/flang/Optimizer/Support/InitFIR.h | 3 +- flang/include/flang/Optimizer/Support/Utils.h | 61 +--- flang/lib/Frontend/CMakeLists.txt | 2 + flang/lib/Lower/Allocatable.cpp | 17 +- flang/lib/Lower/Bridge.cpp | 52 ++-- flang/lib/Lower/CMakeLists.txt | 4 + flang/lib/Lower/CallInterface.cpp | 21 +- flang/lib/Lower/ConvertCall.cpp | 3 +- flang/lib/Lower/ConvertVariable.cpp | 57 ++-- flang/lib/Optimizer/Builder/FIRBuilder.cpp | 8 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp | 4 +- flang/lib/Optimizer/Dialect/CMakeLists.txt | 3 + .../Dialect/CUF/Attributes/CMakeLists.txt | 16 ++ .../Dialect/CUF/Attributes/CUFAttr.cpp | 32 +++ .../lib/Optimizer/Dialect/CUF/CMakeLists.txt | 22 ++ .../lib/Optimizer/Dialect/CUF/CUFDialect.cpp | 25 ++ flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp | 219 +++++++++++++++ flang/lib/Optimizer/Dialect/FIRAttr.cpp | 4 +- flang/lib/Optimizer/Dialect/FIRDialect.cpp | 1 + flang/lib/Optimizer/Dialect/FIROps.cpp | 163 ----------- flang/lib/Optimizer/HLFIR/IR/CMakeLists.txt | 2 + flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp | 4 +- .../Optimizer/HLFIR/Transforms/CMakeLists.txt | 2 + .../HLFIR/Transforms/ConvertToFIR.cpp | 8 +- flang/test/Fir/cuf-invalid.fir | 50 ++-- flang/test/Fir/cuf.mlir | 46 +-- flang/test/Lower/CUDA/cuda-allocatable.cuf | 48 ++-- flang/test/Lower/CUDA/cuda-data-attribute.cuf | 60 ++-- flang/test/Lower/CUDA/cuda-data-transfer.cuf | 50 ++-- flang/test/Lower/CUDA/cuda-kernel-calls.cuf | 12 +- .../Lower/CUDA/cuda-kernel-loop-directive.cuf | 10 +- flang/test/Lower/CUDA/cuda-mod.cuf | 4 +- flang/test/Lower/CUDA/cuda-module-use.cuf | 8 +- flang/test/Lower/CUDA/cuda-proc-attribute.cuf | 26 +- flang/tools/bbc/CMakeLists.txt | 2 + flang/tools/fir-opt/CMakeLists.txt | 2 + flang/tools/tco/CMakeLists.txt | 2 + flang/unittests/Optimizer/CMakeLists.txt | 2 + .../Optimizer/FortranVariableTest.cpp | 8 +- 56 files changed, 1184 insertions(+), 817 deletions(-) create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/CMakeLists.txt create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.h create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.td create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td create mode 100644 flang/lib/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt create mode 100644 flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp create mode 100644 flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt create mode 100644 flang/lib/Optimizer/Dialect/CUF/CUFDialect.cpp create mode 100644 flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp diff --git a/flang/include/flang/Lower/ConvertVariable.h b/flang/include/flang/Lower/ConvertVariable.h index d70d3268acac..515f4695951b 100644 --- a/flang/include/flang/Lower/ConvertVariable.h +++ b/flang/include/flang/Lower/ConvertVariable.h @@ -23,6 +23,10 @@ #include "mlir/IR/Value.h" #include "llvm/ADT/DenseMap.h" +namespace cuf { +class DataAttributeAttr; +} + namespace fir { class ExtendedValue; class FirOpBuilder; @@ -146,9 +150,9 @@ translateSymbolAttributes(mlir::MLIRContext *mlirContext, /// Translate the CUDA Fortran attributes of \p sym into the FIR CUDA attribute /// representation. -fir::CUDADataAttributeAttr -translateSymbolCUDADataAttribute(mlir::MLIRContext *mlirContext, - const Fortran::semantics::Symbol &sym); +cuf::DataAttributeAttr +translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext, + const Fortran::semantics::Symbol &sym); /// Map a symbol to a given fir::ExtendedValue. This will generate an /// hlfir.declare when lowering to HLFIR and map the hlfir.declare result to the diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h index 0d650f830b64..287730ef2ac8 100644 --- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h +++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h @@ -254,13 +254,13 @@ public: mlir::StringAttr linkage = {}, mlir::Attribute value = {}, bool isConst = false, bool isTarget = false, - fir::CUDADataAttributeAttr cudaAttr = {}); + cuf::DataAttributeAttr dataAttr = {}); fir::GlobalOp createGlobal(mlir::Location loc, mlir::Type type, llvm::StringRef name, bool isConst, bool isTarget, std::function bodyBuilder, mlir::StringAttr linkage = {}, - fir::CUDADataAttributeAttr cudaAttr = {}); + cuf::DataAttributeAttr dataAttr = {}); /// Create a global constant (read-only) value. fir::GlobalOp createGlobalConstant(mlir::Location loc, mlir::Type type, diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6cc8e71b3b18..43aa1661550e 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -239,7 +239,7 @@ genDeclare(mlir::Location loc, fir::FirOpBuilder &builder, const fir::ExtendedValue &exv, llvm::StringRef name, fir::FortranVariableFlagsAttr flags, mlir::Value dummyScope = nullptr, - fir::CUDADataAttributeAttr cudaAttr = {}); + cuf::DataAttributeAttr dataAttr = {}); /// Generate an hlfir.associate to build a variable from an expression value. /// The type of the variable must be provided so that scalar logicals are diff --git a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt index f00993d4d377..301a93c1fe5b 100644 --- a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt +++ b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory(CUF) + # This replicates part of the add_mlir_dialect cmake function from MLIR that # cannot be used her because it expects to be run inside MLIR directory which # is not the case for FIR. diff --git a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt new file mode 100644 index 000000000000..bae7fe3484f4 --- /dev/null +++ b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_TARGET_DEFINITIONS CUFAttr.td) +mlir_tablegen(CUFEnumAttr.h.inc -gen-enum-decls) +mlir_tablegen(CUFEnumAttr.cpp.inc -gen-enum-defs) +mlir_tablegen(CUFAttr.h.inc --gen-attrdef-decls) +mlir_tablegen(CUFAttr.cpp.inc -gen-attrdef-defs) + +add_public_tablegen_target(CUFAttrsIncGen) diff --git a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h new file mode 100644 index 000000000000..f32e39b543e3 --- /dev/null +++ b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h @@ -0,0 +1,106 @@ +//===-- Optimizer/Dialect/CUF/Attributes/CUFAttr.h -- CUF attributes ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H +#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H + +#include "flang/Common/Fortran.h" +#include "mlir/IR/BuiltinAttributes.h" + +namespace llvm { +class StringRef; +} + +#include "flang/Optimizer/Dialect/CUF/Attributes/CUFEnumAttr.h.inc" + +#define GET_ATTRDEF_CLASSES +#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h.inc" + +namespace cuf { + +/// Attribute to mark Fortran entities with the CUDA attribute. +static constexpr llvm::StringRef getDataAttrName() { return "cuf.data_attr"; } +static constexpr llvm::StringRef getProcAttrName() { return "cuf.proc_attr"; } + +/// Attribute to carry CUDA launch_bounds values. +static constexpr llvm::StringRef getLaunchBoundsAttrName() { + return "cuf.launch_bounds"; +} + +/// Attribute to carry CUDA cluster_dims values. +static constexpr llvm::StringRef getClusterDimsAttrName() { + return "cuf.cluster_dims"; +} + +inline cuf::DataAttributeAttr +getDataAttribute(mlir::MLIRContext *mlirContext, + std::optional cudaAttr) { + if (cudaAttr) { + cuf::DataAttribute attr; + switch (*cudaAttr) { + case Fortran::common::CUDADataAttr::Constant: + attr = cuf::DataAttribute::Constant; + break; + case Fortran::common::CUDADataAttr::Device: + attr = cuf::DataAttribute::Device; + break; + case Fortran::common::CUDADataAttr::Managed: + attr = cuf::DataAttribute::Managed; + break; + case Fortran::common::CUDADataAttr::Pinned: + attr = cuf::DataAttribute::Pinned; + break; + case Fortran::common::CUDADataAttr::Shared: + attr = cuf::DataAttribute::Shared; + break; + case Fortran::common::CUDADataAttr::Texture: + // Obsolete attribute + return {}; + case Fortran::common::CUDADataAttr::Unified: + attr = cuf::DataAttribute::Unified; + break; + } + return cuf::DataAttributeAttr::get(mlirContext, attr); + } + return {}; +} + +inline cuf::ProcAttributeAttr +getProcAttribute(mlir::MLIRContext *mlirContext, + std::optional cudaAttr) { + if (cudaAttr) { + cuf::ProcAttribute attr; + switch (*cudaAttr) { + case Fortran::common::CUDASubprogramAttrs::Host: + attr = cuf::ProcAttribute::Host; + break; + case Fortran::common::CUDASubprogramAttrs::Device: + attr = cuf::ProcAttribute::Device; + break; + case Fortran::common::CUDASubprogramAttrs::HostDevice: + attr = cuf::ProcAttribute::HostDevice; + break; + case Fortran::common::CUDASubprogramAttrs::Global: + attr = cuf::ProcAttribute::Global; + break; + case Fortran::common::CUDASubprogramAttrs::Grid_Global: + attr = cuf::ProcAttribute::GridGlobal; + break; + } + return cuf::ProcAttributeAttr::get(mlirContext, attr); + } + return {}; +} + +} // namespace cuf + +#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H diff --git a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td new file mode 100644 index 000000000000..8e2b54672527 --- /dev/null +++ b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td @@ -0,0 +1,100 @@ +//===- CUFAttr.td - CUF Attributes -------------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the CUF dialect attributes. +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_DIALECT_CUF_CUFATTRS +#define FORTRAN_DIALECT_CUF_CUFATTRS + +include "flang/Optimizer/Dialect/CUF/CUFDialect.td" +include "mlir/IR/EnumAttr.td" + +class cuf_Attr : AttrDef; + +def cuf_DataAttribute : I32EnumAttr< + "DataAttribute", + "CUDA Fortran variable attributes", + [ + I32EnumAttrCase<"Constant", 0, "constant">, + I32EnumAttrCase<"Device", 1, "device">, + I32EnumAttrCase<"Managed", 2, "managed">, + I32EnumAttrCase<"Pinned", 3, "pinned">, + I32EnumAttrCase<"Shared", 4, "shared">, + I32EnumAttrCase<"Unified", 5, "unified">, + // Texture is omitted since it is obsolete and rejected by semantic. + ]> { + let genSpecializedAttr = 0; + let cppNamespace = "::cuf"; +} + +def cuf_DataAttributeAttr : + EnumAttr { + let assemblyFormat = [{ ```<` $value `>` }]; +} + +def cuf_ProcAttribute : I32EnumAttr< + "ProcAttribute", "CUDA Fortran procedure attributes", + [ + I32EnumAttrCase<"Host", 0, "host">, + I32EnumAttrCase<"Device", 1, "device">, + I32EnumAttrCase<"HostDevice", 2, "host_device">, + I32EnumAttrCase<"Global", 3, "global">, + I32EnumAttrCase<"GridGlobal", 4, "grid_global">, + ]> { + let genSpecializedAttr = 0; + let cppNamespace = "::cuf"; +} + +def cuf_ProcAttributeAttr : + EnumAttr { + let assemblyFormat = [{ ```<` $value `>` }]; +} + +def cuf_LaunchBoundsAttr : cuf_Attr<"LaunchBounds"> { + let mnemonic = "launch_bounds"; + + let parameters = (ins + "mlir::IntegerAttr":$maxTPB, + "mlir::IntegerAttr":$minBPM, + OptionalParameter<"mlir::IntegerAttr">:$upperBoundClusterSize + ); + + let assemblyFormat = "`<` struct(params) `>`"; +} + +def cuf_ClusterDimsAttr : cuf_Attr<"ClusterDims"> { + let mnemonic = "cluster_dims"; + + let parameters = (ins + "mlir::IntegerAttr":$x, + "mlir::IntegerAttr":$y, + "mlir::IntegerAttr":$z + ); + + let assemblyFormat = "`<` struct(params) `>`"; +} + +def cuf_DataTransferKind : I32EnumAttr< + "DataTransferKind", "CUDA Fortran data transfer kind", + [ + I32EnumAttrCase<"DeviceHost", 0, "device_host">, + I32EnumAttrCase<"HostDevice", 1, "host_device">, + I32EnumAttrCase<"DeviceDevice", 2, "device_device">, + ]> { + let genSpecializedAttr = 0; + let cppNamespace = "::cuf"; +} + +def cuf_DataTransferKindAttr : + EnumAttr { + let assemblyFormat = [{ ```<` $value `>` }]; +} + +#endif // FORTRAN_DIALECT_CUF_CUFATTRS diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CUF/CMakeLists.txt new file mode 100644 index 000000000000..07490c7b9ca2 --- /dev/null +++ b/flang/include/flang/Optimizer/Dialect/CUF/CMakeLists.txt @@ -0,0 +1,11 @@ +add_subdirectory(Attributes) + +set(LLVM_TARGET_DEFINITIONS CUFDialect.td) +mlir_tablegen(CUFDialect.h.inc -gen-dialect-decls -dialect=cuf) +mlir_tablegen(CUFDialect.cpp.inc -gen-dialect-defs -dialect=cuf) + +set(LLVM_TARGET_DEFINITIONS CUFOps.td) +mlir_tablegen(CUFOps.h.inc -gen-op-decls) +mlir_tablegen(CUFOps.cpp.inc -gen-op-defs) + +add_public_tablegen_target(CUFOpsIncGen) diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.h b/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.h new file mode 100644 index 000000000000..cf562b226835 --- /dev/null +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.h @@ -0,0 +1,26 @@ +//===-- Optimizer/Dialect/CUFDialect.h -- CUF dialect -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H +#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H + +#include "mlir/Bytecode/BytecodeOpInterface.h" +#include "mlir/IR/Dialect.h" +#include "mlir/IR/SymbolTable.h" +#include "mlir/Interfaces/CallInterfaces.h" +#include "mlir/Interfaces/FunctionInterfaces.h" +#include "mlir/Interfaces/LoopLikeInterface.h" +#include "mlir/Interfaces/SideEffectInterfaces.h" + +#include "flang/Optimizer/Dialect/CUF/CUFDialect.h.inc" + +#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.td new file mode 100644 index 000000000000..df866e566406 --- /dev/null +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.td @@ -0,0 +1,43 @@ +//===-- CUFDialect.td - CUF dialect base definitions -------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Definition of the CUDA Fortran dialect +/// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_DIALECT_CUF_CUFDIALECT +#define FORTRAN_DIALECT_CUF_CUFDIALECT + +include "mlir/IR/AttrTypeBase.td" +include "mlir/IR/EnumAttr.td" +include "mlir/IR/OpBase.td" + +def CUFDialect : Dialect { + let name = "cuf"; + + let summary = "CUDA Fortran dialect"; + + let description = [{ + This dialect models CUDA Fortran operations. The CUF dialect operations use + the FIR types and are tightly coupled with FIR and HLFIR. + }]; + + let useDefaultAttributePrinterParser = 1; + let usePropertiesForAttributes = 1; + let cppNamespace = "::cuf"; + let dependentDialects = ["fir::FIROpsDialect"]; + + let extraClassDeclaration = [{ + private: + // Register the CUF Attributes. + void registerAttributes(); + }]; +} + +#endif // FORTRAN_DIALECT_CUF_CUFDIALECT diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h new file mode 100644 index 000000000000..4132db672e39 --- /dev/null +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h @@ -0,0 +1,20 @@ +//===-- Optimizer/Dialect/CUF/CUFOps.h - CUF operations ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H +#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H + +#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h" +#include "flang/Optimizer/Dialect/CUF/CUFDialect.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "mlir/IR/OpDefinition.h" + +#define GET_OP_CLASSES +#include "flang/Optimizer/Dialect/CUF/CUFOps.h.inc" + +#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td new file mode 100644 index 000000000000..72157bce4f76 --- /dev/null +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -0,0 +1,263 @@ +//===-- CUFOps.td - CUF operation definitions --------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Definition of the CUF dialect operations +/// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_DIALECT_CUF_CUF_OPS +#define FORTRAN_DIALECT_CUF_CUF_OPS + +include "flang/Optimizer/Dialect/CUF/CUFDialect.td" +include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td" +include "flang/Optimizer/Dialect/FIRTypes.td" +include "mlir/Interfaces/LoopLikeInterface.td" +include "mlir/IR/BuiltinAttributes.td" + +class cuf_Op traits> + : Op; + +def cuf_AllocOp : cuf_Op<"alloc", [AttrSizedOperandSegments, + MemoryEffects<[MemAlloc]>]> { + let summary = "Allocate an object on device"; + + let description = [{ + This is a drop in replacement for fir.alloca and fir.allocmem for device + object. Any device, managed or unified object declared in an host + subprogram needs to be allocated in the device memory through runtime calls. + The cuf.alloc is an abstraction to the runtime calls and works together + with cuf.free. + }]; + + let arguments = (ins + TypeAttr:$in_type, + OptionalAttr:$uniq_name, + OptionalAttr:$bindc_name, + Variadic:$typeparams, + Variadic:$shape, + cuf_DataAttributeAttr:$data_attr + ); + + let results = (outs fir_ReferenceType:$ptr); + + let assemblyFormat = [{ + $in_type (`(` $typeparams^ `:` type($typeparams) `)`)? + (`,` $shape^ `:` type($shape) )? attr-dict `->` qualified(type($ptr)) + }]; + + let builders = [ + OpBuilder<(ins "mlir::Type":$inType, "llvm::StringRef":$uniqName, + "llvm::StringRef":$bindcName, + "cuf::DataAttributeAttr":$cudaAttr, + CArg<"mlir::ValueRange", "{}">:$typeparams, + CArg<"mlir::ValueRange", "{}">:$shape, + CArg<"llvm::ArrayRef", "{}">:$attributes)>]; + + let hasVerifier = 1; +} + +def cuf_FreeOp : cuf_Op<"free", [MemoryEffects<[MemFree]>]> { + let summary = "Free a device allocated object"; + + let description = [{ + The cuf.free operation frees the memory allocated by cuf.alloc. + This is used for non-allocatable device, managed and unified device + variables declare in host subprogram. + }]; + + let arguments = (ins + Arg:$devptr, + cuf_DataAttributeAttr:$data_attr + ); + + let assemblyFormat = "$devptr `:` qualified(type($devptr)) attr-dict"; + + let hasVerifier = 1; +} + +def cuf_AllocateOp : cuf_Op<"allocate", [AttrSizedOperandSegments, + MemoryEffects<[MemAlloc]>]> { + let summary = "Perform the device allocation of data of an allocatable"; + + let description = [{ + The cuf.allocate operation performs the allocation on the device + of the data of an allocatable. The descriptor passed to the operation + is initialized before with the standard flang runtime calls. + }]; + + let arguments = (ins Arg:$box, + Arg, "", [MemWrite]>:$errmsg, + Optional:$stream, + Arg, "", [MemWrite]>:$pinned, + Arg, "", [MemRead]>:$source, + cuf_DataAttributeAttr:$data_attr, + UnitAttr:$hasStat); + + let results = (outs AnyIntegerType:$stat); + + let assemblyFormat = [{ + $box `:` qualified(type($box)) + ( `source` `(` $source^ `:` qualified(type($source) )`)` )? + ( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )? + ( `stream` `(` $stream^ `:` type($stream) `)` )? + ( `pinned` `(` $pinned^ `:` type($pinned) `)` )? + attr-dict `->` type($stat) + }]; + + let hasVerifier = 1; +} + +def cuf_DeallocateOp : cuf_Op<"deallocate", + [MemoryEffects<[MemFree]>]> { + let summary = "Perform the device deallocation of data of an allocatable"; + + let description = [{ + The cuf.deallocate operation performs the deallocation on the device + of the data of an allocatable. + }]; + + let arguments = (ins Arg:$box, + Arg, "", [MemWrite]>:$errmsg, + cuf_DataAttributeAttr:$data_attr, + UnitAttr:$hasStat); + + let results = (outs AnyIntegerType:$stat); + + let assemblyFormat = [{ + $box `:` qualified(type($box)) + ( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )? + attr-dict `->` type($stat) + }]; + + let hasVerifier = 1; +} + +def cuf_DataTransferOp : cuf_Op<"data_transfer", []> { + let summary = "Represent a data transfer between host and device memory"; + + let description = [{ + CUDA Fortran allows data transfer to be done via intrinsic assignment + between a host and a device variable. This operation is used to materialized + the data transfer between the lhs and rhs memory references. + The kind of transfer is specified in the attribute. + + ``` + adev = a ! transfer host to device + a = adev ! transfer device to host + bdev = adev ! transfer device to device + ``` + }]; + + let arguments = (ins Arg:$src, + Arg:$dst, + cuf_DataTransferKindAttr:$transfer_kind); + + let assemblyFormat = [{ + $src `to` $dst attr-dict `:` type(operands) + }]; +} + +def cuf_KernelLaunchOp : cuf_Op<"kernel_launch", [CallOpInterface, + AttrSizedOperandSegments]> { + let summary = "call CUDA kernel"; + + let description = [{ + Launch a CUDA kernel from the host. + + ``` + // launch simple kernel with no arguments. bytes and stream value are + // optional in the chevron notation. + cuf.kernel_launch @kernel<<<%gx, %gy, %bx, %by, %bz>>>() + ``` + }]; + + let arguments = (ins + SymbolRefAttr:$callee, + I32:$grid_x, + I32:$grid_y, + I32:$grid_z, + I32:$block_x, + I32:$block_y, + I32:$block_z, + Optional:$bytes, + Optional:$stream, + Variadic:$args + ); + + let assemblyFormat = [{ + $callee `<` `<` `<` $grid_x `,` $grid_y `,` $grid_z `,`$block_x `,` + $block_y `,` $block_z ( `,` $bytes^ ( `,` $stream^ )? )? `>` `>` `>` + `` `(` $args `)` ( `:` `(` type($args)^ `)` )? attr-dict + }]; + + let extraClassDeclaration = [{ + mlir::CallInterfaceCallable getCallableForCallee() { + return getCalleeAttr(); + } + + void setCalleeFromCallable(mlir::CallInterfaceCallable callee) { + (*this)->setAttr(getCalleeAttrName(), callee.get()); + } + mlir::FunctionType getFunctionType(); + + unsigned getNbNoArgOperand() { + unsigned nbNoArgOperand = 5; // grids and blocks values are always present. + if (getBytes()) ++nbNoArgOperand; + if (getStream()) ++nbNoArgOperand; + return nbNoArgOperand; + } + + operand_range getArgOperands() { + return {operand_begin() + getNbNoArgOperand(), operand_end()}; + } + mlir::MutableOperandRange getArgOperandsMutable() { + return mlir::MutableOperandRange( + *this, getNbNoArgOperand(), getArgs().size() - 1); + } + }]; +} + +def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments, + DeclareOpInterfaceMethods]> { + + let description = [{ + Represent the CUDA Fortran kernel directive. The operation is a loop like + operation that represents the iteration range of the embedded loop nest. + + When grid or block variadic operands are empty, a `*` only syntax was used + in the Fortran code. + If the `*` is mixed with values for either grid or block, these are + represented by a 0 constant value. + }]; + + let arguments = (ins + Variadic:$grid, // empty means `*` + Variadic:$block, // empty means `*` + Optional:$stream, + Variadic:$lowerbound, + Variadic:$upperbound, + Variadic:$step, + OptionalAttr:$n + ); + + let regions = (region AnyRegion:$region); + + let assemblyFormat = [{ + `<` `<` `<` custom($grid, type($grid)) `,` + custom($block, type($block)) + ( `,` `stream` `=` $stream^ )? `>` `>` `>` + custom($region, $lowerbound, type($lowerbound), + $upperbound, type($upperbound), $step, type($step)) + attr-dict + }]; + + let hasVerifier = 1; +} + +#endif // FORTRAN_DIALECT_CUF_CUF_OPS diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td index f8b3fb861cc6..989319ff3dda 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td +++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td @@ -70,87 +70,4 @@ def fir_BoxFieldAttr : I32EnumAttr< // mlir::SideEffects::Resource for modelling operations which add debugging information def DebuggingResource : Resource<"::fir::DebuggingResource">; -//===----------------------------------------------------------------------===// -// CUDA Fortran specific attributes -//===----------------------------------------------------------------------===// - -def fir_CUDADataAttribute : I32EnumAttr< - "CUDADataAttribute", - "CUDA Fortran variable attributes", - [ - I32EnumAttrCase<"Constant", 0, "constant">, - I32EnumAttrCase<"Device", 1, "device">, - I32EnumAttrCase<"Managed", 2, "managed">, - I32EnumAttrCase<"Pinned", 3, "pinned">, - I32EnumAttrCase<"Shared", 4, "shared">, - I32EnumAttrCase<"Unified", 5, "unified">, - // Texture is omitted since it is obsolete and rejected by semantic. - ]> { - let genSpecializedAttr = 0; - let cppNamespace = "::fir"; -} - -def fir_CUDADataAttributeAttr : - EnumAttr { - let assemblyFormat = [{ ```<` $value `>` }]; -} - -def fir_CUDAProcAttribute : I32EnumAttr< - "CUDAProcAttribute", "CUDA Fortran procedure attributes", - [ - I32EnumAttrCase<"Host", 0, "host">, - I32EnumAttrCase<"Device", 1, "device">, - I32EnumAttrCase<"HostDevice", 2, "host_device">, - I32EnumAttrCase<"Global", 3, "global">, - I32EnumAttrCase<"GridGlobal", 4, "grid_global">, - ]> { - let genSpecializedAttr = 0; - let cppNamespace = "::fir"; -} - -def fir_CUDAProcAttributeAttr : - EnumAttr { - let assemblyFormat = [{ ```<` $value `>` }]; -} - -def fir_CUDALaunchBoundsAttr : fir_Attr<"CUDALaunchBounds"> { - let mnemonic = "launch_bounds"; - - let parameters = (ins - "mlir::IntegerAttr":$maxTPB, - "mlir::IntegerAttr":$minBPM, - OptionalParameter<"mlir::IntegerAttr">:$upperBoundClusterSize - ); - - let assemblyFormat = "`<` struct(params) `>`"; -} - -def fir_CUDAClusterDimsAttr : fir_Attr<"CUDAClusterDims"> { - let mnemonic = "cluster_dims"; - - let parameters = (ins - "mlir::IntegerAttr":$x, - "mlir::IntegerAttr":$y, - "mlir::IntegerAttr":$z - ); - - let assemblyFormat = "`<` struct(params) `>`"; -} - -def fir_CUDADataTransferKind : I32EnumAttr< - "CUDADataTransferKind", "CUDA Fortran data transfer kind", - [ - I32EnumAttrCase<"DeviceHost", 0, "device_host">, - I32EnumAttrCase<"HostDevice", 1, "host_device">, - I32EnumAttrCase<"DeviceDevice", 2, "device_device">, - ]> { - let genSpecializedAttr = 0; - let cppNamespace = "::fir"; -} - -def fir_CUDADataTransferKindAttr : - EnumAttr { - let assemblyFormat = [{ ```<` $value `>` }]; -} - #endif // FIR_DIALECT_FIR_ATTRS diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.h b/flang/include/flang/Optimizer/Dialect/FIROps.h index 016ad0433ed8..9f07364ddb62 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.h +++ b/flang/include/flang/Optimizer/Dialect/FIROps.h @@ -9,6 +9,7 @@ #ifndef FORTRAN_OPTIMIZER_DIALECT_FIROPS_H #define FORTRAN_OPTIMIZER_DIALECT_FIROPS_H +#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h" #include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/FirAliasTagOpInterface.h" diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 64c5e360b28f..d9c114904006 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -17,6 +17,7 @@ include "mlir/Dialect/Arith/IR/ArithBase.td" include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td" include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td" +include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td" include "flang/Optimizer/Dialect/FIRDialect.td" include "flang/Optimizer/Dialect/FIRTypes.td" include "flang/Optimizer/Dialect/FIRAttr.td" @@ -2436,66 +2437,6 @@ def fir_DispatchOp : fir_Op<"dispatch", []> { }]; } -def fir_CUDAKernelLaunch : fir_Op<"cuda_kernel_launch", [CallOpInterface, - AttrSizedOperandSegments]> { - let summary = "call CUDA kernel"; - - let description = [{ - Launch a CUDA kernel from the host. - - ``` - // launch simple kernel with no arguments. bytes and stream value are - // optional in the chevron notation. - fir.cuda_kernel_launch @kernel<<<%gx, %gy, %bx, %by, %bz>>>() - ``` - }]; - - let arguments = (ins - SymbolRefAttr:$callee, - I32:$grid_x, - I32:$grid_y, - I32:$grid_z, - I32:$block_x, - I32:$block_y, - I32:$block_z, - Optional:$bytes, - Optional:$stream, - Variadic:$args - ); - - let assemblyFormat = [{ - $callee `<` `<` `<` $grid_x `,` $grid_y `,` $grid_z `,`$block_x `,` - $block_y `,` $block_z ( `,` $bytes^ ( `,` $stream^ )? )? `>` `>` `>` - `` `(` $args `)` ( `:` `(` type($args)^ `)` )? attr-dict - }]; - - let extraClassDeclaration = [{ - mlir::CallInterfaceCallable getCallableForCallee() { - return getCalleeAttr(); - } - - void setCalleeFromCallable(mlir::CallInterfaceCallable callee) { - (*this)->setAttr(getCalleeAttrName(), callee.get()); - } - mlir::FunctionType getFunctionType(); - - unsigned getNbNoArgOperand() { - unsigned nbNoArgOperand = 5; // grids and blocks values are always present. - if (getBytes()) ++nbNoArgOperand; - if (getStream()) ++nbNoArgOperand; - return nbNoArgOperand; - } - - operand_range getArgOperands() { - return {operand_begin() + getNbNoArgOperand(), operand_end()}; - } - mlir::MutableOperandRange getArgOperandsMutable() { - return mlir::MutableOperandRange( - *this, getNbNoArgOperand(), getArgs().size() - 1); - } - }]; -} - // Constant operations that support Fortran def fir_StringLitOp : fir_Op<"string_lit", [NoMemoryEffect]> { @@ -2797,7 +2738,7 @@ def fir_GlobalOp : fir_Op<"global", [IsolatedFromAbove, Symbol]> { OptionalAttr:$constant, OptionalAttr:$target, OptionalAttr:$linkName, - OptionalAttr:$cuda_attr + OptionalAttr:$data_attr ); let regions = (region AtMostRegion<1>:$region); @@ -3077,7 +3018,7 @@ def fir_DeclareOp : fir_Op<"declare", [AttrSizedOperandSegments, Optional:$dummy_scope, Builtin_StringAttr:$uniq_name, OptionalAttr:$fortran_attrs, - OptionalAttr:$cuda_attr + OptionalAttr:$data_attr ); let results = (outs AnyRefOrBox); @@ -3130,125 +3071,6 @@ def fir_BoxOffsetOp : fir_Op<"box_offset", [NoMemoryEffect]> { ]; } -def fir_CUDAKernelOp : fir_Op<"cuda_kernel", [AttrSizedOperandSegments, - DeclareOpInterfaceMethods]> { - - let description = [{ - Represent the CUDA Fortran kernel directive. The operation is a loop like - operation that represents the iteration range of the embedded loop nest. - - When grid or block variadic operands are empty, a `*` only syntax was used - in the Fortran code. - If the `*` is mixed with values for either grid or block, these are - represented by a 0 constant value. - }]; - - let arguments = (ins - Variadic:$grid, // empty means `*` - Variadic:$block, // empty means `*` - Optional:$stream, - Variadic:$lowerbound, - Variadic:$upperbound, - Variadic:$step, - OptionalAttr:$n - ); - - let regions = (region AnyRegion:$region); - - let assemblyFormat = [{ - `<` `<` `<` custom($grid, type($grid)) `,` - custom($block, type($block)) - ( `,` `stream` `=` $stream^ )? `>` `>` `>` - custom($region, $lowerbound, type($lowerbound), - $upperbound, type($upperbound), $step, type($step)) - attr-dict - }]; - - let hasVerifier = 1; -} - -def fir_CUDADataTransferOp : fir_Op<"cuda_data_transfer", []> { - let summary = "Represent a data transfer between host and device memory"; - - let description = [{ - CUDA Fortran allows data transfer to be done via intrinsic assignment - between a host and a device variable. This operation is used to materialized - the data transfer between the lhs and rhs memory references. - The kind of transfer is specified in the attribute. - - ``` - adev = a ! transfer host to device - a = adev ! transfer device to host - bdev = adev ! transfer device to device - ``` - }]; - - let arguments = (ins Arg:$src, - Arg:$dst, - fir_CUDADataTransferKindAttr:$transfer_kind); - - let assemblyFormat = [{ - $src `to` $dst attr-dict `:` type(operands) - }]; -} - -def fir_CUDAAllocateOp : fir_Op<"cuda_allocate", [AttrSizedOperandSegments, - MemoryEffects<[MemAlloc]>]> { - let summary = "Perform the device allocation of data of an allocatable"; - - let description = [{ - The fir.cuda_allocate operation performs the allocation on the device - of the data of an allocatable. The descriptor passed to the operation - is initialized before with the standard flang runtime calls. - }]; - - let arguments = (ins Arg:$box, - Arg, "", [MemWrite]>:$errmsg, - Optional:$stream, - Arg, "", [MemWrite]>:$pinned, - Arg, "", [MemRead]>:$source, - fir_CUDADataAttributeAttr:$cuda_attr, - UnitAttr:$hasStat); - - let results = (outs AnyIntegerType:$stat); - - let assemblyFormat = [{ - $box `:` qualified(type($box)) - ( `source` `(` $source^ `:` qualified(type($source) )`)` )? - ( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )? - ( `stream` `(` $stream^ `:` type($stream) `)` )? - ( `pinned` `(` $pinned^ `:` type($pinned) `)` )? - attr-dict `->` type($stat) - }]; - - let hasVerifier = 1; -} - -def fir_CUDADeallocateOp : fir_Op<"cuda_deallocate", - [MemoryEffects<[MemFree]>]> { - let summary = "Perform the device deallocation of data of an allocatable"; - - let description = [{ - The fir.cuda_deallocate operation performs the deallocation on the device - of the data of an allocatable. - }]; - - let arguments = (ins Arg:$box, - Arg, "", [MemWrite]>:$errmsg, - fir_CUDADataAttributeAttr:$cuda_attr, - UnitAttr:$hasStat); - - let results = (outs AnyIntegerType:$stat); - - let assemblyFormat = [{ - $box `:` qualified(type($box)) - ( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )? - attr-dict `->` type($stat) - }]; - - let hasVerifier = 1; -} - def fir_DummyScopeOp : fir_Op<"dummy_scope", [MemoryEffects<[MemWrite]>]> { let summary = "Define a scope for dummy arguments"; @@ -3329,62 +3151,4 @@ def fir_DummyScopeOp : fir_Op<"dummy_scope", let assemblyFormat = "attr-dict `:` type(results)"; } -def fir_CUDAAllocOp : fir_Op<"cuda_alloc", [AttrSizedOperandSegments, - MemoryEffects<[MemAlloc]>]> { - let summary = "Allocate an object on device"; - - let description = [{ - This is a drop in replacement for fir.alloca and fir.allocmem for device - object. Any device, managed or unified object declared in an host - subprogram needs to be allocated in the device memory through runtime calls. - The fir.cuda_alloc is an abstraction to the runtime calls and works together - with fir.cuda_free. - }]; - - let arguments = (ins - TypeAttr:$in_type, - OptionalAttr:$uniq_name, - OptionalAttr:$bindc_name, - Variadic:$typeparams, - Variadic:$shape, - fir_CUDADataAttributeAttr:$cuda_attr - ); - - let results = (outs fir_ReferenceType:$ptr); - - let assemblyFormat = [{ - $in_type (`(` $typeparams^ `:` type($typeparams) `)`)? - (`,` $shape^ `:` type($shape) )? attr-dict `->` qualified(type($ptr)) - }]; - - let builders = [ - OpBuilder<(ins "mlir::Type":$inType, "llvm::StringRef":$uniqName, - "llvm::StringRef":$bindcName, - "fir::CUDADataAttributeAttr":$cudaAttr, - CArg<"mlir::ValueRange", "{}">:$typeparams, - CArg<"mlir::ValueRange", "{}">:$shape, - CArg<"llvm::ArrayRef", "{}">:$attributes)>]; - - let hasVerifier = 1; -} - -def fir_CUDAFreeOp : fir_Op<"cuda_free", [MemoryEffects<[MemFree]>]> { - let summary = "Free a device allocated object"; - - let description = [{ - The fir.cuda_free operation frees the memory allocated by fir.cuda_alloc. - This is used for non-allocatable device, managed and unified device - variables declare in host subprogram. - }]; - - let arguments = (ins - Arg:$devptr, - fir_CUDADataAttributeAttr:$cuda_attr - ); - - let assemblyFormat = "$devptr `:` qualified(type($devptr)) attr-dict"; - - let hasVerifier = 1; -} - #endif diff --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h index 46b62d8de8d3..b68a39bf374b 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h +++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h @@ -75,19 +75,6 @@ constexpr llvm::StringRef getOptionalAttrName() { return "fir.optional"; } /// Attribute to mark Fortran entities with the TARGET attribute. static constexpr llvm::StringRef getTargetAttrName() { return "fir.target"; } -/// Attribute to mark Fortran entities with the CUDA attribute. -static constexpr llvm::StringRef getCUDAAttrName() { return "fir.cuda_attr"; } - -/// Attribute to carry CUDA launch_bounds values. -static constexpr llvm::StringRef getCUDALaunchBoundsAttrName() { - return "fir.cuda_launch_bounds"; -} - -/// Attribute to carry CUDA cluster_dims values. -static constexpr llvm::StringRef getCUDAClusterDimsAttrName() { - return "fir.cuda_cluster_dims"; -} - /// Attribute to mark that a function argument is a character dummy procedure. /// Character dummy procedure have special ABI constraints. static constexpr llvm::StringRef getCharacterProcedureDummyAttrName() { diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td index 376417e3c353..b537d9e11ef8 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -15,6 +15,7 @@ #define FORTRAN_DIALECT_HLFIR_OPS include "flang/Optimizer/HLFIR/HLFIROpBase.td" +include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td" include "flang/Optimizer/Dialect/FIRTypes.td" include "flang/Optimizer/Dialect/FIRAttr.td" include "flang/Optimizer/Dialect/FortranVariableInterface.td" @@ -90,7 +91,7 @@ def hlfir_DeclareOp : hlfir_Op<"declare", [AttrSizedOperandSegments, Optional:$dummy_scope, Builtin_StringAttr:$uniq_name, OptionalAttr:$fortran_attrs, - OptionalAttr:$cuda_attr + OptionalAttr:$data_attr ); let results = (outs AnyFortranVariable, AnyRefOrBoxLike); @@ -106,7 +107,7 @@ def hlfir_DeclareOp : hlfir_Op<"declare", [AttrSizedOperandSegments, CArg<"mlir::Value", "{}">:$shape, CArg<"mlir::ValueRange", "{}">:$typeparams, CArg<"mlir::Value", "{}">:$dummy_scope, CArg<"fir::FortranVariableFlagsAttr", "{}">:$fortran_attrs, - CArg<"fir::CUDADataAttributeAttr", "{}">:$cuda_attr)>]; + CArg<"cuf::DataAttributeAttr", "{}">:$data_attr)>]; let extraClassDeclaration = [{ /// Get the variable original base (same as input). It lacks diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h index 9f4c4ed28a4a..48cc1cbc6456 100644 --- a/flang/include/flang/Optimizer/Support/InitFIR.h +++ b/flang/include/flang/Optimizer/Support/InitFIR.h @@ -13,6 +13,7 @@ #ifndef FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H #define FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H +#include "flang/Optimizer/Dialect/CUF/CUFDialect.h" #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "mlir/Conversion/Passes.h" @@ -34,7 +35,7 @@ namespace fir::support { mlir::scf::SCFDialect, mlir::arith::ArithDialect, \ mlir::cf::ControlFlowDialect, mlir::func::FuncDialect, \ mlir::vector::VectorDialect, mlir::math::MathDialect, \ - mlir::complex::ComplexDialect, mlir::DLTIDialect + mlir::complex::ComplexDialect, mlir::DLTIDialect, cuf::CUFDialect #define FLANG_CODEGEN_DIALECT_LIST FIRCodeGenDialect, mlir::LLVM::LLVMDialect diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h index 2da6f24da40e..d8bcb5fae034 100644 --- a/flang/include/flang/Optimizer/Support/Utils.h +++ b/flang/include/flang/Optimizer/Support/Utils.h @@ -16,6 +16,7 @@ #include "flang/Common/default-kinds.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/Todo.h" +#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Support/FatalError.h" @@ -136,66 +137,6 @@ inline void intrinsicTypeTODO(fir::FirOpBuilder &builder, mlir::Type type, " in " + intrinsicName); } -inline fir::CUDADataAttributeAttr -getCUDADataAttribute(mlir::MLIRContext *mlirContext, - std::optional cudaAttr) { - if (cudaAttr) { - fir::CUDADataAttribute attr; - switch (*cudaAttr) { - case Fortran::common::CUDADataAttr::Constant: - attr = fir::CUDADataAttribute::Constant; - break; - case Fortran::common::CUDADataAttr::Device: - attr = fir::CUDADataAttribute::Device; - break; - case Fortran::common::CUDADataAttr::Managed: - attr = fir::CUDADataAttribute::Managed; - break; - case Fortran::common::CUDADataAttr::Pinned: - attr = fir::CUDADataAttribute::Pinned; - break; - case Fortran::common::CUDADataAttr::Shared: - attr = fir::CUDADataAttribute::Shared; - break; - case Fortran::common::CUDADataAttr::Texture: - // Obsolete attribute - return {}; - case Fortran::common::CUDADataAttr::Unified: - attr = fir::CUDADataAttribute::Unified; - break; - } - return fir::CUDADataAttributeAttr::get(mlirContext, attr); - } - return {}; -} - -inline fir::CUDAProcAttributeAttr getCUDAProcAttribute( - mlir::MLIRContext *mlirContext, - std::optional cudaAttr) { - if (cudaAttr) { - fir::CUDAProcAttribute attr; - switch (*cudaAttr) { - case Fortran::common::CUDASubprogramAttrs::Host: - attr = fir::CUDAProcAttribute::Host; - break; - case Fortran::common::CUDASubprogramAttrs::Device: - attr = fir::CUDAProcAttribute::Device; - break; - case Fortran::common::CUDASubprogramAttrs::HostDevice: - attr = fir::CUDAProcAttribute::HostDevice; - break; - case Fortran::common::CUDASubprogramAttrs::Global: - attr = fir::CUDAProcAttribute::Global; - break; - case Fortran::common::CUDASubprogramAttrs::Grid_Global: - attr = fir::CUDAProcAttribute::GridGlobal; - break; - } - return fir::CUDAProcAttributeAttr::get(mlirContext, attr); - } - return {}; -} - } // namespace fir #endif // FORTRAN_OPTIMIZER_SUPPORT_UTILS_H diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt index a701c264bc4c..f85665d11429 100644 --- a/flang/lib/Frontend/CMakeLists.txt +++ b/flang/lib/Frontend/CMakeLists.txt @@ -14,6 +14,7 @@ add_flang_library(flangFrontend TextDiagnostic.cpp DEPENDS + CUFDialect FIRDialect FIROptCodeGenPassIncGen FIROptTransformsPassIncGen @@ -23,6 +24,7 @@ add_flang_library(flangFrontend ${extension_libs} LINK_LIBS + CUFDialect FortranParser FortranSemantics FortranEvaluate diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index a1957c0eb1bb..61f4bbd856a8 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -24,6 +24,7 @@ #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/Runtime/RTBuilder.h" #include "flang/Optimizer/Builder/Todo.h" +#include "flang/Optimizer/Dialect/CUF/CUFOps.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" #include "flang/Optimizer/Support/FatalError.h" @@ -729,9 +730,9 @@ private: ErrorManager &errorManager, const Fortran::semantics::Symbol &sym) { Fortran::lower::StatementContext stmtCtx; - fir::CUDADataAttributeAttr cudaAttr = - Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(), - sym); + cuf::DataAttributeAttr cudaAttr = + Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(), + sym); mlir::Value errmsg = errMsgExpr ? errorManager.errMsgAddr : nullptr; mlir::Value stream = streamExpr @@ -746,7 +747,7 @@ private: // Keep return type the same as a standard AllocatableAllocate call. mlir::Type retTy = fir::runtime::getModel()(builder.getContext()); return builder - .create( + .create( loc, retTy, box.getAddr(), errmsg, stream, pinned, source, cudaAttr, errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr) .getResult(); @@ -804,9 +805,9 @@ static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder, const fir::MutableBoxValue &box, ErrorManager &errorManager, const Fortran::semantics::Symbol &sym) { - fir::CUDADataAttributeAttr cudaAttr = - Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(), - sym); + cuf::DataAttributeAttr cudaAttr = + Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(), + sym); mlir::Value errmsg = mlir::isa(errorManager.errMsgAddr.getDefiningOp()) ? nullptr @@ -815,7 +816,7 @@ static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder, // Keep return type the same as a standard AllocatableAllocate call. mlir::Type retTy = fir::runtime::getModel()(builder.getContext()); return builder - .create( + .create( loc, retTy, box.getAddr(), errmsg, cudaAttr, errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr) .getResult(); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index afbc1122de86..4a29c6b8fae7 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -40,6 +40,8 @@ #include "flang/Optimizer/Builder/Runtime/Ragged.h" #include "flang/Optimizer/Builder/Runtime/Stop.h" #include "flang/Optimizer/Builder/Todo.h" +#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h" +#include "flang/Optimizer/Dialect/CUF/CUFOps.h" #include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROps.h" @@ -2648,8 +2650,8 @@ private: loopEval = &*std::next(loopEval->getNestedEvaluations().begin()); } - auto op = builder->create( - loc, gridValues, blockValues, streamValue, lbs, ubs, steps, n); + auto op = builder->create(loc, gridValues, blockValues, + streamValue, lbs, ubs, steps, n); builder->createBlock(&op.getRegion(), op.getRegion().end(), ivTypes, ivLocs); mlir::Block &b = op.getRegion().back(); @@ -3719,46 +3721,44 @@ private: // device = host if (lhsIsDevice && !rhsIsDevice) { - auto transferKindAttr = fir::CUDADataTransferKindAttr::get( - builder.getContext(), fir::CUDADataTransferKind::HostDevice); + auto transferKindAttr = cuf::DataTransferKindAttr::get( + builder.getContext(), cuf::DataTransferKind::HostDevice); if (!rhs.isVariable()) { auto associate = hlfir::genAssociateExpr( loc, builder, rhs, rhs.getType(), ".cuf_host_tmp"); - builder.create(loc, associate.getBase(), lhs, - transferKindAttr); + builder.create(loc, associate.getBase(), lhs, + transferKindAttr); builder.create(loc, associate); } else { - builder.create(loc, rhs, lhs, - transferKindAttr); + builder.create(loc, rhs, lhs, transferKindAttr); } return; } // host = device if (!lhsIsDevice && rhsIsDevice) { - auto transferKindAttr = fir::CUDADataTransferKindAttr::get( - builder.getContext(), fir::CUDADataTransferKind::DeviceHost); + auto transferKindAttr = cuf::DataTransferKindAttr::get( + builder.getContext(), cuf::DataTransferKind::DeviceHost); if (!rhs.isVariable()) { // evaluateRhs loads scalar. Look for the memory reference to be used in // the transfer. if (mlir::isa_and_nonnull(rhs.getDefiningOp())) { auto loadOp = mlir::dyn_cast(rhs.getDefiningOp()); - builder.create(loc, loadOp.getMemref(), lhs, - transferKindAttr); + builder.create(loc, loadOp.getMemref(), lhs, + transferKindAttr); return; } } else { - builder.create(loc, rhs, lhs, - transferKindAttr); + builder.create(loc, rhs, lhs, transferKindAttr); } return; } if (lhsIsDevice && rhsIsDevice) { assert(rhs.isVariable() && "CUDA Fortran assignment rhs is not legal"); - auto transferKindAttr = fir::CUDADataTransferKindAttr::get( - builder.getContext(), fir::CUDADataTransferKind::DeviceDevice); - builder.create(loc, rhs, lhs, transferKindAttr); + auto transferKindAttr = cuf::DataTransferKindAttr::get( + builder.getContext(), cuf::DataTransferKind::DeviceDevice); + builder.create(loc, rhs, lhs, transferKindAttr); return; } llvm_unreachable("Unhandled CUDA data transfer"); @@ -3769,8 +3769,8 @@ private: const Fortran::evaluate::Assignment &assign) { llvm::SmallVector temps; localSymbols.pushScope(); - auto transferKindAttr = fir::CUDADataTransferKindAttr::get( - builder.getContext(), fir::CUDADataTransferKind::DeviceHost); + auto transferKindAttr = cuf::DataTransferKindAttr::get( + builder.getContext(), cuf::DataTransferKind::DeviceHost); [[maybe_unused]] unsigned nbDeviceResidentObject = 0; for (const Fortran::semantics::Symbol &sym : Fortran::evaluate::CollectSymbols(assign.rhs)) { @@ -3795,8 +3795,8 @@ private: addSymbol(sym, hlfir::translateToExtendedValue(loc, builder, temp).first, /*forced=*/true); - builder.create(loc, addr, temp, - transferKindAttr); + builder.create(loc, addr, temp, + transferKindAttr); ++nbDeviceResidentObject; } } @@ -3808,15 +3808,15 @@ private: // subprogram are not considered fully device context so it will return false // for it. static bool isDeviceContext(fir::FirOpBuilder &builder) { - if (builder.getRegion().getParentOfType()) + if (builder.getRegion().getParentOfType()) return true; if (auto funcOp = builder.getRegion().getParentOfType()) { if (auto cudaProcAttr = - funcOp.getOperation()->getAttrOfType( - fir::getCUDAAttrName())) { - return cudaProcAttr.getValue() != fir::CUDAProcAttribute::Host && - cudaProcAttr.getValue() != fir::CUDAProcAttribute::HostDevice; + funcOp.getOperation()->getAttrOfType( + cuf::getProcAttrName())) { + return cudaProcAttr.getValue() != cuf::ProcAttribute::Host && + cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice; } } return false; diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt index 1546409752e7..ba6622d8504a 100644 --- a/flang/lib/Lower/CMakeLists.txt +++ b/flang/lib/Lower/CMakeLists.txt @@ -37,6 +37,8 @@ add_flang_library(FortranLower VectorSubscripts.cpp DEPENDS + CUFAttrs + CUFDialect FIRDialect FIRTransforms HLFIRDialect @@ -44,6 +46,8 @@ add_flang_library(FortranLower ${extension_libs} LINK_LIBS + CUFAttrs + CUFDialect FIRDialect FIRDialectSupport FIRBuilder diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index c1f54ad39287..cfbb7c7f6b4f 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -627,9 +627,9 @@ setCUDAAttributes(mlir::func::FuncOp func, characteristic) { if (characteristic && characteristic->cudaSubprogramAttrs) { func.getOperation()->setAttr( - fir::getCUDAAttrName(), - fir::getCUDAProcAttribute(func.getContext(), - *characteristic->cudaSubprogramAttrs)); + cuf::getProcAttrName(), + cuf::getProcAttribute(func.getContext(), + *characteristic->cudaSubprogramAttrs)); } if (sym) { @@ -649,9 +649,9 @@ setCUDAAttributes(mlir::func::FuncOp func, ubAttr = mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[2]); func.getOperation()->setAttr( - fir::getCUDALaunchBoundsAttrName(), - fir::CUDALaunchBoundsAttr::get(func.getContext(), maxTPBAttr, - minBPMAttr, ubAttr)); + cuf::getLaunchBoundsAttrName(), + cuf::LaunchBoundsAttr::get(func.getContext(), maxTPBAttr, + minBPMAttr, ubAttr)); } if (!details->cudaClusterDims().empty()) { @@ -663,9 +663,8 @@ setCUDAAttributes(mlir::func::FuncOp func, auto zAttr = mlir::IntegerAttr::get(i64Ty, details->cudaClusterDims()[2]); func.getOperation()->setAttr( - fir::getCUDAClusterDimsAttrName(), - fir::CUDAClusterDimsAttr::get(func.getContext(), xAttr, yAttr, - zAttr)); + cuf::getClusterDimsAttrName(), + cuf::ClusterDimsAttr::get(func.getContext(), xAttr, yAttr, zAttr)); } } } @@ -1116,8 +1115,8 @@ private: addMLIRAttr(fir::getTargetAttrName()); if (obj.cudaDataAttr) attrs.emplace_back( - mlir::StringAttr::get(&mlirContext, fir::getCUDAAttrName()), - fir::getCUDADataAttribute(&mlirContext, obj.cudaDataAttr)); + mlir::StringAttr::get(&mlirContext, cuf::getDataAttrName()), + cuf::getDataAttribute(&mlirContext, obj.cudaDataAttr)); // TODO: intents that require special care (e.g finalization) diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index c6bfe3592169..7ec719a2cb9e 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -28,6 +28,7 @@ #include "flang/Optimizer/Builder/MutableBox.h" #include "flang/Optimizer/Builder/Runtime/Derived.h" #include "flang/Optimizer/Builder/Todo.h" +#include "flang/Optimizer/Dialect/CUF/CUFOps.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "mlir/IR/IRMapping.h" @@ -589,7 +590,7 @@ std::pair Fortran::lower::genCallOpAndResult( fir::getBase(converter.genExprValue( caller.getCallDescription().chevrons()[3], stmtCtx))); - builder.create( + builder.create( loc, funcType.getResults(), funcSymbolAttr, grid_x, grid_y, grid_z, block_x, block_y, block_z, bytes, stream, operands); callNumResults = 0; diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 5ddd8a6a9d41..b8868161fa05 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -30,6 +30,7 @@ #include "flang/Optimizer/Builder/IntrinsicCall.h" #include "flang/Optimizer/Builder/Runtime/Derived.h" #include "flang/Optimizer/Builder/Todo.h" +#include "flang/Optimizer/Dialect/CUF/CUFOps.h" #include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROps.h" @@ -139,7 +140,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter, const Fortran::lower::pft::Variable &var, llvm::StringRef globalName, mlir::StringAttr linkage, - fir::CUDADataAttributeAttr cudaAttr = {}); + cuf::DataAttributeAttr dataAttr = {}); static mlir::Location genLocation(Fortran::lower::AbstractConverter &converter, const Fortran::semantics::Symbol &sym) { @@ -172,12 +173,12 @@ static fir::GlobalOp declareGlobal(Fortran::lower::AbstractConverter &converter, !Fortran::semantics::IsProcedurePointer(ultimate)) mlir::emitError(loc, "processing global declaration: symbol '") << toStringRef(sym.name()) << "' has unexpected details\n"; - fir::CUDADataAttributeAttr cudaAttr = - Fortran::lower::translateSymbolCUDADataAttribute( + cuf::DataAttributeAttr dataAttr = + Fortran::lower::translateSymbolCUFDataAttribute( converter.getFirOpBuilder().getContext(), sym); return builder.createGlobal(loc, converter.genType(var), globalName, linkage, mlir::Attribute{}, isConstant(ultimate), - var.isTarget(), cudaAttr); + var.isTarget(), dataAttr); } /// Temporary helper to catch todos in initial data target lowering. @@ -474,7 +475,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter, const Fortran::lower::pft::Variable &var, llvm::StringRef globalName, mlir::StringAttr linkage, - fir::CUDADataAttributeAttr cudaAttr) { + cuf::DataAttributeAttr dataAttr) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); const Fortran::semantics::Symbol &sym = var.getSymbol(); mlir::Location loc = genLocation(converter, sym); @@ -514,7 +515,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter, if (!global) global = builder.createGlobal(loc, symTy, globalName, linkage, mlir::Attribute{}, - isConst, var.isTarget(), cudaAttr); + isConst, var.isTarget(), dataAttr); if (Fortran::semantics::IsAllocatableOrPointer(sym) && !Fortran::semantics::IsProcedure(sym)) { const auto *details = @@ -694,9 +695,9 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter, return builder.create(loc, fir::ReferenceType::get(ty)); if (Fortran::semantics::NeedCUDAAlloc(ultimateSymbol)) { - fir::CUDADataAttributeAttr cudaAttr = - Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(), - ultimateSymbol); + cuf::DataAttributeAttr dataAttr = + Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(), + ultimateSymbol); llvm::SmallVector indices; llvm::SmallVector elidedShape = fir::factory::elideExtentsAlreadyInType(ty, shape); @@ -705,8 +706,8 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter, auto idxTy = builder.getIndexType(); for (mlir::Value sh : elidedShape) indices.push_back(builder.createConvert(loc, idxTy, sh)); - return builder.create(loc, ty, nm, symNm, cudaAttr, - lenParams, indices); + return builder.create(loc, ty, nm, symNm, dataAttr, lenParams, + indices); } // Let the builder do all the heavy lifting. @@ -950,10 +951,10 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter, converter.getSymbolExtendedValue(var.getSymbol(), &symMap); auto *sym = &var.getSymbol(); converter.getFctCtx().attachCleanup([builder, loc, exv, sym]() { - fir::CUDADataAttributeAttr cudaAttr = - Fortran::lower::translateSymbolCUDADataAttribute( - builder->getContext(), *sym); - builder->create(loc, fir::getBase(exv), cudaAttr); + cuf::DataAttributeAttr dataAttr = + Fortran::lower::translateSymbolCUFDataAttribute(builder->getContext(), + *sym); + builder->create(loc, fir::getBase(exv), dataAttr); }); } } @@ -1628,11 +1629,11 @@ fir::FortranVariableFlagsAttr Fortran::lower::translateSymbolAttributes( return fir::FortranVariableFlagsAttr::get(mlirContext, flags); } -fir::CUDADataAttributeAttr Fortran::lower::translateSymbolCUDADataAttribute( +cuf::DataAttributeAttr Fortran::lower::translateSymbolCUFDataAttribute( mlir::MLIRContext *mlirContext, const Fortran::semantics::Symbol &sym) { std::optional cudaAttr = Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate()); - return fir::getCUDADataAttribute(mlirContext, cudaAttr); + return cuf::getDataAttribute(mlirContext, cudaAttr); } /// Map a symbol to its FIR address and evaluated specification expressions. @@ -1672,9 +1673,9 @@ static void genDeclareSymbol(Fortran::lower::AbstractConverter &converter, auto name = converter.mangleName(sym); fir::FortranVariableFlagsAttr attributes = Fortran::lower::translateSymbolAttributes(builder.getContext(), sym); - fir::CUDADataAttributeAttr cudaAttr = - Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(), - sym); + cuf::DataAttributeAttr dataAttr = + Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(), + sym); if (sym.test(Fortran::semantics::Symbol::Flag::CrayPointee)) { mlir::Type ptrBoxType = @@ -1716,7 +1717,7 @@ static void genDeclareSymbol(Fortran::lower::AbstractConverter &converter, dummyScope = converter.dummyArgsScopeValue(); auto newBase = builder.create( loc, base, name, shapeOrShift, lenParams, dummyScope, attributes, - cudaAttr); + dataAttr); symMap.addVariableDefinition(sym, newBase, force); return; } @@ -1762,15 +1763,15 @@ void Fortran::lower::genDeclareSymbol( fir::FortranVariableFlagsAttr attributes = Fortran::lower::translateSymbolAttributes( builder.getContext(), sym.GetUltimate(), extraFlags); - fir::CUDADataAttributeAttr cudaAttr = - Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(), - sym.GetUltimate()); + cuf::DataAttributeAttr dataAttr = + Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(), + sym.GetUltimate()); auto name = converter.mangleName(sym); mlir::Value dummyScope; if (converter.isRegisteredDummySymbol(sym)) dummyScope = converter.dummyArgsScopeValue(); hlfir::EntityWithAttributes declare = hlfir::genDeclare( - loc, builder, exv, name, attributes, dummyScope, cudaAttr); + loc, builder, exv, name, attributes, dummyScope, dataAttr); symMap.addVariableDefinition(sym, declare.getIfVariableInterface(), force); return; } @@ -2272,10 +2273,10 @@ void Fortran::lower::defineModuleVariable( // Do nothing. Mapping will be done on user side. } else { std::string globalName = converter.mangleName(sym); - fir::CUDADataAttributeAttr cudaAttr = - Fortran::lower::translateSymbolCUDADataAttribute( + cuf::DataAttributeAttr dataAttr = + Fortran::lower::translateSymbolCUFDataAttribute( converter.getFirOpBuilder().getContext(), sym); - defineGlobal(converter, var, globalName, linkage, cudaAttr); + defineGlobal(converter, var, globalName, linkage, dataAttr); } } diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index bd018d7f015b..3c3fd02d7c88 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -322,18 +322,18 @@ mlir::Value fir::FirOpBuilder::createHeapTemporary( fir::GlobalOp fir::FirOpBuilder::createGlobal( mlir::Location loc, mlir::Type type, llvm::StringRef name, mlir::StringAttr linkage, mlir::Attribute value, bool isConst, - bool isTarget, fir::CUDADataAttributeAttr cudaAttr) { + bool isTarget, cuf::DataAttributeAttr dataAttr) { if (auto global = getNamedGlobal(name)) return global; auto module = getModule(); auto insertPt = saveInsertionPoint(); setInsertionPoint(module.getBody(), module.getBody()->end()); llvm::SmallVector attrs; - if (cudaAttr) { + if (dataAttr) { auto globalOpName = mlir::OperationName(fir::GlobalOp::getOperationName(), module.getContext()); attrs.push_back(mlir::NamedAttribute( - fir::GlobalOp::getCudaAttrAttrName(globalOpName), cudaAttr)); + fir::GlobalOp::getDataAttrAttrName(globalOpName), dataAttr)); } auto glob = create(loc, name, isConst, isTarget, type, value, linkage, attrs); @@ -346,7 +346,7 @@ fir::GlobalOp fir::FirOpBuilder::createGlobal( fir::GlobalOp fir::FirOpBuilder::createGlobal( mlir::Location loc, mlir::Type type, llvm::StringRef name, bool isConst, bool isTarget, std::function bodyBuilder, - mlir::StringAttr linkage, fir::CUDADataAttributeAttr cudaAttr) { + mlir::StringAttr linkage, cuf::DataAttributeAttr dataAttr) { if (auto global = getNamedGlobal(name)) return global; auto module = getModule(); diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8fdab2a57181..511585dc7689 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -199,7 +199,7 @@ fir::FortranVariableOpInterface hlfir::genDeclare(mlir::Location loc, fir::FirOpBuilder &builder, const fir::ExtendedValue &exv, llvm::StringRef name, fir::FortranVariableFlagsAttr flags, mlir::Value dummyScope, - fir::CUDADataAttributeAttr cudaAttr) { + cuf::DataAttributeAttr dataAttr) { mlir::Value base = fir::getBase(exv); assert(fir::conformsWithPassByRef(base.getType()) && @@ -229,7 +229,7 @@ hlfir::genDeclare(mlir::Location loc, fir::FirOpBuilder &builder, }, [](const auto &) {}); auto declareOp = builder.create( - loc, base, name, shapeOrShift, lenParams, dummyScope, flags, cudaAttr); + loc, base, name, shapeOrShift, lenParams, dummyScope, flags, dataAttr); return mlir::cast(declareOp.getOperation()); } diff --git a/flang/lib/Optimizer/Dialect/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CMakeLists.txt index 745439b7e1e5..a8235f841b87 100644 --- a/flang/lib/Optimizer/Dialect/CMakeLists.txt +++ b/flang/lib/Optimizer/Dialect/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(Support) +add_subdirectory(CUF) add_flang_library(FIRDialect FIRAttr.cpp @@ -13,9 +14,11 @@ add_flang_library(FIRDialect CanonicalizationPatternsIncGen MLIRIR FIROpsIncGen + CUFAttrsIncGen intrinsics_gen LINK_LIBS + CUFAttrs FIRDialectSupport MLIRArithDialect MLIRBuiltinToLLVMIRTranslation diff --git a/flang/lib/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt new file mode 100644 index 000000000000..81db40f3ba46 --- /dev/null +++ b/flang/lib/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt @@ -0,0 +1,16 @@ +# Keep CUF attributes as a separate library as FIR and HLFIR depend on it. +add_flang_library(CUFAttrs + CUFAttr.cpp + + DEPENDS + MLIRIR + CUFAttrsIncGen + + LINK_LIBS + MLIRTargetLLVMIRExport + + LINK_COMPONENTS + AsmParser + AsmPrinter + Remarks +) diff --git a/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp b/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp new file mode 100644 index 000000000000..52c733dcad6f --- /dev/null +++ b/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp @@ -0,0 +1,32 @@ +//===-- CUFAttr.cpp -------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h" +#include "flang/Optimizer/Dialect/CUF/CUFDialect.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/DialectImplementation.h" +#include "mlir/IR/OpDefinition.h" +#include "llvm/ADT/TypeSwitch.h" + +#include "flang/Optimizer/Dialect/CUF/Attributes/CUFEnumAttr.cpp.inc" +#define GET_ATTRDEF_CLASSES +#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp.inc" + +namespace cuf { + +void CUFDialect::registerAttributes() { + addAttributes(); +} + +} // namespace cuf diff --git a/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt new file mode 100644 index 000000000000..d5ce5e0a7614 --- /dev/null +++ b/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt @@ -0,0 +1,22 @@ +add_subdirectory(Attributes) + +add_flang_library(CUFDialect + CUFDialect.cpp + CUFOps.cpp + + DEPENDS + MLIRIR + CUFOpsIncGen + + LINK_LIBS + CUFAttrs + FIRDialect + FIRDialectSupport + MLIRIR + MLIRTargetLLVMIRExport + + LINK_COMPONENTS + AsmParser + AsmPrinter + Remarks +) diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFDialect.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFDialect.cpp new file mode 100644 index 000000000000..47d3636df4f9 --- /dev/null +++ b/flang/lib/Optimizer/Dialect/CUF/CUFDialect.cpp @@ -0,0 +1,25 @@ +//===-- CUFDialect.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Dialect/CUF/CUFDialect.h" +#include "flang/Optimizer/Dialect/CUF/CUFOps.h" +#include "flang/Optimizer/Dialect/FIRDialect.h" + +#include "flang/Optimizer/Dialect/CUF/CUFDialect.cpp.inc" + +void cuf::CUFDialect::initialize() { + registerAttributes(); + addOperations< +#define GET_OP_LIST +#include "flang/Optimizer/Dialect/CUF/CUFOps.cpp.inc" + >(); +} diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp new file mode 100644 index 000000000000..870652c72fab --- /dev/null +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -0,0 +1,219 @@ +//===-- CUFOps.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Dialect/CUF/CUFOps.h" +#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h" +#include "flang/Optimizer/Dialect/CUF/CUFDialect.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/PatternMatch.h" +#include "llvm/ADT/SmallVector.h" + +//===----------------------------------------------------------------------===// +// AllocOp +//===----------------------------------------------------------------------===// + +static mlir::Type wrapAllocaResultType(mlir::Type intype) { + if (mlir::isa(intype)) + return {}; + return fir::ReferenceType::get(intype); +} + +void cuf::AllocOp::build(mlir::OpBuilder &builder, mlir::OperationState &result, + mlir::Type inType, llvm::StringRef uniqName, + llvm::StringRef bindcName, + cuf::DataAttributeAttr cudaAttr, + mlir::ValueRange typeparams, mlir::ValueRange shape, + llvm::ArrayRef attributes) { + mlir::StringAttr nameAttr = + uniqName.empty() ? mlir::StringAttr{} : builder.getStringAttr(uniqName); + mlir::StringAttr bindcAttr = + bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName); + build(builder, result, wrapAllocaResultType(inType), + mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape, + cudaAttr); + result.addAttributes(attributes); +} + +template +static mlir::LogicalResult checkCudaAttr(Op op) { + if (op.getDataAttr() == cuf::DataAttribute::Device || + op.getDataAttr() == cuf::DataAttribute::Managed || + op.getDataAttr() == cuf::DataAttribute::Unified) + return mlir::success(); + return op.emitOpError("expect device, managed or unified cuda attribute"); +} + +mlir::LogicalResult cuf::AllocOp::verify() { return checkCudaAttr(*this); } + +//===----------------------------------------------------------------------===// +// FreeOp +//===----------------------------------------------------------------------===// + +mlir::LogicalResult cuf::FreeOp::verify() { return checkCudaAttr(*this); } + +//===----------------------------------------------------------------------===// +// AllocateOp +//===----------------------------------------------------------------------===// + +mlir::LogicalResult cuf::AllocateOp::verify() { + if (getPinned() && getStream()) + return emitOpError("pinned and stream cannot appears at the same time"); + if (!mlir::isa(fir::unwrapRefType(getBox().getType()))) + return emitOpError( + "expect box to be a reference to a class or box type value"); + if (getSource() && + !mlir::isa(fir::unwrapRefType(getSource().getType()))) + return emitOpError( + "expect source to be a reference to/or a class or box type value"); + if (getErrmsg() && + !mlir::isa(fir::unwrapRefType(getErrmsg().getType()))) + return emitOpError( + "expect errmsg to be a reference to/or a box type value"); + if (getErrmsg() && !getHasStat()) + return emitOpError("expect stat attribute when errmsg is provided"); + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// DeallocateOp +//===----------------------------------------------------------------------===// + +mlir::LogicalResult cuf::DeallocateOp::verify() { + if (!mlir::isa(fir::unwrapRefType(getBox().getType()))) + return emitOpError( + "expect box to be a reference to class or box type value"); + if (getErrmsg() && + !mlir::isa(fir::unwrapRefType(getErrmsg().getType()))) + return emitOpError( + "expect errmsg to be a reference to/or a box type value"); + if (getErrmsg() && !getHasStat()) + return emitOpError("expect stat attribute when errmsg is provided"); + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// KernelOp +//===----------------------------------------------------------------------===// + +llvm::SmallVector cuf::KernelOp::getLoopRegions() { + return {&getRegion()}; +} + +mlir::ParseResult parseCUFKernelValues( + mlir::OpAsmParser &parser, + llvm::SmallVectorImpl &values, + llvm::SmallVectorImpl &types) { + if (mlir::succeeded(parser.parseOptionalStar())) + return mlir::success(); + + if (mlir::succeeded(parser.parseOptionalLParen())) { + if (mlir::failed(parser.parseCommaSeparatedList( + mlir::AsmParser::Delimiter::None, [&]() { + if (parser.parseOperand(values.emplace_back())) + return mlir::failure(); + return mlir::success(); + }))) + return mlir::failure(); + auto builder = parser.getBuilder(); + for (size_t i = 0; i < values.size(); i++) { + types.emplace_back(builder.getI32Type()); + } + if (parser.parseRParen()) + return mlir::failure(); + } else { + if (parser.parseOperand(values.emplace_back())) + return mlir::failure(); + auto builder = parser.getBuilder(); + types.emplace_back(builder.getI32Type()); + return mlir::success(); + } + return mlir::success(); +} + +void printCUFKernelValues(mlir::OpAsmPrinter &p, mlir::Operation *op, + mlir::ValueRange values, mlir::TypeRange types) { + if (values.empty()) + p << "*"; + + if (values.size() > 1) + p << "("; + llvm::interleaveComma(values, p, [&p](mlir::Value v) { p << v; }); + if (values.size() > 1) + p << ")"; +} + +mlir::ParseResult parseCUFKernelLoopControl( + mlir::OpAsmParser &parser, mlir::Region ®ion, + llvm::SmallVectorImpl &lowerbound, + llvm::SmallVectorImpl &lowerboundType, + llvm::SmallVectorImpl &upperbound, + llvm::SmallVectorImpl &upperboundType, + llvm::SmallVectorImpl &step, + llvm::SmallVectorImpl &stepType) { + + llvm::SmallVector inductionVars; + if (parser.parseLParen() || + parser.parseArgumentList(inductionVars, + mlir::OpAsmParser::Delimiter::None, + /*allowType=*/true) || + parser.parseRParen() || parser.parseEqual() || parser.parseLParen() || + parser.parseOperandList(lowerbound, inductionVars.size(), + mlir::OpAsmParser::Delimiter::None) || + parser.parseColonTypeList(lowerboundType) || parser.parseRParen() || + parser.parseKeyword("to") || parser.parseLParen() || + parser.parseOperandList(upperbound, inductionVars.size(), + mlir::OpAsmParser::Delimiter::None) || + parser.parseColonTypeList(upperboundType) || parser.parseRParen() || + parser.parseKeyword("step") || parser.parseLParen() || + parser.parseOperandList(step, inductionVars.size(), + mlir::OpAsmParser::Delimiter::None) || + parser.parseColonTypeList(stepType) || parser.parseRParen()) + return mlir::failure(); + return parser.parseRegion(region, inductionVars); +} + +void printCUFKernelLoopControl( + mlir::OpAsmPrinter &p, mlir::Operation *op, mlir::Region ®ion, + mlir::ValueRange lowerbound, mlir::TypeRange lowerboundType, + mlir::ValueRange upperbound, mlir::TypeRange upperboundType, + mlir::ValueRange steps, mlir::TypeRange stepType) { + mlir::ValueRange regionArgs = region.front().getArguments(); + if (!regionArgs.empty()) { + p << "("; + llvm::interleaveComma( + regionArgs, p, [&p](mlir::Value v) { p << v << " : " << v.getType(); }); + p << ") = (" << lowerbound << " : " << lowerboundType << ") to (" + << upperbound << " : " << upperboundType << ") " + << " step (" << steps << " : " << stepType << ") "; + } + p.printRegion(region, /*printEntryBlockArgs=*/false); +} + +mlir::LogicalResult cuf::KernelOp::verify() { + if (getLowerbound().size() != getUpperbound().size() || + getLowerbound().size() != getStep().size()) + return emitOpError( + "expect same number of values in lowerbound, upperbound and step"); + + return mlir::success(); +} + +// Tablegen operators + +#define GET_OP_CLASSES +#include "flang/Optimizer/Dialect/CUF/CUFOps.cpp.inc" diff --git a/flang/lib/Optimizer/Dialect/FIRAttr.cpp b/flang/lib/Optimizer/Dialect/FIRAttr.cpp index 9ea3a0568f69..2faba63dfba0 100644 --- a/flang/lib/Optimizer/Dialect/FIRAttr.cpp +++ b/flang/lib/Optimizer/Dialect/FIRAttr.cpp @@ -298,7 +298,5 @@ void fir::printFirAttribute(FIROpsDialect *dialect, mlir::Attribute attr, void FIROpsDialect::registerAttributes() { addAttributes(); + UpperBoundAttr>(); } diff --git a/flang/lib/Optimizer/Dialect/FIRDialect.cpp b/flang/lib/Optimizer/Dialect/FIRDialect.cpp index 4d1e8cd1405a..4b1dadaac672 100644 --- a/flang/lib/Optimizer/Dialect/FIRDialect.cpp +++ b/flang/lib/Optimizer/Dialect/FIRDialect.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "flang/Optimizer/Dialect/FIRDialect.h" +#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h" #include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIRType.h" diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index edf7f7f4b1a9..94113da9a46c 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -3898,169 +3898,6 @@ mlir::LogicalResult fir::DeclareOp::verify() { return fortranVar.verifyDeclareLikeOpImpl(getMemref()); } -llvm::SmallVector fir::CUDAKernelOp::getLoopRegions() { - return {&getRegion()}; -} - -mlir::ParseResult parseCUFKernelValues( - mlir::OpAsmParser &parser, - llvm::SmallVectorImpl &values, - llvm::SmallVectorImpl &types) { - if (mlir::succeeded(parser.parseOptionalStar())) - return mlir::success(); - - if (mlir::succeeded(parser.parseOptionalLParen())) { - if (mlir::failed(parser.parseCommaSeparatedList( - mlir::AsmParser::Delimiter::None, [&]() { - if (parser.parseOperand(values.emplace_back())) - return mlir::failure(); - return mlir::success(); - }))) - return mlir::failure(); - auto builder = parser.getBuilder(); - for (size_t i = 0; i < values.size(); i++) { - types.emplace_back(builder.getI32Type()); - } - if (parser.parseRParen()) - return mlir::failure(); - } else { - if (parser.parseOperand(values.emplace_back())) - return mlir::failure(); - auto builder = parser.getBuilder(); - types.emplace_back(builder.getI32Type()); - return mlir::success(); - } - return mlir::success(); -} - -void printCUFKernelValues(mlir::OpAsmPrinter &p, mlir::Operation *op, - mlir::ValueRange values, mlir::TypeRange types) { - if (values.empty()) - p << "*"; - - if (values.size() > 1) - p << "("; - llvm::interleaveComma(values, p, [&p](mlir::Value v) { p << v; }); - if (values.size() > 1) - p << ")"; -} - -mlir::ParseResult parseCUFKernelLoopControl( - mlir::OpAsmParser &parser, mlir::Region ®ion, - llvm::SmallVectorImpl &lowerbound, - llvm::SmallVectorImpl &lowerboundType, - llvm::SmallVectorImpl &upperbound, - llvm::SmallVectorImpl &upperboundType, - llvm::SmallVectorImpl &step, - llvm::SmallVectorImpl &stepType) { - - llvm::SmallVector inductionVars; - if (parser.parseLParen() || - parser.parseArgumentList(inductionVars, - mlir::OpAsmParser::Delimiter::None, - /*allowType=*/true) || - parser.parseRParen() || parser.parseEqual() || parser.parseLParen() || - parser.parseOperandList(lowerbound, inductionVars.size(), - mlir::OpAsmParser::Delimiter::None) || - parser.parseColonTypeList(lowerboundType) || parser.parseRParen() || - parser.parseKeyword("to") || parser.parseLParen() || - parser.parseOperandList(upperbound, inductionVars.size(), - mlir::OpAsmParser::Delimiter::None) || - parser.parseColonTypeList(upperboundType) || parser.parseRParen() || - parser.parseKeyword("step") || parser.parseLParen() || - parser.parseOperandList(step, inductionVars.size(), - mlir::OpAsmParser::Delimiter::None) || - parser.parseColonTypeList(stepType) || parser.parseRParen()) - return mlir::failure(); - return parser.parseRegion(region, inductionVars); -} - -void printCUFKernelLoopControl( - mlir::OpAsmPrinter &p, mlir::Operation *op, mlir::Region ®ion, - mlir::ValueRange lowerbound, mlir::TypeRange lowerboundType, - mlir::ValueRange upperbound, mlir::TypeRange upperboundType, - mlir::ValueRange steps, mlir::TypeRange stepType) { - mlir::ValueRange regionArgs = region.front().getArguments(); - if (!regionArgs.empty()) { - p << "("; - llvm::interleaveComma( - regionArgs, p, [&p](mlir::Value v) { p << v << " : " << v.getType(); }); - p << ") = (" << lowerbound << " : " << lowerboundType << ") to (" - << upperbound << " : " << upperboundType << ") " - << " step (" << steps << " : " << stepType << ") "; - } - p.printRegion(region, /*printEntryBlockArgs=*/false); -} - -mlir::LogicalResult fir::CUDAKernelOp::verify() { - if (getLowerbound().size() != getUpperbound().size() || - getLowerbound().size() != getStep().size()) - return emitOpError( - "expect same number of values in lowerbound, upperbound and step"); - - return mlir::success(); -} - -mlir::LogicalResult fir::CUDAAllocateOp::verify() { - if (getPinned() && getStream()) - return emitOpError("pinned and stream cannot appears at the same time"); - if (!mlir::isa(fir::unwrapRefType(getBox().getType()))) - return emitOpError( - "expect box to be a reference to a class or box type value"); - if (getSource() && - !mlir::isa(fir::unwrapRefType(getSource().getType()))) - return emitOpError( - "expect source to be a reference to/or a class or box type value"); - if (getErrmsg() && - !mlir::isa(fir::unwrapRefType(getErrmsg().getType()))) - return emitOpError( - "expect errmsg to be a reference to/or a box type value"); - if (getErrmsg() && !getHasStat()) - return emitOpError("expect stat attribute when errmsg is provided"); - return mlir::success(); -} - -mlir::LogicalResult fir::CUDADeallocateOp::verify() { - if (!mlir::isa(fir::unwrapRefType(getBox().getType()))) - return emitOpError( - "expect box to be a reference to class or box type value"); - if (getErrmsg() && - !mlir::isa(fir::unwrapRefType(getErrmsg().getType()))) - return emitOpError( - "expect errmsg to be a reference to/or a box type value"); - if (getErrmsg() && !getHasStat()) - return emitOpError("expect stat attribute when errmsg is provided"); - return mlir::success(); -} - -void fir::CUDAAllocOp::build( - mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Type inType, - llvm::StringRef uniqName, llvm::StringRef bindcName, - fir::CUDADataAttributeAttr cudaAttr, mlir::ValueRange typeparams, - mlir::ValueRange shape, llvm::ArrayRef attributes) { - mlir::StringAttr nameAttr = - uniqName.empty() ? mlir::StringAttr{} : builder.getStringAttr(uniqName); - mlir::StringAttr bindcAttr = - bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName); - build(builder, result, wrapAllocaResultType(inType), - mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape, - cudaAttr); - result.addAttributes(attributes); -} - -template -static mlir::LogicalResult checkCudaAttr(Op op) { - if (op.getCudaAttr() == fir::CUDADataAttribute::Device || - op.getCudaAttr() == fir::CUDADataAttribute::Managed || - op.getCudaAttr() == fir::CUDADataAttribute::Unified) - return mlir::success(); - return op.emitOpError("expect device, managed or unified cuda attribute"); -} - -mlir::LogicalResult fir::CUDAAllocOp::verify() { return checkCudaAttr(*this); } - -mlir::LogicalResult fir::CUDAFreeOp::verify() { return checkCudaAttr(*this); } - //===----------------------------------------------------------------------===// // FIROpsDialect //===----------------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/HLFIR/IR/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/IR/CMakeLists.txt index dc9e080b0f8b..267d6469ee7a 100644 --- a/flang/lib/Optimizer/HLFIR/IR/CMakeLists.txt +++ b/flang/lib/Optimizer/HLFIR/IR/CMakeLists.txt @@ -5,11 +5,13 @@ add_flang_library(HLFIRDialect HLFIROps.cpp DEPENDS + CUFAttrsIncGen FIRDialect HLFIROpsIncGen ${dialect_libs} LINK_LIBS + CUFAttrs FIRDialect MLIRIR ${dialect_libs} diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp index c232ae165d4c..11196353b07c 100644 --- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp +++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp @@ -127,14 +127,14 @@ void hlfir::DeclareOp::build(mlir::OpBuilder &builder, mlir::ValueRange typeparams, mlir::Value dummy_scope, fir::FortranVariableFlagsAttr fortran_attrs, - fir::CUDADataAttributeAttr cuda_attr) { + cuf::DataAttributeAttr data_attr) { auto nameAttr = builder.getStringAttr(uniq_name); mlir::Type inputType = memref.getType(); bool hasExplicitLbs = hasExplicitLowerBounds(shape); mlir::Type hlfirVariableType = getHLFIRVariableType(inputType, hasExplicitLbs); build(builder, result, {hlfirVariableType, inputType}, memref, shape, - typeparams, dummy_scope, nameAttr, fortran_attrs, cuda_attr); + typeparams, dummy_scope, nameAttr, fortran_attrs, data_attr); } mlir::LogicalResult hlfir::DeclareOp::verify() { diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt index ad569ce3b41f..fa3a59303137 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt @@ -11,11 +11,13 @@ add_flang_library(HLFIRTransforms OptimizedBufferization.cpp DEPENDS + CUFAttrsIncGen FIRDialect HLFIROpsIncGen ${dialect_libs} LINK_LIBS + CUFAttrs FIRAnalysis FIRDialect FIRBuilder diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp index 3570e0011ca7..e56595d1c8e2 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp @@ -320,16 +320,16 @@ public: mlir::Location loc = declareOp->getLoc(); mlir::Value memref = declareOp.getMemref(); fir::FortranVariableFlagsAttr fortranAttrs; - fir::CUDADataAttributeAttr cudaAttr; + cuf::DataAttributeAttr dataAttr; if (auto attrs = declareOp.getFortranAttrs()) fortranAttrs = fir::FortranVariableFlagsAttr::get(rewriter.getContext(), *attrs); - if (auto attr = declareOp.getCudaAttr()) - cudaAttr = fir::CUDADataAttributeAttr::get(rewriter.getContext(), *attr); + if (auto attr = declareOp.getDataAttr()) + dataAttr = cuf::DataAttributeAttr::get(rewriter.getContext(), *attr); auto firDeclareOp = rewriter.create( loc, memref.getType(), memref, declareOp.getShape(), declareOp.getTypeparams(), declareOp.getDummyScope(), - declareOp.getUniqName(), fortranAttrs, cudaAttr); + declareOp.getUniqName(), fortranAttrs, dataAttr); // Propagate other attributes from hlfir.declare to fir.declare. // OpenACC's acc.declare is one example. Right now, the propagation diff --git a/flang/test/Fir/cuf-invalid.fir b/flang/test/Fir/cuf-invalid.fir index 5a12e3c1a4bf..6e18e48ac82f 100644 --- a/flang/test/Fir/cuf-invalid.fir +++ b/flang/test/Fir/cuf-invalid.fir @@ -4,11 +4,11 @@ func.func @_QPsub1() { %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} %1 = fir.alloca i32 %pinned = fir.alloca i1 - %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> %s = fir.load %1 : !fir.ref - // expected-error@+1{{'fir.cuda_allocate' op pinned and stream cannot appears at the same time}} - %13 = fir.cuda_allocate %11 : !fir.ref> stream(%s : i32) pinned(%pinned : !fir.ref) {cuda_attr = #fir.cuda} -> i32 + // expected-error@+1{{'cuf.allocate' op pinned and stream cannot appears at the same time}} + %13 = cuf.allocate %11 : !fir.ref> stream(%s : i32) pinned(%pinned : !fir.ref) {data_attr = #cuf.cuda} -> i32 return } @@ -16,8 +16,8 @@ func.func @_QPsub1() { func.func @_QPsub1() { %1 = fir.alloca i32 - // expected-error@+1{{'fir.cuda_allocate' op expect box to be a reference to a class or box type value}} - %2 = fir.cuda_allocate %1 : !fir.ref {cuda_attr = #fir.cuda} -> i32 + // expected-error@+1{{'cuf.allocate' op expect box to be a reference to a class or box type value}} + %2 = cuf.allocate %1 : !fir.ref {data_attr = #cuf.cuda} -> i32 return } @@ -25,15 +25,15 @@ func.func @_QPsub1() { func.func @_QPsub1() { %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} - %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %c100 = arith.constant 100 : index %7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"} %8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) %9 = fir.embox %8#1 : (!fir.ref>) -> !fir.box> %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> %16 = fir.convert %9 : (!fir.box>) -> !fir.box - // expected-error@+1{{'fir.cuda_allocate' op expect stat attribute when errmsg is provided}} - %13 = fir.cuda_allocate %11 : !fir.ref> errmsg(%16 : !fir.box) {cuda_attr = #fir.cuda} -> i32 + // expected-error@+1{{'cuf.allocate' op expect stat attribute when errmsg is provided}} + %13 = cuf.allocate %11 : !fir.ref> errmsg(%16 : !fir.box) {data_attr = #cuf.cuda} -> i32 return } @@ -41,11 +41,11 @@ func.func @_QPsub1() { func.func @_QPsub1() { %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} - %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %1 = fir.alloca i32 %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> - // expected-error@+1{{'fir.cuda_allocate' op expect errmsg to be a reference to/or a box type value}} - %13 = fir.cuda_allocate %11 : !fir.ref> errmsg(%1 : !fir.ref) {cuda_attr = #fir.cuda, hasStat} -> i32 + // expected-error@+1{{'cuf.allocate' op expect errmsg to be a reference to/or a box type value}} + %13 = cuf.allocate %11 : !fir.ref> errmsg(%1 : !fir.ref) {data_attr = #cuf.cuda, hasStat} -> i32 return } @@ -53,8 +53,8 @@ func.func @_QPsub1() { func.func @_QPsub1() { %1 = fir.alloca i32 - // expected-error@+1{{'fir.cuda_deallocate' op expect box to be a reference to class or box type value}} - %2 = fir.cuda_deallocate %1 : !fir.ref {cuda_attr = #fir.cuda} -> i32 + // expected-error@+1{{'cuf.deallocate' op expect box to be a reference to class or box type value}} + %2 = cuf.deallocate %1 : !fir.ref {data_attr = #cuf.cuda} -> i32 return } @@ -62,11 +62,11 @@ func.func @_QPsub1() { func.func @_QPsub1() { %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} - %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %1 = fir.alloca i32 %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> - // expected-error@+1{{'fir.cuda_deallocate' op expect errmsg to be a reference to/or a box type value}} - %13 = fir.cuda_deallocate %11 : !fir.ref> errmsg(%1 : !fir.ref) {cuda_attr = #fir.cuda, hasStat} -> i32 + // expected-error@+1{{'cuf.deallocate' op expect errmsg to be a reference to/or a box type value}} + %13 = cuf.deallocate %11 : !fir.ref> errmsg(%1 : !fir.ref) {data_attr = #cuf.cuda, hasStat} -> i32 return } @@ -74,32 +74,32 @@ func.func @_QPsub1() { func.func @_QPsub1() { %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} - %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %c100 = arith.constant 100 : index %7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"} %8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) %9 = fir.embox %8#1 : (!fir.ref>) -> !fir.box> %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> %16 = fir.convert %9 : (!fir.box>) -> !fir.box - // expected-error@+1{{'fir.cuda_deallocate' op expect stat attribute when errmsg is provided}} - %13 = fir.cuda_deallocate %11 : !fir.ref> errmsg(%16 : !fir.box) {cuda_attr = #fir.cuda} -> i32 + // expected-error@+1{{'cuf.deallocate' op expect stat attribute when errmsg is provided}} + %13 = cuf.deallocate %11 : !fir.ref> errmsg(%16 : !fir.box) {data_attr = #cuf.cuda} -> i32 return } // ----- func.func @_QPsub1() { - // expected-error@+1{{'fir.cuda_alloc' op expect device, managed or unified cuda attribute}} - %0 = fir.cuda_alloc f32 {bindc_name = "r", cuda_attr = #fir.cuda, uniq_name = "_QFsub1Er"} -> !fir.ref - fir.cuda_free %0 : !fir.ref {cuda_attr = #fir.cuda} + // expected-error@+1{{'cuf.alloc' op expect device, managed or unified cuda attribute}} + %0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda, uniq_name = "_QFsub1Er"} -> !fir.ref + cuf.free %0 : !fir.ref {data_attr = #cuf.cuda} return } // ----- func.func @_QPsub1() { - %0 = fir.cuda_alloc f32 {bindc_name = "r", cuda_attr = #fir.cuda, uniq_name = "_QFsub1Er"} -> !fir.ref - // expected-error@+1{{'fir.cuda_free' op expect device, managed or unified cuda attribute}} - fir.cuda_free %0 : !fir.ref {cuda_attr = #fir.cuda} + %0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda, uniq_name = "_QFsub1Er"} -> !fir.ref + // expected-error@+1{{'cuf.free' op expect device, managed or unified cuda attribute}} + cuf.free %0 : !fir.ref {data_attr = #cuf.cuda} return } diff --git a/flang/test/Fir/cuf.mlir b/flang/test/Fir/cuf.mlir index 8e2346def43e..188044d04b84 100644 --- a/flang/test/Fir/cuf.mlir +++ b/flang/test/Fir/cuf.mlir @@ -4,85 +4,85 @@ func.func @_QPsub1() { %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} - %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> - %13 = fir.cuda_allocate %11 : !fir.ref> {cuda_attr = #fir.cuda} -> i32 - %14 = fir.cuda_deallocate %11 : !fir.ref> {cuda_attr = #fir.cuda} -> i32 + %13 = cuf.allocate %11 : !fir.ref> {data_attr = #cuf.cuda} -> i32 + %14 = cuf.deallocate %11 : !fir.ref> {data_attr = #cuf.cuda} -> i32 return } -// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref> {cuda_attr = #fir.cuda} -> i32 -// CHECK: fir.cuda_deallocate %{{.*}} : !fir.ref> {cuda_attr = #fir.cuda} -> i32 +// CHECK: cuf.allocate %{{.*}} : !fir.ref> {data_attr = #cuf.cuda} -> i32 +// CHECK: cuf.deallocate %{{.*}} : !fir.ref> {data_attr = #cuf.cuda} -> i32 // ----- func.func @_QPsub1() { %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} %1 = fir.alloca i32 - %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> %s = fir.load %1 : !fir.ref - %13 = fir.cuda_allocate %11 : !fir.ref> stream(%s : i32) {cuda_attr = #fir.cuda} -> i32 + %13 = cuf.allocate %11 : !fir.ref> stream(%s : i32) {data_attr = #cuf.cuda} -> i32 return } -// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref> stream(%{{.*}} : i32) {cuda_attr = #fir.cuda} -> i32 +// CHECK: cuf.allocate %{{.*}} : !fir.ref> stream(%{{.*}} : i32) {data_attr = #cuf.cuda} -> i32 // ----- func.func @_QPsub1() { %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} %1 = fir.alloca !fir.box>> {bindc_name = "b", uniq_name = "_QFsub1Eb"} - %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %5:2 = hlfir.declare %1 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> %12 = fir.convert %5#1 : (!fir.ref>>>) -> !fir.ref> - %13 = fir.cuda_allocate %11 : !fir.ref> source(%12 : !fir.ref>) {cuda_attr = #fir.cuda} -> i32 + %13 = cuf.allocate %11 : !fir.ref> source(%12 : !fir.ref>) {data_attr = #cuf.cuda} -> i32 return } -// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref> source(%{{.*}} : !fir.ref>) {cuda_attr = #fir.cuda} -> i32 +// CHECK: cuf.allocate %{{.*}} : !fir.ref> source(%{{.*}} : !fir.ref>) {data_attr = #cuf.cuda} -> i32 // ----- func.func @_QPsub1() { %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} %pinned = fir.alloca i1 - %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> - %13 = fir.cuda_allocate %11 : !fir.ref> pinned(%pinned : !fir.ref) {cuda_attr = #fir.cuda} -> i32 + %13 = cuf.allocate %11 : !fir.ref> pinned(%pinned : !fir.ref) {data_attr = #cuf.cuda} -> i32 return } -// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref> pinned(%{{.*}} : !fir.ref) {cuda_attr = #fir.cuda} -> i32 +// CHECK: cuf.allocate %{{.*}} : !fir.ref> pinned(%{{.*}} : !fir.ref) {data_attr = #cuf.cuda} -> i32 // ----- func.func @_QPsub1() { %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} - %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %c100 = arith.constant 100 : index %7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"} %8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) %9 = fir.embox %8#1 : (!fir.ref>) -> !fir.box> %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> %16 = fir.convert %9 : (!fir.box>) -> !fir.box - %13 = fir.cuda_allocate %11 : !fir.ref> errmsg(%16 : !fir.box) {cuda_attr = #fir.cuda, hasStat} -> i32 - %14 = fir.cuda_deallocate %11 : !fir.ref> errmsg(%16 : !fir.box) {cuda_attr = #fir.cuda, hasStat} -> i32 + %13 = cuf.allocate %11 : !fir.ref> errmsg(%16 : !fir.box) {data_attr = #cuf.cuda, hasStat} -> i32 + %14 = cuf.deallocate %11 : !fir.ref> errmsg(%16 : !fir.box) {data_attr = #cuf.cuda, hasStat} -> i32 return } -// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref> errmsg(%{{.*}} : !fir.box) {cuda_attr = #fir.cuda, hasStat} -> i32 -// CHECK: fir.cuda_deallocate %{{.*}} : !fir.ref> errmsg(%{{.*}} : !fir.box) {cuda_attr = #fir.cuda, hasStat} -> i32 +// CHECK: cuf.allocate %{{.*}} : !fir.ref> errmsg(%{{.*}} : !fir.box) {data_attr = #cuf.cuda, hasStat} -> i32 +// CHECK: cuf.deallocate %{{.*}} : !fir.ref> errmsg(%{{.*}} : !fir.box) {data_attr = #cuf.cuda, hasStat} -> i32 // ----- func.func @_QPsub1() { - %0 = fir.cuda_alloc f32 {bindc_name = "r", cuda_attr = #fir.cuda, uniq_name = "_QFsub1Er"} -> !fir.ref - fir.cuda_free %0 : !fir.ref {cuda_attr = #fir.cuda} + %0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda, uniq_name = "_QFsub1Er"} -> !fir.ref + cuf.free %0 : !fir.ref {data_attr = #cuf.cuda} return } -// CHECK: fir.cuda_alloc -// CHECK: fir.cuda_free +// CHECK: cuf.alloc +// CHECK: cuf.free diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf index eff5f13669e9..74a3ec100a8f 100644 --- a/flang/test/Lower/CUDA/cuda-allocatable.cuf +++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf @@ -11,11 +11,11 @@ end subroutine ! CHECK-LABEL: func.func @_QPsub1() ! CHECK: %[[BOX:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} -! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) ! CHECK: fir.call @_FortranAAllocatableSetBounds -! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda} -> i32 +! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref>>> {data_attr = #cuf.cuda} -> i32 -! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda} -> i32 +! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {data_attr = #cuf.cuda} -> i32 ! CHECK: %[[BOX_LOAD:.*]] = fir.load %[[BOX_DECL]]#1 : !fir.ref>>> ! CHECK: %[[ADDR:.*]] = fir.box_addr %[[BOX_LOAD]] : (!fir.box>>) -> !fir.heap> @@ -23,7 +23,7 @@ end subroutine ! CHECK: %[[C0:.*]] = arith.constant 0 : i64 ! CHECK: %[[NE_C0:.*]] = arith.cmpi ne, %[[ADDR_I64]], %[[C0]] : i64 ! CHECK: fir.if %[[NE_C0]] { -! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda} -> i32 +! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {data_attr = #cuf.cuda} -> i32 ! CHECK: } subroutine sub2() @@ -36,18 +36,18 @@ end subroutine ! CHECK-LABEL: func.func @_QPsub2() ! CHECK: %[[BOX:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub2Ea"} -! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub2Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub2Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) ! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub2Eistat"} ! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub2Eistat"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: fir.call @_FortranAAllocatableSetBounds -! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda, hasStat} -> i32 +! CHECK: %[[STAT:.*]] = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref>>> {data_attr = #cuf.cuda, hasStat} -> i32 ! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref -! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda, hasStat} -> i32 +! CHECK: %[[STAT:.*]] = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {data_attr = #cuf.cuda, hasStat} -> i32 ! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref ! CHECK: fir.if %{{.*}} { -! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda} -> i32 +! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {data_attr = #cuf.cuda} -> i32 ! CHECK: } subroutine sub3() @@ -58,13 +58,13 @@ end subroutine ! CHECK-LABEL: func.func @_QPsub3() ! CHECK: %[[BOX:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub3Ea"} -! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub3Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub3Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) ! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsub3Eplog"} ! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK-2: fir.call @_FortranAAllocatableSetBounds -! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref>>> pinned(%[[PLOG_DECL]]#1 : !fir.ref>) {cuda_attr = #fir.cuda} -> i32 +! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref>>> pinned(%[[PLOG_DECL]]#1 : !fir.ref>) {data_attr = #cuf.cuda} -> i32 ! CHECK: fir.if %{{.*}} { -! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda} -> i32 +! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {data_attr = #cuf.cuda} -> i32 ! CHECK: } subroutine sub4() @@ -75,14 +75,14 @@ end subroutine ! CHECK-LABEL: func.func @_QPsub4() ! CHECK: %[[BOX:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub4Ea"} -! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub4Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub4Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) ! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"} ! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: fir.call @_FortranAAllocatableSetBounds ! CHECK: %[[STREAM:.*]] = fir.load %[[ISTREAM_DECL]]#0 : !fir.ref -! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref>>> stream(%[[STREAM]] : i32) {cuda_attr = #fir.cuda} -> i32 +! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref>>> stream(%[[STREAM]] : i32) {data_attr = #cuf.cuda} -> i32 ! CHECK: fir.if %{{.*}} { -! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda} -> i32 +! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {data_attr = #cuf.cuda} -> i32 ! CHECK: } subroutine sub5() @@ -93,16 +93,16 @@ end subroutine ! CHECK-LABEL: func.func @_QPsub5() ! CHECK: %[[BOX_A:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub5Ea"} -! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub5Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub5Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) ! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box>> {bindc_name = "b", uniq_name = "_QFsub5Eb"} ! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub5Eb"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) ! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref>>> ! CHECK: fir.call @_FortranAAllocatableSetBounds -! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref>>> source(%[[LOAD_B]] : !fir.box>>) {cuda_attr = #fir.cuda} -> i32 +! CHECK: %{{.*}} = cuf.allocate %[[BOX_A_DECL]]#1 : !fir.ref>>> source(%[[LOAD_B]] : !fir.box>>) {data_attr = #cuf.cuda} -> i32 ! CHECK: fir.if ! CHECK: fir.freemem ! CHECK: fir.if %{{.*}} { -! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda} -> i32 +! CHECK: %{{.*}} = cuf.deallocate %[[BOX_A_DECL]]#1 : !fir.ref>>> {data_attr = #cuf.cuda} -> i32 ! CHECK: } subroutine sub6() @@ -113,14 +113,14 @@ end subroutine ! CHECK-LABEL: func.func @_QPsub6() ! CHECK: %[[BOX_A:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub6Ea"} -! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub6Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub6Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) ! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box>> {bindc_name = "b", uniq_name = "_QFsub6Eb"} ! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub6Eb"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) ! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref>>> ! CHECK: fir.call @_FortranAAllocatableApplyMold -! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda} -> i32 +! CHECK: %{{.*}} = cuf.allocate %[[BOX_A_DECL]]#1 : !fir.ref>>> {data_attr = #cuf.cuda} -> i32 ! CHECK: fir.if %{{.*}} { -! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda} -> i32 +! CHECK: %{{.*}} = cuf.deallocate %[[BOX_A_DECL]]#1 : !fir.ref>>> {data_attr = #cuf.cuda} -> i32 ! CHECK: } subroutine sub7() @@ -134,19 +134,19 @@ end subroutine ! CHECK-LABEL: func.func @_QPsub7() ! CHECK: %[[BOX:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub7Ea"} -! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub7Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub7Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) ! CHECK: %[[ERR:.*]] = fir.alloca !fir.char<1,50> {bindc_name = "err", uniq_name = "_QFsub7Eerr"} ! CHECK: %[[ERR_DECL:.*]]:2 = hlfir.declare %[[ERR]] typeparams %{{.*}} {uniq_name = "_QFsub7Eerr"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub7Eistat"} ! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub7Eistat"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref>) -> !fir.box> ! CHECK: fir.call @_FortranAAllocatableSetBounds -! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref>>> errmsg(%[[ERR_BOX]] : !fir.box>) {cuda_attr = #fir.cuda, hasStat} -> i32 +! CHECK: %[[STAT:.*]] = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref>>> errmsg(%[[ERR_BOX]] : !fir.box>) {data_attr = #cuf.cuda, hasStat} -> i32 ! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref ! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref>) -> !fir.box> -! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref>>> errmsg(%15 : !fir.box>) {cuda_attr = #fir.cuda, hasStat} -> i32 +! CHECK: %[[STAT:.*]] = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref>>> errmsg(%15 : !fir.box>) {data_attr = #cuf.cuda, hasStat} -> i32 ! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref ! CHECK: fir.if %{{.*}} { -! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda} -> i32 +! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref>>> {data_attr = #cuf.cuda} -> i32 ! CHECK: } diff --git a/flang/test/Lower/CUDA/cuda-data-attribute.cuf b/flang/test/Lower/CUDA/cuda-data-attribute.cuf index 3eb42a6a5d40..f7f58a43a143 100644 --- a/flang/test/Lower/CUDA/cuda-data-attribute.cuf +++ b/flang/test/Lower/CUDA/cuda-data-attribute.cuf @@ -5,13 +5,13 @@ module cuda_var real, constant :: mod_a_rc -! CHECK: fir.global @_QMcuda_varEmod_a_rc {cuda_attr = #fir.cuda} : f32 +! CHECK: fir.global @_QMcuda_varEmod_a_rc {data_attr = #cuf.cuda} : f32 real, device :: mod_b_ra -! CHECK: fir.global @_QMcuda_varEmod_b_ra {cuda_attr = #fir.cuda} : f32 +! CHECK: fir.global @_QMcuda_varEmod_b_ra {data_attr = #cuf.cuda} : f32 real, allocatable, managed :: mod_c_rm -! CHECK: fir.global @_QMcuda_varEmod_c_rm {cuda_attr = #fir.cuda} : !fir.box> +! CHECK: fir.global @_QMcuda_varEmod_c_rm {data_attr = #cuf.cuda} : !fir.box> real, allocatable, pinned :: mod_d_rp -! CHECK: fir.global @_QMcuda_varEmod_d_rp {cuda_attr = #fir.cuda} : !fir.box> +! CHECK: fir.global @_QMcuda_varEmod_d_rp {data_attr = #cuf.cuda} : !fir.box> contains @@ -23,44 +23,44 @@ subroutine local_var_attrs end subroutine ! CHECK-LABEL: func.func @_QMcuda_varPlocal_var_attrs() -! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) -! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) -! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref) -> !fir.ref -! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref>>) -> !fir.ref>> -! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref>>) -> !fir.ref>> -! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref) -> !fir.ref +! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref) -> !fir.ref +! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref>>) -> !fir.ref>> +! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref>>) -> !fir.ref>> +! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref) -> !fir.ref subroutine dummy_arg_device(dd) real, device :: dd end subroutine ! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_device( -! CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "dd", fir.cuda_attr = #fir.cuda}) { -! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFdummy_arg_deviceEdd"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref {cuf.data_attr = #cuf.cuda, fir.bindc_name = "dd"}) { +! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFdummy_arg_deviceEdd"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) subroutine dummy_arg_managed(dm) real, allocatable, managed :: dm end subroutine ! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_managed( -! CHECK-SAME: %[[ARG0:.*]]: !fir.ref>> {fir.bindc_name = "dm", fir.cuda_attr = #fir.cuda}) { -! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFdummy_arg_managedEdm"} : (!fir.ref>>, !fir.dscope) -> (!fir.ref>>, !fir.ref>>) +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref>> {cuf.data_attr = #cuf.cuda, fir.bindc_name = "dm"}) { +! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFdummy_arg_managedEdm"} : (!fir.ref>>, !fir.dscope) -> (!fir.ref>>, !fir.ref>>) subroutine dummy_arg_pinned(dp) real, allocatable, pinned :: dp end subroutine ! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_pinned( -! CHECK-SAME: %[[ARG0:.*]]: !fir.ref>> {fir.bindc_name = "dp", fir.cuda_attr = #fir.cuda}) { -! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFdummy_arg_pinnedEdp"} : (!fir.ref>>, !fir.dscope) -> (!fir.ref>>, !fir.ref>>) +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref>> {cuf.data_attr = #cuf.cuda, fir.bindc_name = "dp"}) { +! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFdummy_arg_pinnedEdp"} : (!fir.ref>>, !fir.dscope) -> (!fir.ref>>, !fir.ref>>) subroutine dummy_arg_unified(du) real, unified :: du end subroutine ! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_unified( -! CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "du", fir.cuda_attr = #fir.cuda}) -! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFdummy_arg_unifiedEdu"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref {cuf.data_attr = #cuf.cuda, fir.bindc_name = "du"}) +! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFdummy_arg_unifiedEdu"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) subroutine cuda_alloc_free(n) integer :: n @@ -70,27 +70,27 @@ subroutine cuda_alloc_free(n) end ! CHECK-LABEL: func.func @_QMcuda_varPcuda_alloc_free -! CHECK: %[[ALLOC_A:.*]] = fir.cuda_alloc !fir.array<10xf32> {bindc_name = "a", cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} -> !fir.ref> +! CHECK: %[[ALLOC_A:.*]] = cuf.alloc !fir.array<10xf32> {bindc_name = "a", data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} -> !fir.ref> ! CHECK: %[[SHAPE:.*]] = fir.shape %c10 : (index) -> !fir.shape<1> -! CHECK: %[[DECL_A:.*]]:2 = hlfir.declare %[[ALLOC_A]](%[[SHAPE]]) {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[DECL_A:.*]]:2 = hlfir.declare %[[ALLOC_A]](%[[SHAPE]]) {data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[ALLOC_U:.*]] = fir.cuda_alloc i32 {bindc_name = "u", cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} -> !fir.ref -! CHECK: %[[DECL_U:.*]]:2 = hlfir.declare %[[ALLOC_U]] {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOC_U:.*]] = cuf.alloc i32 {bindc_name = "u", data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} -> !fir.ref +! CHECK: %[[DECL_U:.*]]:2 = hlfir.declare %[[ALLOC_U]] {data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: %[[ALLOC_B:.*]] = fir.cuda_alloc !fir.array, %{{.*}} : index {bindc_name = "b", cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} -> !fir.ref> +! CHECK: %[[ALLOC_B:.*]] = cuf.alloc !fir.array, %{{.*}} : index {bindc_name = "b", data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} -> !fir.ref> ! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> -! CHECK: %[[DECL_B:.*]]:2 = hlfir.declare %[[ALLOC_B]](%[[SHAPE]]) {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +! CHECK: %[[DECL_B:.*]]:2 = hlfir.declare %[[ALLOC_B]](%[[SHAPE]]) {data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) -! CHECK: fir.cuda_free %[[DECL_B]]#1 : !fir.ref> {cuda_attr = #fir.cuda} -! CHECK: fir.cuda_free %[[DECL_U]]#1 : !fir.ref {cuda_attr = #fir.cuda} -! CHECK: fir.cuda_free %[[DECL_A]]#1 : !fir.ref> {cuda_attr = #fir.cuda} +! CHECK: cuf.free %[[DECL_B]]#1 : !fir.ref> {data_attr = #cuf.cuda} +! CHECK: cuf.free %[[DECL_U]]#1 : !fir.ref {data_attr = #cuf.cuda} +! CHECK: cuf.free %[[DECL_A]]#1 : !fir.ref> {data_attr = #cuf.cuda} subroutine dummy(x) real, target, device :: x end subroutine ! CHECK: func.func @_QMcuda_varPdummy -! CHECK-NOT: fir.cuda_free +! CHECK-NOT: cuf.free end module diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index 0a2608639bce..084314ed63ec 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -29,37 +29,37 @@ end ! CHECK-LABEL: func.func @_QPsub1() -! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda, uniq_name = "_QFsub1Eadev"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda, uniq_name = "_QFsub1Eadev"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub1Eahost"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda, uniq_name = "_QFsub1Em"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFsub1Em"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 ! CHECK: %[[LOADED_I:.*]] = fir.load %[[I]]#0 : !fir.ref ! CHECK: %[[ADD:.*]] = arith.addi %[[C1]], %[[LOADED_I]] : i32 ! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ADD]] {uniq_name = ".cuf_host_tmp"} : (i32) -> (!fir.ref, !fir.ref, i1) -! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref, !fir.ref +! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref, !fir.ref ! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref, i1 ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 ! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[C1]] {uniq_name = ".cuf_host_tmp"} : (i32) -> (!fir.ref, !fir.ref, i1) -! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref, !fir.ref +! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref, !fir.ref ! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref, i1 -! CHECK: fir.cuda_data_transfer %[[AHOST]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> +! CHECK: cuf.data_transfer %[[AHOST]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.ref> ! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> { ! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ELEMENTAL]](%{{.*}}) {uniq_name = ".cuf_host_tmp"} : (!hlfir.expr<10xi32>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>, i1) -! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> +! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.ref> ! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref>, i1 ! CHECK: %[[DES_AHOST:.*]] = hlfir.designate %[[AHOST]]#0 (%c1{{.*}}:%c5{{.*}}:%c1{{.*}}) shape %{{.*}} : (!fir.ref>, index, index, index, !fir.shape<1>) -> !fir.ref> ! CHECK: %[[DES_ADEV:.*]] = hlfir.designate %[[ADEV]]#0 (%c1{{.*}}:%c5{{.*}}:%c1{{.*}}) shape %{{.*}} : (!fir.ref>, index, index, index, !fir.shape<1>) -> !fir.ref> -! CHECK: fir.cuda_data_transfer %[[DES_AHOST]] to %[[DES_ADEV]] {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> +! CHECK: cuf.data_transfer %[[DES_AHOST]] to %[[DES_ADEV]] {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.ref> ! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> ! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ELEMENTAL]](%{{.*}}) {uniq_name = ".cuf_host_tmp"} : (!hlfir.expr<10xi32>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>, i1) -! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> +! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.ref> ! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref>, i1 subroutine sub2() @@ -81,25 +81,25 @@ subroutine sub2() end ! CHECK-LABEL: func.func @_QPsub2() -! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda, uniq_name = "_QFsub2Eadev"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda, uniq_name = "_QFsub2Eadev"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub2Eahost"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[BDEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda, uniq_name = "_QFsub2Ebdev"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[BDEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda, uniq_name = "_QFsub2Ebdev"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub2Ebhost"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub2Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda, uniq_name = "_QFsub2Em"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: fir.cuda_data_transfer %[[ADEV]]#0 to %[[AHOST]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> -! CHECK: fir.cuda_data_transfer %[[M]]#0 to %[[I]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref, !fir.ref +! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFsub2Em"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: cuf.data_transfer %[[ADEV]]#0 to %[[AHOST]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.ref> +! CHECK: cuf.data_transfer %[[M]]#0 to %[[I]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref, !fir.ref ! CHECK: %[[DES_ADEV:.*]] = hlfir.designate %[[ADEV]]#0 (%{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} : (!fir.ref>, index, index, index, !fir.shape<1>) -> !fir.ref> ! CHECK: %[[DES_AHOST:.*]] = hlfir.designate %[[AHOST]]#0 (%{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} : (!fir.ref>, index, index, index, !fir.shape<1>) -> !fir.ref> -! CHECK: fir.cuda_data_transfer %[[DES_ADEV]] to %[[DES_AHOST]] {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> +! CHECK: cuf.data_transfer %[[DES_ADEV]] to %[[DES_AHOST]] {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.ref> -! CHECK: fir.cuda_data_transfer %[[ADEV]]#0 to %[[BDEV]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> +! CHECK: cuf.data_transfer %[[ADEV]]#0 to %[[BDEV]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.ref> ! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10xi32> {bindc_name = ".tmp", uniq_name = ""} ! CHECK: %[[DECL_TEMP:.*]]:2 = hlfir.declare %[[TEMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) -! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %[[DECL_TEMP]]#1(%{{.*}}) {cuda_attr = #fir.cuda, uniq_name = "_QFsub2Eadev"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) -! CHECK: fir.cuda_data_transfer %[[ADEV]]#1 to %[[DECL_TEMP]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.heap> +! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %[[DECL_TEMP]]#1(%{{.*}}) {data_attr = #cuf.cuda, uniq_name = "_QFsub2Eadev"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) +! CHECK: cuf.data_transfer %[[ADEV]]#1 to %[[DECL_TEMP]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.heap> ! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> ! CHECK: hlfir.assign %[[ELEMENTAL]] to %[[BHOST]]#0 : !hlfir.expr<10xi32>, !fir.ref> ! CHECK: fir.freemem %[[DECL_TEMP]]#0 : !fir.heap> @@ -116,12 +116,12 @@ end ! CHECK: %[[TMP:.*]] = fir.alloca !fir.type<_QMmod1Tt1{i:i32}> {bindc_name = ".tmp"} ! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Eahost"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Ebhost"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {cuda_attr = #fir.cuda, uniq_name = "_QFsub3Et"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {data_attr = #cuf.cuda, uniq_name = "_QFsub3Et"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = ".tmp"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -! CHECK: fir.cuda_data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> +! CHECK: cuf.data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.ref> -! Check that fir.cuda_data_transfer are not generated within cuf kernel +! Check that cuf.data_transfer are not generated within cuf kernel subroutine sub4() integer, parameter :: n = 10 real, device :: adev(n) @@ -137,9 +137,9 @@ subroutine sub4() end subroutine ! CHECK-LABEL: func.func @_QPsub4() -! CHECK: fir.cuda_data_transfer -! CHECK: fir.cuda_kernel<<<*, *>>> -! CHECK-NOT: fir.cuda_data_transfer +! CHECK: cuf.data_transfer +! CHECK: cuf.kernel<<<*, *>>> +! CHECK-NOT: cuf.data_transfer ! CHECK: hlfir.assign attributes(global) subroutine sub5(a) @@ -149,7 +149,7 @@ attributes(global) subroutine sub5(a) end subroutine ! CHECK-LABEL: func.func @_QPsub5 -! CHECK-NOT: fir.cuda_data_transfer +! CHECK-NOT: cuf.data_transfer attributes(host,device) subroutine sub6(a) integer, device :: a @@ -158,4 +158,4 @@ attributes(host,device) subroutine sub6(a) end subroutine ! CHECK-LABEL: func.func @_QPsub6 -! CHECK: fir.cuda_data_transfer +! CHECK: cuf.data_transfer diff --git a/flang/test/Lower/CUDA/cuda-kernel-calls.cuf b/flang/test/Lower/CUDA/cuda-kernel-calls.cuf index 7e28fbb2231a..82d1a61f8e15 100644 --- a/flang/test/Lower/CUDA/cuda-kernel-calls.cuf +++ b/flang/test/Lower/CUDA/cuda-kernel-calls.cuf @@ -16,10 +16,10 @@ contains subroutine host() real, device :: a ! CHECK-LABEL: func.func @_QMtest_callPhost() -! CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda, uniq_name = "_QMtest_callFhostEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QMtest_callFhostEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) call dev_kernel0<<<10, 20>>>() -! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}>>>() +! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}>>>() call dev_kernel0<<< __builtin_dim3(1,1,4), __builtin_dim3(32,1,1) >>> ! CHECK: %[[ADDR_DIM3_GRID:.*]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_dim3.{{.*}}) : !fir.ref> @@ -38,16 +38,16 @@ contains ! CHECK: %[[BLOCK_Y_LOAD:.*]] = fir.load %[[BLOCK_Y]] : !fir.ref ! CHECK: %[[BLOCK_Z:.*]] = hlfir.designate %[[DIM3_BLOCK]]#1{"z"} : (!fir.ref>) -> !fir.ref ! CHECK: %[[BLOCK_Z_LOAD:.*]] = fir.load %[[BLOCK_Z]] : !fir.ref -! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%[[GRID_X_LOAD]], %[[GRID_Y_LOAD]], %[[GRID_Z_LOAD]], %[[BLOCK_X_LOAD]], %[[BLOCK_Y_LOAD]], %[[BLOCK_Z_LOAD]]>>>() +! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%[[GRID_X_LOAD]], %[[GRID_Y_LOAD]], %[[GRID_Z_LOAD]], %[[BLOCK_X_LOAD]], %[[BLOCK_Y_LOAD]], %[[BLOCK_Z_LOAD]]>>>() call dev_kernel0<<<10, 20, 2>>>() -! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}>>>() +! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}>>>() call dev_kernel0<<<10, 20, 2, 0>>>() -! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}, %c0{{.*}}>>>() +! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}, %c0{{.*}}>>>() call dev_kernel1<<<1, 32>>>(a) -! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel1<<<%c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c32{{.*}}, %c1{{.*}}, %c1{{.*}}>>>(%1#1) : (!fir.ref) +! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel1<<<%c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c32{{.*}}, %c1{{.*}}, %c1{{.*}}>>>(%1#1) : (!fir.ref) end end diff --git a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf index e1cc35772618..89de367b723f 100644 --- a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf +++ b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf @@ -20,7 +20,7 @@ subroutine sub1() ! CHECK: %[[LB:.*]] = fir.convert %c1{{.*}} : (i32) -> index ! CHECK: %[[UB:.*]] = fir.convert %c100{{.*}} : (i32) -> index ! CHECK: %[[STEP:.*]] = arith.constant 1 : index -! CHECK: fir.cuda_kernel<<<%c1_i32, %c2_i32>>> (%[[ARG0:.*]] : index) = (%[[LB]] : index) to (%[[UB]] : index) step (%[[STEP]] : index) +! CHECK: cuf.kernel<<<%c1_i32, %c2_i32>>> (%[[ARG0:.*]] : index) = (%[[LB]] : index) to (%[[UB]] : index) step (%[[STEP]] : index) ! CHECK-NOT: fir.do_loop ! CHECK: %[[ARG0_I32:.*]] = fir.convert %[[ARG0]] : (index) -> i32 ! CHECK: fir.store %[[ARG0_I32]] to %[[IV]]#1 : !fir.ref @@ -32,7 +32,7 @@ subroutine sub1() a(i) = a(i) * b(i) end do -! CHECK: fir.cuda_kernel<<<*, *>>> (%{{.*}} : index) = (%{{.*}} : index) to (%{{.*}} : index) step (%{{.*}} : index) +! CHECK: cuf.kernel<<<*, *>>> (%{{.*}} : index) = (%{{.*}} : index) to (%{{.*}} : index) step (%{{.*}} : index) !$cuf kernel do(2) <<< 1, (256,1) >>> do i = 1, n @@ -41,7 +41,7 @@ subroutine sub1() end do end do -! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%[[ARG0:.*]] : index, %[[ARG1:.*]] : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index) +! CHECK: cuf.kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%[[ARG0:.*]] : index, %[[ARG1:.*]] : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index) ! CHECK: %[[ARG0_I32:.*]] = fir.convert %[[ARG0]] : (index) -> i32 ! CHECK: fir.store %[[ARG0_I32]] to %[[IV]]#1 : !fir.ref ! CHECK: %[[ARG1_I32:.*]] = fir.convert %[[ARG1]] : (index) -> i32 @@ -54,7 +54,7 @@ subroutine sub1() c(i,j) = c(i,j) * d(i,j) end do end do -! CHECK: fir.cuda_kernel<<<(%c1{{.*}}, %c0{{.*}}), (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index) +! CHECK: cuf.kernel<<<(%c1{{.*}}, %c0{{.*}}), (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index) !$cuf kernel do(2) <<< (*,*), (32,4) >>> do i = 1, n @@ -63,5 +63,5 @@ subroutine sub1() end do end do -! CHECK: fir.cuda_kernel<<<*, (%c32{{.*}}, %c4{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index) +! CHECK: cuf.kernel<<<*, (%c32{{.*}}, %c4{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index) end diff --git a/flang/test/Lower/CUDA/cuda-mod.cuf b/flang/test/Lower/CUDA/cuda-mod.cuf index ae5bf63d2da4..f03e72e94780 100644 --- a/flang/test/Lower/CUDA/cuda-mod.cuf +++ b/flang/test/Lower/CUDA/cuda-mod.cuf @@ -10,6 +10,6 @@ contains end end module -! CHECK: fir.global @_QMcuf_modEmd {cuda_attr = #fir.cuda} : f32 +! CHECK: fir.global @_QMcuf_modEmd {data_attr = #cuf.cuda} : f32 -! CHECK: func.func @_QMcuf_modPdevsub() attributes {fir.cuda_attr = #fir.cuda_proc} +! CHECK: func.func @_QMcuf_modPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc} diff --git a/flang/test/Lower/CUDA/cuda-module-use.cuf b/flang/test/Lower/CUDA/cuda-module-use.cuf index 47d3805065a5..130fefab24d9 100644 --- a/flang/test/Lower/CUDA/cuda-module-use.cuf +++ b/flang/test/Lower/CUDA/cuda-module-use.cuf @@ -10,16 +10,16 @@ end ! CHECK-LABEL: func.func @_QPsub1() ! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMcuf_modEmd) : !fir.ref -! CHECK: %{{.*}}:2 = hlfir.declare %[[ADDR]] {cuda_attr = #fir.cuda, uniq_name = "_QMcuf_modEmd"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %{{.*}}:2 = hlfir.declare %[[ADDR]] {data_attr = #cuf.cuda, uniq_name = "_QMcuf_modEmd"} : (!fir.ref) -> (!fir.ref, !fir.ref) attributes(device) subroutine sub2() use cuf_mod call devsub() end -! CHECK-LABEL: func.func @_QPsub2() attributes {fir.cuda_attr = #fir.cuda_proc} +! CHECK-LABEL: func.func @_QPsub2() attributes {cuf.proc_attr = #cuf.cuda_proc} ! CHECK: fir.call @_QMcuf_modPdevsub() -! CHECK-LABEL: fir.global @_QMcuf_modEmd {cuda_attr = #fir.cuda} : f32 +! CHECK-LABEL: fir.global @_QMcuf_modEmd {data_attr = #cuf.cuda} : f32 -! CHECK-LABEL: func.func private @_QMcuf_modPdevsub() attributes {fir.cuda_attr = #fir.cuda_proc} +! CHECK-LABEL: func.func private @_QMcuf_modPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc} diff --git a/flang/test/Lower/CUDA/cuda-proc-attribute.cuf b/flang/test/Lower/CUDA/cuda-proc-attribute.cuf index d9765f6cd2fe..f8b8dd8e296b 100644 --- a/flang/test/Lower/CUDA/cuda-proc-attribute.cuf +++ b/flang/test/Lower/CUDA/cuda-proc-attribute.cuf @@ -4,40 +4,40 @@ ! Test lowering of CUDA attribute on procedures. attributes(host) subroutine sub_host(); end -! CHECK: func.func @_QPsub_host() attributes {fir.cuda_attr = #fir.cuda_proc} +! CHECK: func.func @_QPsub_host() attributes {cuf.proc_attr = #cuf.cuda_proc} attributes(device) subroutine sub_device(); end -! CHECK: func.func @_QPsub_device() attributes {fir.cuda_attr = #fir.cuda_proc} +! CHECK: func.func @_QPsub_device() attributes {cuf.proc_attr = #cuf.cuda_proc} attributes(host) attributes(device) subroutine sub_host_device; end -! CHECK: func.func @_QPsub_host_device() attributes {fir.cuda_attr = #fir.cuda_proc} +! CHECK: func.func @_QPsub_host_device() attributes {cuf.proc_attr = #cuf.cuda_proc} attributes(device) attributes(host) subroutine sub_device_host; end -! CHECK: func.func @_QPsub_device_host() attributes {fir.cuda_attr = #fir.cuda_proc} +! CHECK: func.func @_QPsub_device_host() attributes {cuf.proc_attr = #cuf.cuda_proc} attributes(global) subroutine sub_global(); end -! CHECK: func.func @_QPsub_global() attributes {fir.cuda_attr = #fir.cuda_proc} +! CHECK: func.func @_QPsub_global() attributes {cuf.proc_attr = #cuf.cuda_proc} attributes(grid_global) subroutine sub_grid_global(); end -! CHECK: func.func @_QPsub_grid_global() attributes {fir.cuda_attr = #fir.cuda_proc} +! CHECK: func.func @_QPsub_grid_global() attributes {cuf.proc_attr = #cuf.cuda_proc} attributes(host) integer function fct_host(); end -! CHECK: func.func @_QPfct_host() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc} +! CHECK: func.func @_QPfct_host() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc} attributes(device) integer function fct_device(); end -! CHECK: func.func @_QPfct_device() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc} +! CHECK: func.func @_QPfct_device() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc} attributes(host) attributes(device) integer function fct_host_device; end -! CHECK: func.func @_QPfct_host_device() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc} +! CHECK: func.func @_QPfct_host_device() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc} attributes(device) attributes(host) integer function fct_device_host; end -! CHECK: func.func @_QPfct_device_host() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc} +! CHECK: func.func @_QPfct_device_host() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc} attributes(global) launch_bounds(1, 2) subroutine sub_lbounds1(); end -! CHECK: func.func @_QPsub_lbounds1() attributes {fir.cuda_attr = #fir.cuda_proc, fir.cuda_launch_bounds = #fir.launch_bounds} +! CHECK: func.func @_QPsub_lbounds1() attributes {cuf.launch_bounds = #cuf.launch_bounds, cuf.proc_attr = #cuf.cuda_proc} attributes(global) launch_bounds(1, 2, 3) subroutine sub_lbounds2(); end -! CHECK: func.func @_QPsub_lbounds2() attributes {fir.cuda_attr = #fir.cuda_proc, fir.cuda_launch_bounds = #fir.launch_bounds} +! CHECK: func.func @_QPsub_lbounds2() attributes {cuf.launch_bounds = #cuf.launch_bounds, cuf.proc_attr = #cuf.cuda_proc} attributes(global) cluster_dims(1, 2, 3) subroutine sub_clusterdims1(); end -! CHECK: func.func @_QPsub_clusterdims1() attributes {fir.cuda_attr = #fir.cuda_proc, fir.cuda_cluster_dims = #fir.cluster_dims} +! CHECK: func.func @_QPsub_clusterdims1() attributes {cuf.cluster_dims = #cuf.cluster_dims, cuf.proc_attr = #cuf.cuda_proc} diff --git a/flang/tools/bbc/CMakeLists.txt b/flang/tools/bbc/CMakeLists.txt index f21fa3b7bae3..9410fd005660 100644 --- a/flang/tools/bbc/CMakeLists.txt +++ b/flang/tools/bbc/CMakeLists.txt @@ -16,6 +16,8 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS) target_link_libraries(bbc PRIVATE +CUFAttrs +CUFDialect FIRDialect FIRDialectSupport FIRSupport diff --git a/flang/tools/fir-opt/CMakeLists.txt b/flang/tools/fir-opt/CMakeLists.txt index 43b0c74696f5..43679a9d5357 100644 --- a/flang/tools/fir-opt/CMakeLists.txt +++ b/flang/tools/fir-opt/CMakeLists.txt @@ -11,6 +11,8 @@ if(FLANG_INCLUDE_TESTS) endif() target_link_libraries(fir-opt PRIVATE + CUFAttrs + CUFDialect FIRDialect FIRSupport FIRTransforms diff --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt index 6d83353b4e0d..808219ac361f 100644 --- a/flang/tools/tco/CMakeLists.txt +++ b/flang/tools/tco/CMakeLists.txt @@ -7,6 +7,8 @@ llvm_update_compile_flags(tco) get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS) target_link_libraries(tco PRIVATE + CUFAttrs + CUFDialect FIRCodeGen FIRDialect FIRDialectSupport diff --git a/flang/unittests/Optimizer/CMakeLists.txt b/flang/unittests/Optimizer/CMakeLists.txt index 9c165d998e2e..7299e3ee0529 100644 --- a/flang/unittests/Optimizer/CMakeLists.txt +++ b/flang/unittests/Optimizer/CMakeLists.txt @@ -2,6 +2,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS) set(LIBS + CUFDialect FIRBuilder FIRCodeGen FIRDialect @@ -36,6 +37,7 @@ add_flang_unittest(FlangOptimizerTests KindMappingTest.cpp RTBuilder.cpp DEPENDS + CUFDialect FIRDialect FIRSupport HLFIRDialect diff --git a/flang/unittests/Optimizer/FortranVariableTest.cpp b/flang/unittests/Optimizer/FortranVariableTest.cpp index f5f559ef887c..87efb624735c 100644 --- a/flang/unittests/Optimizer/FortranVariableTest.cpp +++ b/flang/unittests/Optimizer/FortranVariableTest.cpp @@ -51,7 +51,7 @@ TEST_F(FortranVariableTest, SimpleScalar) { /*shape=*/mlir::Value{}, /*typeParams=*/std::nullopt, /*dummy_scope=*/nullptr, name, /*fortran_attrs=*/fir::FortranVariableFlagsAttr{}, - /*cuda_attr=*/fir::CUDADataAttributeAttr{}); + /*data_attr=*/cuf::DataAttributeAttr{}); fir::FortranVariableOpInterface fortranVariable = declare; EXPECT_FALSE(fortranVariable.isArray()); @@ -77,7 +77,7 @@ TEST_F(FortranVariableTest, CharacterScalar) { auto declare = builder->create(loc, addr.getType(), addr, /*shape=*/mlir::Value{}, typeParams, /*dummy_scope=*/nullptr, name, /*fortran_attrs=*/fir::FortranVariableFlagsAttr{}, - /*cuda_attr=*/fir::CUDADataAttributeAttr{}); + /*data_attr=*/cuf::DataAttributeAttr{}); fir::FortranVariableOpInterface fortranVariable = declare; EXPECT_FALSE(fortranVariable.isArray()); @@ -108,7 +108,7 @@ TEST_F(FortranVariableTest, SimpleArray) { auto declare = builder->create(loc, addr.getType(), addr, shape, /*typeParams*/ std::nullopt, /*dummy_scope=*/nullptr, name, /*fortran_attrs=*/fir::FortranVariableFlagsAttr{}, - /*cuda_attr=*/fir::CUDADataAttributeAttr{}); + /*data_attr=*/cuf::DataAttributeAttr{}); fir::FortranVariableOpInterface fortranVariable = declare; EXPECT_TRUE(fortranVariable.isArray()); @@ -139,7 +139,7 @@ TEST_F(FortranVariableTest, CharacterArray) { auto declare = builder->create(loc, addr.getType(), addr, shape, typeParams, /*dummy_scope=*/nullptr, name, /*fortran_attrs=*/fir::FortranVariableFlagsAttr{}, - /*cuda_attr=*/fir::CUDADataAttributeAttr{}); + /*data_attr=*/cuf::DataAttributeAttr{}); fir::FortranVariableOpInterface fortranVariable = declare; EXPECT_TRUE(fortranVariable.isArray());