From f674ddc19fee67401b78f360e530bb064aa31ed8 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Sat, 6 May 2023 15:03:39 -0700 Subject: [PATCH] [flang] CUDA Fortran - part 5/5: statement semantics Canonicalize !$CUF KERNEL DO loop nests, similar to OpenACC/OpenMP canonicalization. Check statements and expressions in device contexts for usage that isn't supported. Add more tests, and include some tweaks to standard modules needed to build CUDA Fortran modules. Depends on https://reviews.llvm.org/D150159, https://reviews.llvm.org/D150161, https://reviews.llvm.org/D150162, & https://reviews.llvm.org/D150163. Differential Revision: https://reviews.llvm.org/D150164 --- flang/include/flang/Evaluate/traverse.h | 29 +- flang/include/flang/Parser/tools.h | 127 +++++++ flang/include/flang/Semantics/semantics.h | 5 +- flang/lib/Parser/unparse.cpp | 2 +- flang/lib/Semantics/CMakeLists.txt | 1 + flang/lib/Semantics/canonicalize-acc.cpp | 2 +- flang/lib/Semantics/check-allocate.cpp | 31 +- flang/lib/Semantics/check-cuda.cpp | 416 ++++++++++++++++++++++ flang/lib/Semantics/check-cuda.h | 50 +++ flang/lib/Semantics/check-deallocate.cpp | 10 +- flang/lib/Semantics/resolve-names.cpp | 14 +- flang/lib/Semantics/semantics.cpp | 39 +- flang/module/__fortran_builtins.f90 | 19 + flang/module/iso_c_binding.f90 | 3 +- flang/test/Driver/compiler_options.f90 | 2 +- flang/test/Parser/cuf-sanity-unparse.CUF | 6 +- flang/test/Semantics/cuf04.cuf | 24 ++ flang/test/Semantics/cuf05.cuf | 19 + flang/test/Semantics/cuf06.cuf | 15 + flang/test/Semantics/cuf09.cuf | 76 ++++ flang/tools/f18/CMakeLists.txt | 1 + flang/unittests/Runtime/Time.cpp | 3 + 22 files changed, 848 insertions(+), 46 deletions(-) create mode 100644 flang/lib/Semantics/check-cuda.cpp create mode 100644 flang/lib/Semantics/check-cuda.h create mode 100644 flang/test/Semantics/cuf04.cuf create mode 100644 flang/test/Semantics/cuf05.cuf create mode 100644 flang/test/Semantics/cuf06.cuf create mode 100644 flang/test/Semantics/cuf09.cuf diff --git a/flang/include/flang/Evaluate/traverse.h b/flang/include/flang/Evaluate/traverse.h index 79cef799c8b3..54cdb690ec08 100644 --- a/flang/include/flang/Evaluate/traverse.h +++ b/flang/include/flang/Evaluate/traverse.h @@ -38,6 +38,7 @@ // expression of an ASSOCIATE (or related) construct entity. #include "expression.h" +#include "flang/Common/indirection.h" #include "flang/Semantics/symbol.h" #include "flang/Semantics/type.h" #include @@ -53,6 +54,10 @@ public: Result operator()(const common::Indirection &x) const { return visitor_(x.value()); } + template + Result operator()(const common::ForwardOwningPointer &p) const { + return visitor_(p.get()); + } template Result operator()(const SymbolRef x) const { return visitor_(*x); } @@ -76,13 +81,17 @@ public: return visitor_.Default(); } } - template - Result operator()(const std::variant &u) const { - return common::visit(visitor_, u); + template + Result operator()(const std::variant &u) const { + return common::visit([=](const auto &y) { return visitor_(y); }, u); } template Result operator()(const std::vector &x) const { return CombineContents(x); } + template + Result operator()(const std::pair &x) const { + return Combine(x.first, x.second); + } // Leaves Result operator()(const BOZLiteralConstant &) const { @@ -233,14 +242,24 @@ public: template Result operator()(const Expr &x) const { return visitor_(x.u); } + Result operator()(const Assignment &x) const { + return Combine(x.lhs, x.rhs, x.u); + } + Result operator()(const Assignment::Intrinsic &) const { + return visitor_.Default(); + } + Result operator()(const GenericExprWrapper &x) const { return visitor_(x.v); } + Result operator()(const GenericAssignmentWrapper &x) const { + return visitor_(x.v); + } private: template Result CombineRange(ITER iter, ITER end) const { if (iter == end) { return visitor_.Default(); } else { - Result result{visitor_(*iter++)}; - for (; iter != end; ++iter) { + Result result{visitor_(*iter)}; + for (++iter; iter != end; ++iter) { result = visitor_.Combine(std::move(result), visitor_(*iter)); } return result; diff --git a/flang/include/flang/Parser/tools.h b/flang/include/flang/Parser/tools.h index 48c6ab56dd9f..1e347fab6461 100644 --- a/flang/include/flang/Parser/tools.h +++ b/flang/include/flang/Parser/tools.h @@ -65,6 +65,18 @@ struct UnwrapperHelper { return common::visit([](const auto &y) { return Unwrap(y); }, x); } + template + static const A *Unwrap(const std::tuple &x) { + if constexpr (J < sizeof...(Bs)) { + if (auto result{Unwrap(std::get(x))}) { + return result; + } + return Unwrap(x); + } else { + return nullptr; + } + } + template static const A *Unwrap(const std::optional &o) { if (o) { @@ -122,5 +134,120 @@ template struct HasTypedExpr : std::false_type {}; template struct HasTypedExpr(A::typedExpr), 0)> : std::true_type {}; + +// GetSource() + +template struct GetSourceHelper { + + using Result = std::optional; + + template static Result GetSource(A *p) { + if (p) { + return GetSource(*p); + } else { + return std::nullopt; + } + } + template + static Result GetSource(const common::Indirection &x) { + return GetSource(x.value()); + } + + template + static Result GetSource(const common::Indirection &x) { + return GetSource(x.value()); + } + + template + static Result GetSource(const std::variant &x) { + return common::visit([](const auto &y) { return GetSource(y); }, x); + } + + template + static Result GetSource(const std::tuple &x) { + if constexpr (J < sizeof...(As)) { + constexpr std::size_t index{GET_FIRST ? J : sizeof...(As) - J - 1}; + if (auto result{GetSource(std::get(x))}) { + return result; + } + return GetSource<(J + 1)>(x); + } else { + return {}; + } + } + + template static Result GetSource(const std::optional &o) { + if (o) { + return GetSource(*o); + } else { + return {}; + } + } + + template static Result GetSource(const std::list &x) { + if constexpr (GET_FIRST) { + for (const A &y : x) { + if (auto result{GetSource(y)}) { + return result; + } + } + } else { + for (auto iter{x.rbegin()}; iter != x.rend(); ++iter) { + if (auto result{GetSource(*iter)}) { + return result; + } + } + } + return {}; + } + + template static Result GetSource(const std::vector &x) { + if constexpr (GET_FIRST) { + for (const A &y : x) { + if (auto result{GetSource(y)}) { + return result; + } + } + } else { + for (auto iter{x.rbegin()}; iter != x.rend(); ++iter) { + if (auto result{GetSource(*iter)}) { + return result; + } + } + } + return {}; + } + + template static Result GetSource(A &x) { + if constexpr (HasSource::value) { + return x.source; + } else if constexpr (ConstraintTrait) { + return GetSource(x.thing); + } else if constexpr (WrapperTrait) { + return GetSource(x.v); + } else if constexpr (UnionTrait) { + return GetSource(x.u); + } else if constexpr (TupleTrait) { + return GetSource(x.t); + } else { + return {}; + } + } +}; + +template std::optional GetSource(const A &x) { + return GetSourceHelper::GetSource(x); +} +template std::optional GetSource(A &x) { + return GetSourceHelper::GetSource(const_cast(x)); +} + +template std::optional GetLastSource(const A &x) { + return GetSourceHelper::GetSource(x); +} +template std::optional GetLastSource(A &x) { + return GetSourceHelper::GetSource(const_cast(x)); +} + } // namespace Fortran::parser #endif // FORTRAN_PARSER_TOOLS_H_ diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index f3846c594dc2..48f37b7d4ce9 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -214,8 +214,9 @@ public: // Defines builtinsScope_ from the __Fortran_builtins module void UseFortranBuiltinsModule(); const Scope *GetBuiltinsScope() const { return builtinsScope_; } + void UsePPCFortranBuiltinTypesModule(); - const Scope *GetCUDABuiltinsScope(); + const Scope &GetCUDABuiltinsScope(); void UsePPCFortranBuiltinsModule(); Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; } const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; } @@ -281,7 +282,7 @@ private: std::set tempNames_; const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types - std::optional CUDABuiltinsScope_; // module __CUDA_builtins + std::optional cudaBuiltinsScope_; // module __CUDA_builtins const Scope *ppcBuiltinsScope_{nullptr}; // module __Fortran_PPC_intrinsics std::list modFileParseTrees_; std::unique_ptr commonBlockMap_; diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 304ff96c6662..ff716236b77e 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -1698,7 +1698,7 @@ public: Put('('), Walk(std::get>(x.v.t), ", "), Put(')'); } void Unparse(const CallStmt &x) { // R1521 - if (asFortran_ && x.typedCall.get() && !x.chevrons /*CUDA todo*/) { + if (asFortran_ && x.typedCall.get()) { Put(' '); asFortran_->call(out_, *x.typedCall); Put('\n'); diff --git a/flang/lib/Semantics/CMakeLists.txt b/flang/lib/Semantics/CMakeLists.txt index e8022e1261c8..bfd2f2b97904 100644 --- a/flang/lib/Semantics/CMakeLists.txt +++ b/flang/lib/Semantics/CMakeLists.txt @@ -10,6 +10,7 @@ add_flang_library(FortranSemantics check-call.cpp check-case.cpp check-coarray.cpp + check-cuda.cpp check-data.cpp check-deallocate.cpp check-declarations.cpp diff --git a/flang/lib/Semantics/canonicalize-acc.cpp b/flang/lib/Semantics/canonicalize-acc.cpp index 5afae172cfaa..c188450a92d4 100644 --- a/flang/lib/Semantics/canonicalize-acc.cpp +++ b/flang/lib/Semantics/canonicalize-acc.cpp @@ -65,7 +65,7 @@ private: const auto &outer{std::get>(x.t)}; if (outer->IsDoConcurrent()) { - return; // Tile is not allowed on DO CONURRENT + return; // Tile is not allowed on DO CONCURRENT } for (const parser::DoConstruct *loop{&*outer}; loop && tileArgNb > 0; --tileArgNb) { diff --git a/flang/lib/Semantics/check-allocate.cpp b/flang/lib/Semantics/check-allocate.cpp index 00ee0d0954f9..ece66444ffb4 100644 --- a/flang/lib/Semantics/check-allocate.cpp +++ b/flang/lib/Semantics/check-allocate.cpp @@ -31,6 +31,8 @@ struct AllocateCheckerInfo { bool gotTypeSpec{false}; bool gotSource{false}; bool gotMold{false}; + bool gotStream{false}; + bool gotPinned{false}; }; class AllocationCheckerHelper { @@ -179,8 +181,22 @@ static std::optional CheckAllocateOptions( parserSourceExpr = &mold.v.value(); info.gotMold = true; }, - [](const parser::AllocOpt::Stream &) { /* CUDA coming */ }, - [](const parser::AllocOpt::Pinned &) { /* CUDA coming */ }, + [&](const parser::AllocOpt::Stream &stream) { // CUDA + if (info.gotStream) { + context.Say( + "STREAM may not be duplicated in a ALLOCATE statement"_err_en_US); + stopCheckingAllocate = true; + } + info.gotStream = true; + }, + [&](const parser::AllocOpt::Pinned &pinned) { // CUDA + if (info.gotPinned) { + context.Say( + "PINNED may not be duplicated in a ALLOCATE statement"_err_en_US); + stopCheckingAllocate = true; + } + info.gotPinned = true; + }, }, allocOpt.u); } @@ -569,12 +585,13 @@ bool AllocationCheckerHelper::RunChecks(SemanticsContext &context) { return false; } context.CheckIndexVarRedefine(name_); + const Scope &subpScope{ + GetProgramUnitContaining(context.FindScope(name_.source))}; if (allocateObject_.typedExpr && allocateObject_.typedExpr->v) { - if (auto whyNot{ - WhyNotDefinable(name_.source, context.FindScope(name_.source), - {DefinabilityFlag::PointerDefinition, - DefinabilityFlag::AcceptAllocatable}, - *allocateObject_.typedExpr->v)}) { + if (auto whyNot{WhyNotDefinable(name_.source, subpScope, + {DefinabilityFlag::PointerDefinition, + DefinabilityFlag::AcceptAllocatable}, + *allocateObject_.typedExpr->v)}) { context .Say(name_.source, "Name in ALLOCATE statement is not definable"_err_en_US) diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp new file mode 100644 index 000000000000..c0c6ff4c1a2b --- /dev/null +++ b/flang/lib/Semantics/check-cuda.cpp @@ -0,0 +1,416 @@ +//===-- lib/Semantics/check-cuda.cpp ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "check-cuda.h" +#include "flang/Common/template.h" +#include "flang/Evaluate/fold.h" +#include "flang/Evaluate/traverse.h" +#include "flang/Parser/parse-tree-visitor.h" +#include "flang/Parser/parse-tree.h" +#include "flang/Parser/tools.h" +#include "flang/Semantics/expression.h" +#include "flang/Semantics/symbol.h" + +// Once labeled DO constructs have been canonicalized and their parse subtrees +// transformed into parser::DoConstructs, scan the parser::Blocks of the program +// and merge adjacent CUFKernelDoConstructs and DoConstructs whenever the +// CUFKernelDoConstruct doesn't already have an embedded DoConstruct. Also +// emit errors about improper or missing DoConstructs. + +namespace Fortran::parser { +struct Mutator { + template bool Pre(A &) { return true; } + template void Post(A &) {} + bool Pre(Block &); +}; + +bool Mutator::Pre(Block &block) { + for (auto iter{block.begin()}; iter != block.end(); ++iter) { + if (auto *kernel{Unwrap(*iter)}) { + auto &nested{std::get>(kernel->t)}; + if (!nested) { + if (auto next{iter}; ++next != block.end()) { + if (auto *doConstruct{Unwrap(*next)}) { + nested = std::move(*doConstruct); + block.erase(next); + } + } + } + } else { + Walk(*iter, *this); + } + } + return false; +} +} // namespace Fortran::parser + +namespace Fortran::semantics { + +bool CanonicalizeCUDA(parser::Program &program) { + parser::Mutator mutator; + parser::Walk(program, mutator); + return true; +} + +using MaybeMsg = std::optional; + +// Traverses an evaluate::Expr<> in search of unsupported operations +// on the device. + +struct DeviceExprChecker + : public evaluate::AnyTraverse { + using Result = MaybeMsg; + using Base = evaluate::AnyTraverse; + DeviceExprChecker() : Base(*this) {} + using Base::operator(); + Result operator()(const evaluate::ProcedureDesignator &x) const { + if (const Symbol * sym{x.GetInterfaceSymbol()}) { + const auto *subp{ + sym->GetUltimate().detailsIf()}; + if (subp) { + if (auto attrs{subp->cudaSubprogramAttrs()}) { + if (*attrs == common::CUDASubprogramAttrs::HostDevice || + *attrs == common::CUDASubprogramAttrs::Device) { + return {}; + } + } + } + } else if (x.GetSpecificIntrinsic()) { + // TODO(CUDA): Check for unsupported intrinsics here + return {}; + } + return parser::MessageFormattedText( + "'%s' may not be called in device code"_err_en_US, x.GetName()); + } +}; + +template static MaybeMsg CheckUnwrappedExpr(const A &x) { + if (const auto *expr{parser::Unwrap(x)}) { + return DeviceExprChecker{}(expr->typedExpr); + } + return {}; +} + +template +static void CheckUnwrappedExpr( + SemanticsContext &context, SourceName at, const A &x) { + if (const auto *expr{parser::Unwrap(x)}) { + if (auto msg{DeviceExprChecker{}(expr->typedExpr)}) { + context.Say(at, std::move(*msg)); + } + } +} + +template struct ActionStmtChecker { + template static MaybeMsg WhyNotOk(const A &x) { + if constexpr (ConstraintTrait) { + return WhyNotOk(x.thing); + } else if constexpr (WrapperTrait) { + return WhyNotOk(x.v); + } else if constexpr (UnionTrait) { + return WhyNotOk(x.u); + } else if constexpr (TupleTrait) { + return WhyNotOk(x.t); + } else { + return parser::MessageFormattedText{ + "Statement may not appear in device code"_err_en_US}; + } + } + template + static MaybeMsg WhyNotOk(const common::Indirection &x) { + return WhyNotOk(x.value()); + } + template + static MaybeMsg WhyNotOk(const std::variant &x) { + return common::visit([](const auto &x) { return WhyNotOk(x); }, x); + } + template + static MaybeMsg WhyNotOk(const std::tuple &x) { + if constexpr (J == sizeof...(As)) { + return {}; + } else if (auto msg{WhyNotOk(std::get(x))}) { + return msg; + } else { + return WhyNotOk<(J + 1)>(x); + } + } + template static MaybeMsg WhyNotOk(const std::list &x) { + for (const auto &y : x) { + if (MaybeMsg result{WhyNotOk(y)}) { + return result; + } + } + return {}; + } + template static MaybeMsg WhyNotOk(const std::optional &x) { + if (x) { + return WhyNotOk(*x); + } else { + return {}; + } + } + template + static MaybeMsg WhyNotOk(const parser::UnlabeledStatement &x) { + return WhyNotOk(x.statement); + } + template + static MaybeMsg WhyNotOk(const parser::Statement &x) { + return WhyNotOk(x.statement); + } + static MaybeMsg WhyNotOk(const parser::AllocateStmt &) { + return {}; // AllocateObjects are checked elsewhere + } + static MaybeMsg WhyNotOk(const parser::AllocateCoarraySpec &) { + return parser::MessageFormattedText( + "A coarray may not be allocated on the device"_err_en_US); + } + static MaybeMsg WhyNotOk(const parser::DeallocateStmt &) { + return {}; // AllocateObjects are checked elsewhere + } + static MaybeMsg WhyNotOk(const parser::AssignmentStmt &x) { + return DeviceExprChecker{}(x.typedAssignment); + } + static MaybeMsg WhyNotOk(const parser::CallStmt &x) { + return DeviceExprChecker{}(x.typedCall); + } + static MaybeMsg WhyNotOk(const parser::ContinueStmt &) { return {}; } + static MaybeMsg WhyNotOk(const parser::IfStmt &x) { + if (auto result{ + CheckUnwrappedExpr(std::get(x.t))}) { + return result; + } + return WhyNotOk( + std::get>(x.t) + .statement); + } + static MaybeMsg WhyNotOk(const parser::NullifyStmt &x) { + for (const auto &y : x.v) { + if (MaybeMsg result{DeviceExprChecker{}(y.typedExpr)}) { + return result; + } + } + return {}; + } + static MaybeMsg WhyNotOk(const parser::PointerAssignmentStmt &x) { + return DeviceExprChecker{}(x.typedAssignment); + } +}; + +template class DeviceContextChecker { +public: + explicit DeviceContextChecker(SemanticsContext &c) : context_{c} {} + void CheckSubprogram(const parser::Name &name, const parser::Block &body) { + if (name.symbol) { + const auto *subp{ + name.symbol->GetUltimate().detailsIf()}; + if (subp && subp->moduleInterface()) { + subp = subp->moduleInterface() + ->GetUltimate() + .detailsIf(); + } + if (subp && + subp->cudaSubprogramAttrs().value_or( + common::CUDASubprogramAttrs::Host) != + common::CUDASubprogramAttrs::Host) { + Check(body); + } + } + } + void Check(const parser::Block &block) { + for (const auto &epc : block) { + Check(epc); + } + } + +private: + void Check(const parser::ExecutionPartConstruct &epc) { + common::visit( + common::visitors{ + [&](const parser::ExecutableConstruct &x) { Check(x); }, + [&](const parser::Statement> + &x) { + context_.Say(x.source, + "Device code may not contain an ENTRY statement"_err_en_US); + }, + [](const parser::Statement> + &) {}, + [](const parser::Statement> + &) {}, + [](const parser::Statement< + common::Indirection> &) {}, + [](const parser::ErrorRecovery &) {}, + }, + epc.u); + } + void Check(const parser::ExecutableConstruct &ec) { + common::visit( + common::visitors{ + [&](const parser::Statement &stmt) { + Check(stmt.statement, stmt.source); + }, + [&](const common::Indirection &x) { + if (const std::optional &control{ + x.value().GetLoopControl()}) { + common::visit([&](const auto &y) { Check(y); }, control->u); + } + Check(std::get(x.value().t)); + }, + [&](const common::Indirection &x) { + Check(std::get(x.value().t)); + }, + [&](const common::Indirection &x) { + Check(x.value()); + }, + [&](const auto &x) { + if (auto source{parser::GetSource(x)}) { + context_.Say(*source, + "Statement may not appear in device code"_err_en_US); + } + }, + }, + ec.u); + } + void Check(const parser::ActionStmt &stmt, const parser::CharBlock &source) { + common::visit( + common::visitors{ + [&](const auto &x) { + if (auto msg{ActionStmtChecker::WhyNotOk(x)}) { + context_.Say(source, std::move(*msg)); + } + }, + }, + stmt.u); + } + void Check(const parser::IfConstruct &ic) { + const auto &ifS{std::get>(ic.t)}; + CheckUnwrappedExpr(context_, ifS.source, + std::get(ifS.statement.t)); + Check(std::get(ic.t)); + for (const auto &eib : + std::get>(ic.t)) { + const auto &eIfS{std::get>(eib.t)}; + CheckUnwrappedExpr(context_, eIfS.source, + std::get(eIfS.statement.t)); + Check(std::get(eib.t)); + } + if (const auto &eb{ + std::get>(ic.t)}) { + Check(std::get(eb->t)); + } + } + void Check(const parser::LoopControl::Bounds &bounds) { + Check(bounds.lower); + Check(bounds.upper); + if (bounds.step) { + Check(*bounds.step); + } + } + void Check(const parser::LoopControl::Concurrent &x) { + const auto &header{std::get(x.t)}; + for (const auto &cc : + std::get>(header.t)) { + Check(std::get<1>(cc.t)); + Check(std::get<2>(cc.t)); + if (const auto &step{ + std::get>(cc.t)}) { + Check(*step); + } + } + if (const auto &mask{ + std::get>(header.t)}) { + Check(*mask); + } + } + void Check(const parser::ScalarLogicalExpr &x) { + Check(DEREF(parser::Unwrap(x))); + } + void Check(const parser::ScalarIntExpr &x) { + Check(DEREF(parser::Unwrap(x))); + } + void Check(const parser::ScalarExpr &x) { + Check(DEREF(parser::Unwrap(x))); + } + void Check(const parser::Expr &expr) { + if (MaybeMsg msg{DeviceExprChecker{}(expr.typedExpr)}) { + context_.Say(expr.source, std::move(*msg)); + } + } + + SemanticsContext &context_; +}; + +void CUDAChecker::Enter(const parser::SubroutineSubprogram &x) { + DeviceContextChecker{context_}.CheckSubprogram( + std::get( + std::get>(x.t).statement.t), + std::get(x.t).v); +} + +void CUDAChecker::Enter(const parser::FunctionSubprogram &x) { + DeviceContextChecker{context_}.CheckSubprogram( + std::get( + std::get>(x.t).statement.t), + std::get(x.t).v); +} + +void CUDAChecker::Enter(const parser::SeparateModuleSubprogram &x) { + DeviceContextChecker{context_}.CheckSubprogram( + std::get>(x.t).statement.v, + std::get(x.t).v); +} + +// !$CUF KERNEL DO semantic checks + +static int DoConstructTightNesting( + const parser::DoConstruct *doConstruct, const parser::Block *&innerBlock) { + if (!doConstruct || !doConstruct->IsDoNormal()) { + return 0; + } + innerBlock = &std::get(doConstruct->t); + if (innerBlock->size() == 1) { + if (const auto *execConstruct{ + std::get_if(&innerBlock->front().u)}) { + if (const auto *next{ + std::get_if>( + &execConstruct->u)}) { + return 1 + DoConstructTightNesting(&next->value(), innerBlock); + } + } + } + return 1; +} + +void CUDAChecker::Enter(const parser::CUFKernelDoConstruct &x) { + auto source{std::get(x.t).source}; + const auto &directive{std::get(x.t)}; + std::int64_t depth{1}; + if (auto expr{AnalyzeExpr(context_, + std::get>( + directive.t))}) { + depth = evaluate::ToInt64(expr).value_or(0); + if (depth <= 0) { + context_.Say(source, + "!$CUF KERNEL DO (%jd): loop nesting depth must be positive"_err_en_US, + std::intmax_t{depth}); + depth = 1; + } + } + const parser::DoConstruct *doConstruct{common::GetPtrFromOptional( + std::get>(x.t))}; + const parser::Block *innerBlock{nullptr}; + if (DoConstructTightNesting(doConstruct, innerBlock) < depth) { + context_.Say(source, + "!$CUF KERNEL DO (%jd) must be followed by a DO construct with tightly nested outer levels of counted DO loops"_err_en_US, + std::intmax_t{depth}); + } + if (innerBlock) { + DeviceContextChecker{context_}.Check(*innerBlock); + } +} + +} // namespace Fortran::semantics diff --git a/flang/lib/Semantics/check-cuda.h b/flang/lib/Semantics/check-cuda.h new file mode 100644 index 000000000000..d863795f16a7 --- /dev/null +++ b/flang/lib/Semantics/check-cuda.h @@ -0,0 +1,50 @@ +//===-- lib/Semantics/check-cuda.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_SEMANTICS_CHECK_CUDA_H_ +#define FORTRAN_SEMANTICS_CHECK_CUDA_H_ + +#include "flang/Semantics/semantics.h" +#include + +namespace Fortran::parser { +struct Program; +class Messages; +struct Name; +class CharBlock; +struct ExecutionPartConstruct; +struct ExecutableConstruct; +struct ActionStmt; +struct IfConstruct; +struct CUFKernelDoConstruct; +struct SubroutineSubprogram; +struct FunctionSubprogram; +struct SeparateModuleSubprogram; +} // namespace Fortran::parser + +namespace Fortran::semantics { + +class SemanticsContext; + +class CUDAChecker : public virtual BaseChecker { +public: + explicit CUDAChecker(SemanticsContext &c) : context_{c} {} + void Enter(const parser::SubroutineSubprogram &); + void Enter(const parser::FunctionSubprogram &); + void Enter(const parser::SeparateModuleSubprogram &); + void Enter(const parser::CUFKernelDoConstruct &); + +private: + SemanticsContext &context_; +}; + +bool CanonicalizeCUDA(parser::Program &); + +} // namespace Fortran::semantics + +#endif // FORTRAN_SEMANTICS_CHECK_CUDA_H_ diff --git a/flang/lib/Semantics/check-deallocate.cpp b/flang/lib/Semantics/check-deallocate.cpp index 085dbbf67d2c..2bdded9018f4 100644 --- a/flang/lib/Semantics/check-deallocate.cpp +++ b/flang/lib/Semantics/check-deallocate.cpp @@ -19,10 +19,13 @@ namespace Fortran::semantics { void DeallocateChecker::Leave(const parser::DeallocateStmt &deallocateStmt) { for (const parser::AllocateObject &allocateObject : std::get>(deallocateStmt.t)) { + parser::CharBlock source; + const Symbol *symbol{nullptr}; common::visit( common::visitors{ [&](const parser::Name &name) { - auto const *symbol{name.symbol}; + source = name.source; + symbol = name.symbol; if (context_.HasError(symbol)) { // already reported an error } else if (!IsVariableName(*symbol)) { @@ -58,9 +61,10 @@ void DeallocateChecker::Leave(const parser::DeallocateStmt &deallocateStmt) { [&](const parser::StructureComponent &structureComponent) { // Only perform structureComponent checks if it was successfully // analyzed by expression analysis. + source = structureComponent.component.source; + symbol = structureComponent.component.symbol; if (const auto *expr{GetExpr(context_, allocateObject)}) { - if (const Symbol *symbol{structureComponent.component.symbol}) { - auto source{structureComponent.component.source}; + if (symbol) { if (!IsAllocatableOrPointer(*symbol)) { // C932 context_.Say(source, "Component in DEALLOCATE statement must have the ALLOCATABLE or POINTER attribute"_err_en_US); diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 6b503f666af6..0df8a5765124 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -2740,7 +2740,7 @@ void ScopeHandler::SetCUDADataAttr(SourceName source, Symbol &symbol, Say(source, "'%s' already has another CUDA data attribute ('%s')"_err_en_US, symbol.name(), - common::EnumToString(*object->cudaDataAttr()).substr()); + std::string{common::EnumToString(*object->cudaDataAttr())}.c_str()); } else { object->set_cudaDataAttr(attr); } @@ -7700,13 +7700,11 @@ bool ResolveNamesVisitor::Pre(const parser::SpecificationPart &x) { void ResolveNamesVisitor::UseCUDABuiltinNames() { if (FindCUDADeviceContext(&currScope())) { - if (const Scope * CUDABuiltins{context().GetCUDABuiltinsScope()}) { - for (const auto &[name, symbol] : *CUDABuiltins) { - if (!FindInScope(name)) { - auto &localSymbol{MakeSymbol(name)}; - localSymbol.set_details(UseDetails{name, *symbol}); - localSymbol.flags() = symbol->flags(); - } + for (const auto &[name, symbol] : context().GetCUDABuiltinsScope()) { + if (!FindInScope(name)) { + auto &localSymbol{MakeSymbol(name)}; + localSymbol.set_details(UseDetails{name, *symbol}); + localSymbol.flags() = symbol->flags(); } } } diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index b70cd745ebd8..7f2f64ecb1bd 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -16,6 +16,7 @@ #include "check-arithmeticif.h" #include "check-case.h" #include "check-coarray.h" +#include "check-cuda.h" #include "check-data.h" #include "check-deallocate.h" #include "check-declarations.h" @@ -69,12 +70,13 @@ static void GetSymbolNames(const Scope &scope, NameToSymbolMap &symbols) { // children are visited, Leave is called after. No two checkers may have the // same Enter or Leave function. Each checker must be constructible from // SemanticsContext and have BaseChecker as a virtual base class. -template class SemanticsVisitor : public virtual C... { +template +class SemanticsVisitor : public virtual BaseChecker, public virtual C... { public: - using C::Enter...; - using C::Leave...; using BaseChecker::Enter; using BaseChecker::Leave; + using C::Enter...; + using C::Leave...; SemanticsVisitor(SemanticsContext &context) : C{context}..., context_{context} {} @@ -158,12 +160,14 @@ private: }; using StatementSemanticsPass1 = ExprChecker; -using StatementSemanticsPass2 = SemanticsVisitor; +using StatementSemanticsPass2 = SemanticsVisitor; +using StatementSemanticsPass3 = + SemanticsVisitor; static bool PerformStatementSemantics( SemanticsContext &context, parser::Program &program) { @@ -174,6 +178,11 @@ static bool PerformStatementSemantics( StatementSemanticsPass1{context}.Walk(program); StatementSemanticsPass2 pass2{context}; pass2.Walk(program); + if (context.languageFeatures().IsEnabled(common::LanguageFeature::OpenACC) || + context.languageFeatures().IsEnabled(common::LanguageFeature::OpenMP) || + context.languageFeatures().IsEnabled(common::LanguageFeature::CUDA)) { + StatementSemanticsPass3{context}.Walk(program); + } if (!context.AnyFatalError()) { pass2.CompileDataInitializationsIntoInitializers(); } @@ -476,11 +485,12 @@ void SemanticsContext::UsePPCFortranBuiltinTypesModule() { } } -const Scope *SemanticsContext::GetCUDABuiltinsScope() { - if (!CUDABuiltinsScope_) { - CUDABuiltinsScope_ = GetBuiltinModule("__cuda_builtins"); +const Scope &SemanticsContext::GetCUDABuiltinsScope() { + if (!cudaBuiltinsScope_) { + cudaBuiltinsScope_ = GetBuiltinModule("__cuda_builtins"); + CHECK(cudaBuiltinsScope_.value() != nullptr); } - return *CUDABuiltinsScope_; + return **cudaBuiltinsScope_; } void SemanticsContext::UsePPCFortranBuiltinsModule() { @@ -525,6 +535,7 @@ bool Semantics::Perform() { parser::CanonicalizeDo(program_) && // force line break CanonicalizeAcc(context_.messages(), program_) && CanonicalizeOmp(context_.messages(), program_) && + CanonicalizeCUDA(program_) && PerformStatementSemantics(context_, program_) && ModFileWriter{context_}.WriteAll(); } @@ -566,7 +577,7 @@ void DoDumpSymbols(llvm::raw_ostream &os, const Scope &scope, int indent) { if (scope.derivedTypeSpec()) { os << " instantiation of " << *scope.derivedTypeSpec(); } - os << '\n'; + os << " sourceRange=" << scope.sourceRange().size() << " bytes\n"; ++indent; for (const auto &pair : scope) { const auto &symbol{*pair.second}; diff --git a/flang/module/__fortran_builtins.f90 b/flang/module/__fortran_builtins.f90 index 0ff35ccbbbc2..295ebbef1322 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang/module/__fortran_builtins.f90 @@ -75,4 +75,23 @@ module __Fortran_builtins intrinsic :: __builtin_compiler_options, __builtin_compiler_version + interface operator(==) + module procedure __builtin_c_ptr_eq + end interface + interface operator(/=) + module procedure __builtin_c_ptr_eq + end interface + +contains + + elemental logical function __builtin_c_ptr_eq(x, y) + type(__builtin_c_ptr), intent(in) :: x, y + __builtin_c_ptr_eq = x%__address == y%__address + end function + + elemental logical function __builtin_c_ptr_ne(x, y) + type(__builtin_c_ptr), intent(in) :: x, y + __builtin_c_ptr_ne = x%__address /= y%__address + end function + end module diff --git a/flang/module/iso_c_binding.f90 b/flang/module/iso_c_binding.f90 index 68035579ec62..a839d949c97c 100644 --- a/flang/module/iso_c_binding.f90 +++ b/flang/module/iso_c_binding.f90 @@ -15,7 +15,8 @@ module iso_c_binding c_ptr => __builtin_c_ptr, & c_funptr => __builtin_c_funptr, & c_sizeof => sizeof, & - c_loc => __builtin_c_loc + c_loc => __builtin_c_loc, & + operator(==), operator(/=) type(c_ptr), parameter :: c_null_ptr = c_ptr(0) type(c_funptr), parameter :: c_null_funptr = c_funptr(0) diff --git a/flang/test/Driver/compiler_options.f90 b/flang/test/Driver/compiler_options.f90 index 7b904ca71f4c..c329a5033884 100644 --- a/flang/test/Driver/compiler_options.f90 +++ b/flang/test/Driver/compiler_options.f90 @@ -1,6 +1,6 @@ ! RUN: %flang -S -emit-llvm -o - %s | FileCheck %s ! Test communication of COMPILER_OPTIONS from flang-new to flang-new -fc1. -! CHECK: [[OPTSVAR:@_QQcl\.[0-9a-f]+]] = internal constant [[[OPTSLEN:[0-9]+]] x i8] c"{{.*}}flang-new{{(\.exe)?}} -S -emit-llvm -o - {{.*}}compiler_options.f90" +! CHECK: [[OPTSVAR:@_QQcl\.[0-9a-f]+]] = {{[a-z]+}} constant [[[OPTSLEN:[0-9]+]] x i8] c"{{.*}}flang-new{{(\.exe)?}} -S -emit-llvm -o - {{.*}}compiler_options.f90" program main use ISO_FORTRAN_ENV, only: compiler_options implicit none diff --git a/flang/test/Parser/cuf-sanity-unparse.CUF b/flang/test/Parser/cuf-sanity-unparse.CUF index d4495c4fddcc..7ac39448d7bd 100644 --- a/flang/test/Parser/cuf-sanity-unparse.CUF +++ b/flang/test/Parser/cuf-sanity-unparse.CUF @@ -34,9 +34,9 @@ include "cuf-sanity-common" !CHECK: !$CUF KERNEL DO <<<1_4,(2_4,3_4),STREAM=1_4>>> !CHECK: DO j=1_4,10_4 !CHECK: END DO -!CHECK: CALL globalsub<<<1_4,2_4>>> -!CHECK: CALL globalsub<<<1_4,2_4,3_4>>> -!CHECK: CALL globalsub<<<1_4,2_4,3_4,4_4>>> +!CHECK: CALL globalsub<<<1_4,2_4>>>() +!CHECK: CALL globalsub<<<1_4,2_4,3_4>>>() +!CHECK: CALL globalsub<<<1_4,2_4,3_4,4_4>>>() !CHECK: ALLOCATE(pa(32_4), STREAM=1_4, PINNED=ispinned) !CHECK: END SUBROUTINE !CHECK: END MODULE diff --git a/flang/test/Semantics/cuf04.cuf b/flang/test/Semantics/cuf04.cuf new file mode 100644 index 000000000000..2e2faa90b490 --- /dev/null +++ b/flang/test/Semantics/cuf04.cuf @@ -0,0 +1,24 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! CUDA Fortran section 2.5.6 restrictions +module m + contains + attributes(device) subroutine devsubr(n) + integer, intent(in) :: n + !WARNING: 'x1' should not have the SAVE attribute or initialization in a device subprogram + real, save :: x1 + !WARNING: 'x2' should not have the SAVE attribute or initialization in a device subprogram + real :: x2 = 1. + !ERROR: Device subprogram 'devsubr' cannot call itself + if (n > 0) call devsubr(n-1) + end subroutine + attributes(global) subroutine globsubr + end subroutine + subroutine boring + end subroutine + subroutine test + !ERROR: 'globsubr' is a kernel subroutine and must be called with kernel launch parameters in chevrons + call globsubr + !ERROR: Kernel launch parameters in chevrons may not be used unless calling a kernel subroutine + call boring<<<1,2>>> + end subroutine +end module diff --git a/flang/test/Semantics/cuf05.cuf b/flang/test/Semantics/cuf05.cuf new file mode 100644 index 000000000000..a9fd826d53c0 --- /dev/null +++ b/flang/test/Semantics/cuf05.cuf @@ -0,0 +1,19 @@ +! RUN: %flang_fc1 -fdebug-dump-symbols %s 2>&1 | FileCheck --dump-input-context=500 %s +!CHECK: Global scope: size=0 alignment=1 sourceRange=0 bytes +!CHECK: IntrinsicModules scope: size=0 alignment=1 sourceRange=0 bytes +!CHECK: Module scope: __fortran_builtins +!CHECK: Module scope: __cuda_builtins size=0 alignment=1 +!CHECK: Module scope: __fortran_type_info +!CHECK: Module scope: m size=0 alignment=1 +!CHECK: Subprogram scope: devsubr size=0 alignment=1 +module m + implicit none + contains + attributes(device) subroutine devsubr() + !CHECK: blockdim: Use from blockdim in __cuda_builtins + !CHECK: blockidx: Use from blockidx in __cuda_builtins + !CHECK: griddim: Use from griddim in __cuda_builtins + !CHECK: threadidx: Use from threadidx in __cuda_builtins + !CHECK: warpsize: Use from warpsize in __cuda_builtins + end subroutine +end module diff --git a/flang/test/Semantics/cuf06.cuf b/flang/test/Semantics/cuf06.cuf new file mode 100644 index 000000000000..44dd3c7ccc4c --- /dev/null +++ b/flang/test/Semantics/cuf06.cuf @@ -0,0 +1,15 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +module m + use, intrinsic :: __fortran_builtins, only: __builtin_dim3 + contains + attributes(global) subroutine kernel + end subroutine + subroutine test + call kernel<<< 1, 32 >>> ! ok + call kernel<<< __builtin_dim3(1,1), __builtin_dim3(32,1,1) >>> ! ok + !ERROR: Kernel launch grid parameter must be either integer or TYPE(dim3) + call kernel<<< 1.d0, 32 >>> + !ERROR: Kernel launch block parameter must be either integer or TYPE(dim3) + call kernel<<< 1, "abc" >>> + end +end module diff --git a/flang/test/Semantics/cuf09.cuf b/flang/test/Semantics/cuf09.cuf new file mode 100644 index 000000000000..dd70c3b1ff5e --- /dev/null +++ b/flang/test/Semantics/cuf09.cuf @@ -0,0 +1,76 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +module m + contains + attributes(device) subroutine devsub + !ERROR: Statement may not appear in device code + !$cuf kernel do <<< 1, 2 >>> + do k=1,10 + end do + end +end + +program main + !ERROR: !$CUF KERNEL DO (1) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do <<< 1, 2 >>> + do while (.false.) + end do + !ERROR: !$CUF KERNEL DO (1) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do <<< 1, 2 >>> + do + exit + end do + !ERROR: !$CUF KERNEL DO (1) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do <<< 1, 2 >>> + do concurrent (j=1:10) + end do + !$cuf kernel do <<< 1, 2 >>> + do 1 j=1,10 +1 continue ! ok + !$cuf kernel do <<< 1, 2 >>> + do j=1,10 + end do ! ok + !$cuf kernel do <<< 1, 2 >>> + do j=1,10 + !ERROR: Statement may not appear in device code + !$cuf kernel do <<< 1, 2 >>> + do k=1,10 + end do + end do + !ERROR: !$CUF KERNEL DO (-1): loop nesting depth must be positive + !$cuf kernel do (-1) <<< 1, 2 >>> + do j=1,10 + end do + !ERROR: !$CUF KERNEL DO (1) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do <<< 1, 2 >>> + continue + !ERROR: !$CUF KERNEL DO (2) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do (2) <<< 1, 2 >>> + do j=1,10 + end do + !ERROR: !$CUF KERNEL DO (2) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do (2) <<< 1, 2 >>> + do j=1,10 + continue + end do + !ERROR: !$CUF KERNEL DO (2) must be followed by a DO construct with tightly nested outer levels of counted DO loops + !$cuf kernel do (2) <<< 1, 2 >>> + do j=1,10 + do k=1,10 + end do + continue + end do + !$cuf kernel do <<< 1, 2 >>> + do j = 1, 10 + !ERROR: 'foo' may not be called in device code + call foo + !ERROR: 'bar' may not be called in device code + x = bar() + !ERROR: 'ifunc' may not be called in device code + if (ifunc() /= 0) continue + !ERROR: 'ifunc' may not be called in device code + if (ifunc() /= 0) then + !ERROR: 'ifunc' may not be called in device code + else if (ifunc() /= 1) then + end if + end do +end diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index b76d01c284a3..d4e882d3e4ec 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -10,6 +10,7 @@ set(MODULES "__fortran_type_info" "__fortran_ppc_types" "__fortran_ppc_intrinsics" + "__cuda_builtins" "ieee_arithmetic" "ieee_exceptions" "ieee_features" diff --git a/flang/unittests/Runtime/Time.cpp b/flang/unittests/Runtime/Time.cpp index ceccb4a70805..ec0caa743bcd 100644 --- a/flang/unittests/Runtime/Time.cpp +++ b/flang/unittests/Runtime/Time.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#ifndef __clang__ // 16.0.3 lacks + #include "gtest/gtest.h" #include "flang/Runtime/time-intrinsic.h" #include @@ -166,3 +168,4 @@ TEST(TimeIntrinsics, DateAndTime) { EXPECT_LE(minutes, 59); } } +#endif // __clang__