Mostly mechanical changes in preparation for extracting the Flang-RT "subproject" in #110217. This PR intends only to move pre-existing files to the new folder structure, with no behavioral change. Common files (headers, testing, cmake) shared by Flang-RT and Flang remain in `flang/`. Some cosmetic changes and file path updates were necessary:
* Update relative paths to the new location of the source files and in `add_subdirectory`.
* Add the new location's include directory to `include_directories`.
* The unittest/Evaluate directory has unit tests for both Flang-RT and Flang. A new `CMakeLists.txt` was introduced for the Flang-RT tests.
* Change the `#include` paths relative to the include directive.
* Run clang-format on the `#include` directives.
* Since the paths are part of the copyright header and include guards, a script was used to canonicalize those.
* `test/Runtime` and the runtime tests in `test/Driver` are moved, but the lit.cfg.py mechanism to execute them will only be added in #110217.
114 lines
3.9 KiB
C++
114 lines
3.9 KiB
C++
//===-- lib/cuda/allocatable.cpp --------------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "flang/Runtime/CUDA/allocatable.h"
|
|
#include "flang-rt/runtime/assign-impl.h"
|
|
#include "flang-rt/runtime/descriptor.h"
|
|
#include "flang-rt/runtime/stat.h"
|
|
#include "flang-rt/runtime/terminator.h"
|
|
#include "flang/Runtime/CUDA/common.h"
|
|
#include "flang/Runtime/CUDA/descriptor.h"
|
|
#include "flang/Runtime/CUDA/memmove-function.h"
|
|
#include "flang/Runtime/allocatable.h"
|
|
|
|
#include "cuda_runtime.h"
|
|
|
|
namespace Fortran::runtime::cuda {
|
|
|
|
extern "C" {
|
|
RT_EXT_API_GROUP_BEGIN
|
|
|
|
// Allocates `desc` by forwarding every argument to CUFAllocatableAllocate.
// In host compilations (RT_DEVICE_COMPILATION not defined), a StatOk result
// is followed by a descriptor synchronization: the address obtained from
// CUFGetDeviceAddress for `desc` is handed, together with `desc` itself, to
// CUFDescriptorSync.
// Returns the status reported by CUFAllocatableAllocate.
int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
    bool *pinned, bool hasStat, const Descriptor *errMsg,
    const char *sourceFile, int sourceLine) {
  const int status{RTNAME(CUFAllocatableAllocate)(
      desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
#ifndef RT_DEVICE_COMPILATION
  // The descriptor is only synchronized when the allocation is performed
  // from the host, and only when that allocation succeeded.
  if (status != StatOk) {
    return status;
  }
  void *devicePtr{RTNAME(CUFGetDeviceAddress)(
      static_cast<void *>(&desc), sourceFile, sourceLine)};
  RTNAME(CUFDescriptorSync)(
      static_cast<Descriptor *>(devicePtr), &desc, sourceFile, sourceLine);
#endif
  return status;
}
|
|
|
|
// Allocates `desc` with the standard AllocatableAllocate entry point.
// Descriptors that carry an addendum are not supported yet and crash through
// a Terminator built from `sourceFile`/`sourceLine`.
// When `pinned` is non-null, `*pinned` is set to whether the allocation
// returned StatOk. `stream` is not referenced by this implementation.
// Returns the status reported by AllocatableAllocate.
int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
    bool *pinned, bool hasStat, const Descriptor *errMsg,
    const char *sourceFile, int sourceLine) {
  if (desc.HasAddendum()) {
    // TODO: This requires a bit more work to set the correct type descriptor
    // address.
    Terminator{sourceFile, sourceLine}.Crash(
        "not yet implemented: CUDA descriptor allocation with addendum");
  }
  // Delegate to the regular allocatable allocation.
  const int status{RTNAME(AllocatableAllocate)(
      desc, hasStat, errMsg, sourceFile, sourceLine)};
  if (pinned != nullptr) {
    // The pinned flag simply mirrors the allocation status. More
    // infrastructure is needed to set it closer to the actual allocation
    // call.
    const bool succeeded{status == StatOk};
    *pinned = succeeded;
  }
  return status;
}
|
|
|
|
// Sourced allocation: allocates `alloc` through CUFAllocatableAllocate and,
// if that returned StatOk, calls DoFromSourceAssign on `alloc` and `source`
// with MemmoveHostToDevice supplied as the memmove function.
// Returns the status reported by CUFAllocatableAllocate; on any status other
// than StatOk no assignment is attempted.
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
    const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
    const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
  const int status{RTNAME(CUFAllocatableAllocate)(
      alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
  if (status != StatOk) {
    return status;
  }
  Terminator crashHandler{sourceFile, sourceLine};
  Fortran::runtime::DoFromSourceAssign(
      alloc, source, crashHandler, &MemmoveHostToDevice);
  return status;
}
|
|
|
|
// Sourced allocation with descriptor synchronization: allocates `alloc`
// through CUFAllocatableAllocateSync and, if that returned StatOk, calls
// DoFromSourceAssign on `alloc` and `source` with MemmoveHostToDevice
// supplied as the memmove function.
// Returns the status reported by CUFAllocatableAllocateSync; on any status
// other than StatOk no assignment is attempted.
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
    const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
    const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
  const int status{RTNAME(CUFAllocatableAllocateSync)(
      alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
  if (status != StatOk) {
    return status;
  }
  Terminator crashHandler{sourceFile, sourceLine};
  Fortran::runtime::DoFromSourceAssign(
      alloc, source, crashHandler, &MemmoveHostToDevice);
  return status;
}
|
|
|
|
// Deallocates `desc` with the standard AllocatableDeallocate entry point.
// In host compilations (RT_DEVICE_COMPILATION not defined), a StatOk result
// is followed by a descriptor synchronization: the address obtained from
// CUFGetDeviceAddress for `desc` is handed, together with `desc` itself, to
// CUFDescriptorSync.
// Returns the status reported by AllocatableDeallocate.
int RTDEF(CUFAllocatableDeallocate)(Descriptor &desc, bool hasStat,
    const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
  // Perform the standard deallocation.
  const int status{RTNAME(AllocatableDeallocate)(
      desc, hasStat, errMsg, sourceFile, sourceLine)};
#ifndef RT_DEVICE_COMPILATION
  // The descriptor is only synchronized when the deallocation is performed
  // from the host, and only when that deallocation succeeded.
  if (status != StatOk) {
    return status;
  }
  void *devicePtr{RTNAME(CUFGetDeviceAddress)(
      static_cast<void *>(&desc), sourceFile, sourceLine)};
  RTNAME(CUFDescriptorSync)(
      static_cast<Descriptor *>(devicePtr), &desc, sourceFile, sourceLine);
#endif
  return status;
}
|
|
|
|
RT_EXT_API_GROUP_END
|
|
|
|
} // extern "C"
|
|
|
|
} // namespace Fortran::runtime::cuda
|