Move non-common files from FortranCommon to FortranSupport (analogous to
LLVMSupport) such that
* declarations and definitions that are only used by the Flang compiler,
but not by the runtime, are moved to FortranSupport
* declarations and definitions that are used by both the compiler and
the runtime ("common") remain in FortranCommon
* generic STL-like/ADT/utility classes and algorithms remain in
FortranCommon
This allows for a cleaner separation between compiler and runtime
components, which are compiled differently. For instance, runtime
sources must not use STL's `<optional>` which causes problems with CUDA
support. Instead, the surrogate header `flang/Common/optional.h` must be
used. This PR fixes this for `fast-int-sel.h`.
Declarations in include/Runtime are also used by both, but are
header-only. `ISO_Fortran_binding_wrapper.h`, a header used by compiler
and runtime, is also moved into FortranCommon.
72 lines
2.1 KiB
C++
72 lines
2.1 KiB
C++
//===-- runtime/CUDA/allocator.cpp ----------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "flang/Runtime/CUDA/allocator.h"
|
|
#include "../derived.h"
|
|
#include "../stat.h"
|
|
#include "../terminator.h"
|
|
#include "../type-info.h"
|
|
#include "flang/Common/ISO_Fortran_binding_wrapper.h"
|
|
#include "flang/Runtime/CUDA/common.h"
|
|
#include "flang/Runtime/allocator-registry.h"
|
|
#include "flang/Support/Fortran.h"
|
|
|
|
#include "cuda_runtime.h"
|
|
|
|
namespace Fortran::runtime::cuda {
|
|
extern "C" {
|
|
|
|
// Registers the four CUDA Fortran allocate/free pairs defined in this file
// with allocatorRegistry. Each pair is stored at its own position constant:
// pinned, device, managed and unified.
void RTDEF(CUFRegisterAllocator)() {
  // Pinned allocations.
  allocatorRegistry.Register(
      kPinnedAllocatorPos, {&CUFAllocPinned, &CUFFreePinned});
  // Device allocations.
  allocatorRegistry.Register(
      kDeviceAllocatorPos, {&CUFAllocDevice, &CUFFreeDevice});
  // Managed allocations.
  allocatorRegistry.Register(
      kManagedAllocatorPos, {&CUFAllocManaged, &CUFFreeManaged});
  // Unified allocations.
  allocatorRegistry.Register(
      kUnifiedAllocatorPos, {&CUFAllocUnified, &CUFFreeUnified});
}
|
|
}
|
|
|
|
// Allocates sizeInBytes bytes of pinned host memory through cudaMallocHost
// and returns the resulting pointer. The status returned by cudaMallocHost is
// handed to CUDA_REPORT_IF_ERROR.
void *CUFAllocPinned(std::size_t sizeInBytes) {
  // Start from nullptr so that an indeterminate pointer is never returned
  // should the call leave its out-parameter untouched.
  void *p = nullptr;
  // &p already has type void **, so no cast is required.
  CUDA_REPORT_IF_ERROR(cudaMallocHost(&p, sizeInBytes));
  return p;
}
|
|
|
|
// Counterpart of CUFAllocPinned (both are registered together in
// CUFRegisterAllocator): passes p to cudaFreeHost and forwards the returned
// status to CUDA_REPORT_IF_ERROR.
void CUFFreePinned(void *p) {
  CUDA_REPORT_IF_ERROR(cudaFreeHost(p));
}
|
|
|
|
// Allocates sizeInBytes bytes through cudaMalloc and returns the resulting
// pointer. The status returned by cudaMalloc is handed to
// CUDA_REPORT_IF_ERROR.
void *CUFAllocDevice(std::size_t sizeInBytes) {
  // Start from nullptr so that an indeterminate pointer is never returned
  // should the call leave its out-parameter untouched.
  void *p = nullptr;
  CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
  return p;
}
|
|
|
|
// Counterpart of CUFAllocDevice (both are registered together in
// CUFRegisterAllocator): passes p to cudaFree and forwards the returned
// status to CUDA_REPORT_IF_ERROR.
void CUFFreeDevice(void *p) {
  CUDA_REPORT_IF_ERROR(cudaFree(p));
}
|
|
|
|
// Allocates sizeInBytes bytes through cudaMallocManaged with the
// cudaMemAttachGlobal flag and returns the resulting pointer. The status
// returned by cudaMallocManaged is handed to CUDA_REPORT_IF_ERROR.
void *CUFAllocManaged(std::size_t sizeInBytes) {
  // Start from nullptr so that an indeterminate pointer is never returned
  // should the call leave its out-parameter untouched.
  void *p = nullptr;
  // &p already has type void **, so no cast is required.
  CUDA_REPORT_IF_ERROR(
      cudaMallocManaged(&p, sizeInBytes, cudaMemAttachGlobal));
  // p is already a void *; returned directly, like the sibling allocators.
  return p;
}
|
|
|
|
// Counterpart of CUFAllocManaged (both are registered together in
// CUFRegisterAllocator): passes p to cudaFree and forwards the returned
// status to CUDA_REPORT_IF_ERROR.
void CUFFreeManaged(void *p) {
  CUDA_REPORT_IF_ERROR(cudaFree(p));
}
|
|
|
|
void *CUFAllocUnified(std::size_t sizeInBytes) {
|
|
// Call alloc managed for the time being.
|
|
return CUFAllocManaged(sizeInBytes);
|
|
}
|
|
|
|
// Deallocation entry point for the unified allocator position. For the time
// being it simply delegates to CUFFreeManaged.
void CUFFreeUnified(void *p) {
  CUFFreeManaged(p);
}
|
|
|
|
} // namespace Fortran::runtime::cuda
|