Files
clang-p2996/flang/runtime/CUDA/descriptor.cpp
Valentin Clement (バレンタイン クレメン) 7d1c661381 [flang] Allow to pass an async id to allocate the descriptor (#118713)
This is a patch in preparation for the support stream ordered memory
allocator in CUDA Fortran.

This patch adds an asynchronous id to the AllocatableAllocate runtime
function and to Descriptor::Allocate so it can be passed down to the
registered allocator. It is up to the allocator to use this value or
not.

A follow up patch will implement that asynchronous allocator for CUDA
Fortran.
2024-12-04 18:24:40 -08:00

52 lines
1.6 KiB
C++

//===-- runtime/CUDA/descriptor.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "flang/Runtime/CUDA/descriptor.h"
#include "../terminator.h"
#include "flang/Runtime/CUDA/allocator.h"
#include "flang/Runtime/CUDA/common.h"
#include "cuda_runtime.h"
namespace Fortran::runtime::cuda {
extern "C" {
RT_EXT_API_GROUP_BEGIN
Descriptor *RTDEF(CUFAllocDesciptor)(
std::size_t sizeInBytes, const char *sourceFile, int sourceLine) {
return reinterpret_cast<Descriptor *>(
CUFAllocManaged(sizeInBytes, kCudaNoStream));
}
void RTDEF(CUFFreeDesciptor)(
Descriptor *desc, const char *sourceFile, int sourceLine) {
CUFFreeManaged(reinterpret_cast<void *>(desc));
}
void *RTDEF(CUFGetDeviceAddress)(
void *hostPtr, const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
void *p;
CUDA_REPORT_IF_ERROR(cudaGetSymbolAddress((void **)&p, hostPtr));
if (!p) {
terminator.Crash("Could not retrieve symbol's address");
}
return p;
}
void RTDEF(CUFDescriptorSync)(Descriptor *dst, const Descriptor *src,
const char *sourceFile, int sourceLine) {
std::size_t count{src->SizeInBytes()};
CUDA_REPORT_IF_ERROR(cudaMemcpy(
(void *)dst, (const void *)src, count, cudaMemcpyHostToDevice));
}
RT_EXT_API_GROUP_END
}
} // namespace Fortran::runtime::cuda