//===-- runtime/CUDA/memory.cpp -------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "flang/Runtime/CUDA/memory.h" #include "../terminator.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/assign.h" #include "cuda_runtime.h" namespace Fortran::runtime::cuda { static void *MemmoveHostToDevice( void *dst, const void *src, std::size_t count) { // TODO: Use cudaMemcpyAsync when we have support for stream. CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice)); return dst; } static void *MemmoveDeviceToHost( void *dst, const void *src, std::size_t count) { // TODO: Use cudaMemcpyAsync when we have support for stream. CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost)); return dst; } static void *MemmoveDeviceToDevice( void *dst, const void *src, std::size_t count) { // TODO: Use cudaMemcpyAsync when we have support for stream. CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToDevice)); return dst; } extern "C" { void *RTDEF(CUFMemAlloc)( std::size_t bytes, unsigned type, const char *sourceFile, int sourceLine) { void *ptr = nullptr; if (bytes != 0) { if (type == kMemTypeDevice) { CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes)); } else if (type == kMemTypeManaged || type == kMemTypeUnified) { CUDA_REPORT_IF_ERROR( cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal)); } else if (type == kMemTypePinned) { CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&ptr, bytes)); } else { Terminator terminator{sourceFile, sourceLine}; terminator.Crash("unsupported memory type"); } } return ptr; } void RTDEF(CUFMemFree)( void *ptr, unsigned type, const char *sourceFile, int sourceLine) { if (!ptr) return; if (type == kMemTypeDevice || type == kMemTypeManaged || type == kMemTypeUnified) { CUDA_REPORT_IF_ERROR(cudaFree(ptr)); } else if (type == kMemTypePinned) { CUDA_REPORT_IF_ERROR(cudaFreeHost(ptr)); } else { Terminator terminator{sourceFile, sourceLine}; terminator.Crash("unsupported memory type"); } } void RTDEF(CUFMemsetDescriptor)( Descriptor *desc, void *value, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; terminator.Crash("not yet implemented: CUDA data transfer from a scalar " "value to a descriptor"); } void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { cudaMemcpyKind kind; if (mode == kHostToDevice) { kind = cudaMemcpyHostToDevice; } else if (mode == kDeviceToHost) { kind = cudaMemcpyDeviceToHost; } else if (mode == kDeviceToDevice) { kind = cudaMemcpyDeviceToDevice; } else { Terminator terminator{sourceFile, sourceLine}; terminator.Crash("host to host copy not supported"); } // TODO: Use cudaMemcpyAsync when we have support for stream. CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind)); } void RTDEF(CUFDataTransferDescPtr)(Descriptor *desc, void *addr, std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; terminator.Crash( "not yet implemented: CUDA data transfer from a pointer to a descriptor"); } void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc, std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; terminator.Crash( "not yet implemented: CUDA data transfer from a descriptor to a pointer"); } void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc, unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; MemmoveFct memmoveFct; if (mode == kHostToDevice) { memmoveFct = &MemmoveHostToDevice; } else if (mode == kDeviceToHost) { memmoveFct = &MemmoveDeviceToHost; } else if (mode == kDeviceToDevice) { memmoveFct = &MemmoveDeviceToDevice; } Fortran::runtime::Assign( *dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct); } } } // namespace Fortran::runtime::cuda