From 56b792322aaaa82883d56a322a94448de519f789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Wed, 9 Apr 2025 17:34:48 -0700 Subject: [PATCH] [flang][cuda] Use the asyncId in device allocation (#135099) Use `cudaMallocAsync` in the `CUFAllocDevice` allocator when asyncId is provided. More work is needed to be able to call `cudaFreeAsync` since the allocated address and stream need to be tracked. --- flang-rt/lib/cuda/allocator.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/flang-rt/lib/cuda/allocator.cpp b/flang-rt/lib/cuda/allocator.cpp index d606ab2d4313..a1c3a2c1b2ea 100644 --- a/flang-rt/lib/cuda/allocator.cpp +++ b/flang-rt/lib/cuda/allocator.cpp @@ -9,6 +9,7 @@ #include "flang/Runtime/CUDA/allocator.h" #include "flang-rt/runtime/allocator-registry.h" #include "flang-rt/runtime/derived.h" +#include "flang-rt/runtime/descriptor.h" #include "flang-rt/runtime/environment.h" #include "flang-rt/runtime/stat.h" #include "flang-rt/runtime/terminator.h" @@ -43,14 +44,18 @@ void *CUFAllocPinned( void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); } -void *CUFAllocDevice( - std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) { +void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t asyncId) { void *p; if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) { CUDA_REPORT_IF_ERROR( cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal)); } else { - CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes)); + if (asyncId == kNoAsyncId) { + CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes)); + } else { + CUDA_REPORT_IF_ERROR( + cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)asyncId)); + } } return p; }