diff --git a/flang-rt/include/flang-rt/runtime/allocator-registry.h b/flang-rt/include/flang-rt/runtime/allocator-registry.h index 1a59ec8b1ef5..33e8e2c7d785 100644 --- a/flang-rt/include/flang-rt/runtime/allocator-registry.h +++ b/flang-rt/include/flang-rt/runtime/allocator-registry.h @@ -11,6 +11,7 @@ #include "flang/Common/api-attrs.h" #include "flang/Runtime/allocator-registry-consts.h" +#include #include #include @@ -18,7 +19,7 @@ namespace Fortran::runtime { -using AllocFct = void *(*)(std::size_t); +using AllocFct = void *(*)(std::size_t, std::int64_t); using FreeFct = void (*)(void *); typedef struct Allocator_t { @@ -26,10 +27,11 @@ typedef struct Allocator_t { FreeFct free{nullptr}; } Allocator_t; -#ifdef RT_DEVICE_COMPILATION -static RT_API_ATTRS void *MallocWrapper(std::size_t size) { +static RT_API_ATTRS void *MallocWrapper( + std::size_t size, [[maybe_unused]] std::int64_t) { return std::malloc(size); } +#ifdef RT_DEVICE_COMPILATION static RT_API_ATTRS void FreeWrapper(void *p) { return std::free(p); } #endif @@ -39,7 +41,7 @@ struct AllocatorRegistry { : allocators{{&MallocWrapper, &FreeWrapper}} {} #else constexpr AllocatorRegistry() { - allocators[kDefaultAllocator] = {&std::malloc, &std::free}; + allocators[kDefaultAllocator] = {&MallocWrapper, &std::free}; }; #endif RT_API_ATTRS void Register(int, Allocator_t); diff --git a/flang-rt/lib/cuda/allocator.cpp b/flang-rt/lib/cuda/allocator.cpp index d6529957bc93..d606ab2d4313 100644 --- a/flang-rt/lib/cuda/allocator.cpp +++ b/flang-rt/lib/cuda/allocator.cpp @@ -34,7 +34,8 @@ void RTDEF(CUFRegisterAllocator)() { } } -void *CUFAllocPinned(std::size_t sizeInBytes) { +void *CUFAllocPinned( + std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) { void *p; CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes)); return p; @@ -42,7 +43,8 @@ void *CUFAllocPinned(std::size_t sizeInBytes) { void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); } -void *CUFAllocDevice(std::size_t sizeInBytes) { +void *CUFAllocDevice( + std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) { void *p; if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) { CUDA_REPORT_IF_ERROR( @@ -55,7 +57,8 @@ void *CUFAllocDevice(std::size_t sizeInBytes) { void CUFFreeDevice(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); } -void *CUFAllocManaged(std::size_t sizeInBytes) { +void *CUFAllocManaged( + std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) { void *p; CUDA_REPORT_IF_ERROR( cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal)); @@ -64,9 +67,10 @@ void *CUFAllocManaged(std::size_t sizeInBytes) { void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); } -void *CUFAllocUnified(std::size_t sizeInBytes) { +void *CUFAllocUnified( + std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) { // Call alloc managed for the time being. - return CUFAllocManaged(sizeInBytes); + return CUFAllocManaged(sizeInBytes, asyncId); } void CUFFreeUnified(void *p) { diff --git a/flang-rt/lib/cuda/descriptor.cpp b/flang-rt/lib/cuda/descriptor.cpp index d44ab2e45d2a..175e8c0ef843 100644 --- a/flang-rt/lib/cuda/descriptor.cpp +++ b/flang-rt/lib/cuda/descriptor.cpp @@ -20,7 +20,8 @@ RT_EXT_API_GROUP_BEGIN Descriptor *RTDEF(CUFAllocDescriptor)( std::size_t sizeInBytes, const char *sourceFile, int sourceLine) { - return reinterpret_cast(CUFAllocManaged(sizeInBytes)); + return reinterpret_cast( + CUFAllocManaged(sizeInBytes, /*asyncId*/ -1)); } void RTDEF(CUFFreeDescriptor)( diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp index 495e25e96ade..c660d4f2ff5a 100644 --- a/flang-rt/lib/runtime/descriptor.cpp +++ b/flang-rt/lib/runtime/descriptor.cpp @@ -170,7 +170,7 @@ RT_API_ATTRS int Descriptor::Allocate() { // Zero size allocation is possible in Fortran and the resulting // descriptor must be allocated/associated. Since std::malloc(0) // result is implementation defined, always allocate at least one byte. - void *p{alloc(byteSize ? byteSize : 1)}; + void *p{alloc(byteSize ? byteSize : 1, /*asyncId=*/-1)}; if (!p) { return CFI_ERROR_MEM_ALLOCATION; } diff --git a/flang-rt/lib/runtime/pointer.cpp b/flang-rt/lib/runtime/pointer.cpp index 0cd46cd05e2d..fd2427f4124b 100644 --- a/flang-rt/lib/runtime/pointer.cpp +++ b/flang-rt/lib/runtime/pointer.cpp @@ -129,7 +129,7 @@ RT_API_ATTRS void *AllocateValidatedPointerPayload( byteSize = ((byteSize + align - 1) / align) * align; std::size_t total{byteSize + sizeof(std::uintptr_t)}; AllocFct alloc{allocatorRegistry.GetAllocator(allocatorIdx)}; - void *p{alloc(total)}; + void *p{alloc(total, /*asyncId=*/-1)}; if (p && allocatorIdx == 0) { // Fill the footer word with the XOR of the ones' complement of // the base address, which is a value that would be highly unlikely diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h index 4fb4c94c5e9b..18ddf75ac385 100644 --- a/flang/include/flang/Runtime/CUDA/allocator.h +++ b/flang/include/flang/Runtime/CUDA/allocator.h @@ -20,16 +20,16 @@ extern "C" { void RTDECL(CUFRegisterAllocator)(); } -void *CUFAllocPinned(std::size_t); +void *CUFAllocPinned(std::size_t, std::int64_t); void CUFFreePinned(void *); -void *CUFAllocDevice(std::size_t); +void *CUFAllocDevice(std::size_t, std::int64_t); void CUFFreeDevice(void *); -void *CUFAllocManaged(std::size_t); +void *CUFAllocManaged(std::size_t, std::int64_t); void CUFFreeManaged(void *); -void *CUFAllocUnified(std::size_t); +void *CUFAllocUnified(std::size_t, std::int64_t); void CUFFreeUnified(void *); } // namespace Fortran::runtime::cuda