[flang][cuda] Add support for NV_CUDAFOR_DEVICE_IS_MANAGED (#133778)

Add support for the environment variable `NV_CUDAFOR_DEVICE_IS_MANAGED`
as described in the documentation:
https://docs.nvidia.com/hpc-sdk/compilers/cuda-fortran-prog-guide/index.html#controlling-device-data-is-managed.

This mainly switches device allocations to managed allocations.
This commit is contained in:
Valentin Clement (バレンタイン クレメン)
2025-03-31 13:17:21 -07:00
committed by GitHub
parent b739a3cb65
commit 0b31f08537
4 changed files with 28 additions and 2 deletions

View File

@@ -59,6 +59,7 @@ struct ExecutionEnvironment {
// CUDA related variables
std::size_t cudaStackLimit{0}; // ACC_OFFLOAD_STACK_SIZE
bool cudaDeviceIsManaged{false}; // NV_CUDAFOR_DEVICE_IS_MANAGED
};
RT_OFFLOAD_VAR_GROUP_BEGIN

View File

@@ -9,6 +9,7 @@
#include "flang/Runtime/CUDA/allocator.h"
#include "flang-rt/runtime/allocator-registry.h"
#include "flang-rt/runtime/derived.h"
#include "flang-rt/runtime/environment.h"
#include "flang-rt/runtime/stat.h"
#include "flang-rt/runtime/terminator.h"
#include "flang-rt/runtime/type-info.h"
@@ -43,7 +44,12 @@ void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
// Allocates `sizeInBytes` bytes for data that is nominally device-resident.
//
// When the runtime's execution environment has `cudaDeviceIsManaged` set
// (controlled by NV_CUDAFOR_DEVICE_IS_MANAGED), the request is served with
// cudaMallocManaged(..., cudaMemAttachGlobal); otherwise it is served with
// cudaMalloc. Exactly one allocation is performed per call: issuing an
// unconditional cudaMalloc before the branch would have its result
// overwritten by the second allocation and therefore leaked.
//
// Any CUDA error is passed to CUDA_REPORT_IF_ERROR.
// Returns the pointer produced by the selected allocation call.
void *CUFAllocDevice(std::size_t sizeInBytes) {
  // Start from a well-defined value so the returned pointer is never
  // indeterminate if the allocation call does not write to it.
  void *p{nullptr};
  if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
    CUDA_REPORT_IF_ERROR(
        cudaMallocManaged(&p, sizeInBytes, cudaMemAttachGlobal));
  } else {
    CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
  }
  return p;
}

View File

@@ -9,6 +9,7 @@
#include "flang/Runtime/CUDA/memory.h"
#include "flang-rt/runtime/assign-impl.h"
#include "flang-rt/runtime/descriptor.h"
#include "flang-rt/runtime/environment.h"
#include "flang-rt/runtime/terminator.h"
#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/CUDA/descriptor.h"
@@ -26,7 +27,12 @@ void *RTDEF(CUFMemAlloc)(
void *ptr = nullptr;
if (bytes != 0) {
if (type == kMemTypeDevice) {
CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal));
} else {
CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
}
} else if (type == kMemTypeManaged || type == kMemTypeUnified) {
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal));

View File

@@ -155,6 +155,19 @@ void ExecutionEnvironment::Configure(int ac, const char *av[],
}
}
if (auto *x{std::getenv("NV_CUDAFOR_DEVICE_IS_MANAGED")}) {
char *end;
auto n{std::strtol(x, &end, 10)};
if (n >= 0 && n <= 1 && *end == '\0') {
cudaDeviceIsManaged = n != 0;
} else {
std::fprintf(stderr,
"Fortran runtime: NV_CUDAFOR_DEVICE_IS_MANAGED=%s is invalid; "
"ignored\n",
x);
}
}
// TODO: Set RP/ROUND='PROCESSOR_DEFINED' from environment
}