[flang][cuda] Add support for NV_CUDAFOR_DEVICE_IS_MANAGED (#133778)
Add support for the environment variable `NV_CUDAFOR_DEVICE_IS_MANAGED` as described in the documentation: https://docs.nvidia.com/hpc-sdk/compilers/cuda-fortran-prog-guide/index.html#controlling-device-data-is-managed. When the variable is set to 1, device allocations are switched to managed allocations.
This commit is contained in:
committed by
GitHub
parent
b739a3cb65
commit
0b31f08537
@@ -59,6 +59,7 @@ struct ExecutionEnvironment {
|
||||
|
||||
// CUDA related variables
|
||||
std::size_t cudaStackLimit{0}; // ACC_OFFLOAD_STACK_SIZE
|
||||
bool cudaDeviceIsManaged{false}; // NV_CUDAFOR_DEVICE_IS_MANAGED
|
||||
};
|
||||
|
||||
RT_OFFLOAD_VAR_GROUP_BEGIN
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "flang/Runtime/CUDA/allocator.h"
|
||||
#include "flang-rt/runtime/allocator-registry.h"
|
||||
#include "flang-rt/runtime/derived.h"
|
||||
#include "flang-rt/runtime/environment.h"
|
||||
#include "flang-rt/runtime/stat.h"
|
||||
#include "flang-rt/runtime/terminator.h"
|
||||
#include "flang-rt/runtime/type-info.h"
|
||||
@@ -43,7 +44,12 @@ void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
|
||||
|
||||
void *CUFAllocDevice(std::size_t sizeInBytes) {
|
||||
void *p;
|
||||
CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
|
||||
if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
|
||||
CUDA_REPORT_IF_ERROR(
|
||||
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
|
||||
} else {
|
||||
CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "flang/Runtime/CUDA/memory.h"
|
||||
#include "flang-rt/runtime/assign-impl.h"
|
||||
#include "flang-rt/runtime/descriptor.h"
|
||||
#include "flang-rt/runtime/environment.h"
|
||||
#include "flang-rt/runtime/terminator.h"
|
||||
#include "flang/Runtime/CUDA/common.h"
|
||||
#include "flang/Runtime/CUDA/descriptor.h"
|
||||
@@ -26,7 +27,12 @@ void *RTDEF(CUFMemAlloc)(
|
||||
void *ptr = nullptr;
|
||||
if (bytes != 0) {
|
||||
if (type == kMemTypeDevice) {
|
||||
CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
|
||||
if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
|
||||
CUDA_REPORT_IF_ERROR(
|
||||
cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal));
|
||||
} else {
|
||||
CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
|
||||
}
|
||||
} else if (type == kMemTypeManaged || type == kMemTypeUnified) {
|
||||
CUDA_REPORT_IF_ERROR(
|
||||
cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal));
|
||||
|
||||
@@ -155,6 +155,19 @@ void ExecutionEnvironment::Configure(int ac, const char *av[],
|
||||
}
|
||||
}
|
||||
|
||||
if (auto *x{std::getenv("NV_CUDAFOR_DEVICE_IS_MANAGED")}) {
|
||||
char *end;
|
||||
auto n{std::strtol(x, &end, 10)};
|
||||
if (n >= 0 && n <= 1 && *end == '\0') {
|
||||
cudaDeviceIsManaged = n != 0;
|
||||
} else {
|
||||
std::fprintf(stderr,
|
||||
"Fortran runtime: NV_CUDAFOR_DEVICE_IS_MANAGED=%s is invalid; "
|
||||
"ignored\n",
|
||||
x);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Set RP/ROUND='PROCESSOR_DEFINED' from environment
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user