[OpenMP] Add environment variables to change stack / heap size in the CUDA plugin
This patch adds support for two environment variables to configure the device. ``LIBOMPTARGET_STACK_SIZE`` sets the amount of memory in bytes that each thread has for its stack. ``LIBOMPTARGET_HEAP_SIZE`` sets the amount of heap memory that can be allocated using malloc / free on the device. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D106627
This commit is contained in:
committed by
Huber, Joseph
parent
4a76bd0e31
commit
3817ba13ae
@@ -30,6 +30,8 @@ variables is defined below.
|
||||
* ``LIBOMPTARGET_PROFILE=<Filename>``
|
||||
* ``LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD=<Num>``
|
||||
* ``LIBOMPTARGET_INFO=<Num>``
|
||||
* ``LIBOMPTARGET_HEAP_SIZE=<Num>``
|
||||
* ``LIBOMPTARGET_STACK_SIZE=<Num>``
|
||||
|
||||
LIBOMPTARGET_DEBUG
|
||||
""""""""""""""""""
|
||||
@@ -321,6 +323,21 @@ default. The solution is to add an explicit map clause in the target region.
|
||||
return sum;
|
||||
}
|
||||
|
||||
LIBOMPTARGET_STACK_SIZE
|
||||
"""""""""""""""""""""""
|
||||
|
||||
This environment variable sets the stack size in bytes for the CUDA plugin. This
|
||||
can be used to increase or decrease the standard amount of memory reserved for
|
||||
each thread's stack.
|
||||
|
||||
LIBOMPTARGET_HEAP_SIZE
|
||||
"""""""""""""""""""""""
|
||||
|
||||
This environment variable sets the amount of memory in bytes that can be
|
||||
allocated with ``malloc`` (and released with ``free``) on the device when using the CUDA plugin. This is necessary
|
||||
for some applications that need more heap memory than the default limit provides, whether allocated directly by the user or through
|
||||
globalization.
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
:maxdepth: 1
|
||||
|
||||
@@ -642,11 +642,34 @@ public:
|
||||
DeviceData[DeviceId].BlocksPerGrid = EnvTeamLimit;
|
||||
}
|
||||
|
||||
size_t StackLimit;
|
||||
size_t HeapLimit;
|
||||
if (const char *EnvStr = getenv("LIBOMPTARGET_STACK_SIZE")) {
|
||||
StackLimit = std::stol(EnvStr);
|
||||
if (cuCtxSetLimit(CU_LIMIT_STACK_SIZE, StackLimit) != CUDA_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
} else {
|
||||
if (cuCtxGetLimit(&StackLimit, CU_LIMIT_STACK_SIZE) != CUDA_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
if (const char *EnvStr = getenv("LIBOMPTARGET_HEAP_SIZE")) {
|
||||
HeapLimit = std::stol(EnvStr);
|
||||
if (cuCtxSetLimit(CU_LIMIT_MALLOC_HEAP_SIZE, HeapLimit) != CUDA_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
} else {
|
||||
if (cuCtxGetLimit(&HeapLimit, CU_LIMIT_MALLOC_HEAP_SIZE) != CUDA_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
|
||||
"Device supports up to %d CUDA blocks and %d threads with a "
|
||||
"warp size of %d\n",
|
||||
DeviceData[DeviceId].BlocksPerGrid,
|
||||
DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize);
|
||||
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, DeviceId,
|
||||
"Device heap size is %d Bytes, device stack size is %d Bytes per "
|
||||
"thread\n",
|
||||
(int)HeapLimit, (int)StackLimit);
|
||||
|
||||
// Set default number of teams
|
||||
if (EnvNumTeams > 0) {
|
||||
|
||||
Reference in New Issue
Block a user