This patch adds support for using dynamic shared memory in the new device runtime. The new function `__kmpc_get_dynamic_shared` will return a pointer to the buffer of dynamic shared memory. Currently the amount of memory allocated is set by an environment variable. In the future this amount will be added to the amount used for the smart stack which will be configured in a similar way. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D110006
32 lines
834 B
C
32 lines
834 B
C
// RUN: %libomptarget-compile-nvptx64-nvidia-cuda -fopenmp-target-new-runtime
// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=4 \
// RUN: %libomptarget-run-nvptx64-nvidia-cuda | %fcheck-nvptx64-nvidia-cuda
// REQUIRES: nvptx64-nvidia-cuda
|
|
|
|
#include <omp.h>
|
|
#include <stdio.h>
|
|
|
|
// Base definition of get_dynamic_shared: it hands back a null pointer, i.e.
// no buffer is provided by this version of the function.
void *get_dynamic_shared() {
  void *no_buffer = NULL;
  return no_buffer;
}
|
|
// Variant of get_dynamic_shared selected when the device architecture is
// nvptx64: it forwards to the device runtime entry point
// __kmpc_get_dynamic_shared and returns whatever pointer that call yields.
#pragma omp begin declare variant match(device = {arch(nvptx64)})
extern void *__kmpc_get_dynamic_shared();
void *get_dynamic_shared() {
  void *dynamic_buffer = __kmpc_get_dynamic_shared();
  return dynamic_buffer;
}
#pragma omp end declare variant
|
|
|
|
// Test driver: inside a `target parallel` region, thread 0 stores 1 through
// the pointer returned by get_dynamic_shared(), and thread 1 loads the value
// back through its own copy of that pointer and reports it to the host in x.
//
// Prints "PASS" and returns 0 when the host sees x == 1; otherwise prints
// "FAIL" and returns 1, so a failing run is visible in the output and in the
// exit status.
int main() {
  // Start from a known failing value and map it in both directions: with a
  // `from`-only mapping the value copied back would be indeterminate whenever
  // no thread with id 1 executes the assignment below.
  int x = 0;
#pragma omp target parallel map(tofrom : x)
  {
    int *buf = get_dynamic_shared();
#pragma omp barrier
    // The base get_dynamic_shared() returns NULL, so never dereference a null
    // buffer; x then simply keeps its failing value.
    if (buf != NULL && omp_get_thread_num() == 0) {
      *buf = 1;
    }
    // Thread 0's store must be complete before thread 1 reads the buffer.
#pragma omp barrier
    if (buf != NULL && omp_get_thread_num() == 1) {
      x = *buf;
    }
  }

  // CHECK: PASS
  if (x != 1) {
    printf("FAIL\n");
    return 1;
  }
  printf("PASS\n");
  return 0;
}
|