Files
clang-p2996/openmp/libomptarget/test/mapping/auto_zero_copy.cpp
carlobertolli ae99966a27 [OpenMP] Enable automatic unified shared memory on MI300A. (#77512)
This patch enables applications that did not request OpenMP
unified_shared_memory to run with the same zero-copy behavior, where
mapped memory does not result in extra memory allocations and memory
copies, but CPU-allocated memory is accessed from the device. The name
for this behavior is "automatic zero-copy", and it relies on detecting
that the runtime is running on an MI300A, that the user did not select
unified_shared_memory in their program, and that XNACK (unified memory
support) is enabled in the current GPU configuration. If all of these
conditions are met, automatic zero-copy is triggered.

This patch also introduces an environment variable, OMPX_APU_MAPS, that,
if set, triggers automatic zero-copy on non-APU GPUs as well (e.g., on
discrete GPUs).
This patch is still missing support for global variables, which will be
provided in a subsequent patch.

Co-authored-by: Thorsten Blass <thorsten.blass@amd.com>
2024-01-22 10:30:22 -06:00

58 lines
1.9 KiB
C++

// clang-format off
// RUN: %libomptarget-compilexx-generic
// RUN: env OMPX_APU_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=30 %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=INFO_ZERO -check-prefix=CHECK
// RUN: %libomptarget-compilexx-generic
// RUN: env HSA_XNACK=0 LIBOMPTARGET_INFO=30 %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=INFO_COPY -check-prefix=CHECK
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
// UNSUPPORTED: nvptx64-nvidia-cuda
// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
// UNSUPPORTED: x86_64-pc-linux-gnu
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
// REQUIRES: unified_shared_memory
// clang-format on
#include <cstdio>
// Offloads an element-wise update of `a` from `b` and `k`, then verifies the
// result on the host. The FileCheck directives embedded in the comments below
// are matched against the program's combined stdout/stderr by the RUN lines
// at the top of this file, so their text and relative order must be kept.
//
// Returns 0 when every element matches, 1 otherwise.
int main() {
  int n = 1024;

  // test various mapping types
  int *a = new int[n]; // heap array, mapped tofrom
  int k = 3;           // scalar, not named in any map clause
  int b[n];            // stack array sized at run time, mapped to

  for (int i = 0; i < n; i++)
    b[i] = i;

  // clang-format off
  // INFO_ZERO: Return HstPtrBegin 0x{{.*}} Size=4096 for unified shared memory
  // INFO_ZERO: Return HstPtrBegin 0x{{.*}} Size=4096 for unified shared memory
  // INFO_COPY: Creating new map entry with HstPtrBase=0x{{.*}}, HstPtrBegin=0x{{.*}}, TgtAllocBegin=0x{{.*}}, TgtPtrBegin=0x{{.*}}, Size=4096,
  // INFO_COPY: Creating new map entry with HstPtrBase=0x{{.*}}, HstPtrBegin=0x{{.*}}, TgtAllocBegin=0x{{.*}}, TgtPtrBegin=0x{{.*}}, Size=4096,
  // INFO_COPY: Mapping exists with HstPtrBegin=0x{{.*}}, TgtPtrBegin=0x{{.*}}, Size=4096, DynRefCount=1 (update suppressed)
  // INFO_COPY: Mapping exists with HstPtrBegin=0x{{.*}}, TgtPtrBegin=0x{{.*}}, Size=4096, DynRefCount=1 (update suppressed)
  // clang-format on
#pragma omp target teams distribute parallel for map(tofrom : a[ : n]) \
    map(to : b[ : n])
  for (int i = 0; i < n; i++)
    a[i] = i + b[i] + k;

  // Recompute the expected value on the host and count the mismatches.
  int err = 0;
  for (int i = 0; i < n; i++)
    if (a[i] != i + b[i] + k)
      err++;

  // The host buffer is no longer read past this point; release it so the
  // test does not leak.
  delete[] a;

  // CHECK: PASS
  if (err == 0)
    printf("PASS\n");

  // Collapse the mismatch count to 0/1. Only the low 8 bits of an exit status
  // are reported to the parent on POSIX systems, so returning the raw count
  // would make a total failure (1024 mismatches) look like success.
  return err == 0 ? 0 : 1;
}