// clang-format off
// RUN: %libomptarget-compilexx-generic
// RUN: env OMPX_APU_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=60 %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=CHECK

// REQUIRES: amdgpu
// REQUIRES: unified_shared_memory

// clang-format on

#include <cstdint>
#include <cstdio>

/// Test for globals under automatic zero-copy.
/// Because we are building without the unified_shared_memory requirement
/// pragma, all globals are allocated in the device memory of all used GPUs.
/// To ensure those globals contain the intended values, we need to execute
/// H2D and D2H memory copies even if we are running in automatic zero-copy.
/// This only applies to globals. Local variables (their host pointers) are
/// passed to the kernels by-value, according to the automatic zero-copy
/// behavior.
///
/// Every FileCheck directive below sits on a line of its own, because
/// FileCheck takes the remainder of the directive's line as the pattern; a
/// pattern continued on the next comment line is silently ignored. The
/// directives are fenced with clang-format off/on so they stay on one line.
///
/// NOTE(review): the Name= field of the "Copying data" messages is matched
/// with a wildcard. The intended values are z, x and k, but the RUN line does
/// not request debug info and the runtime may print a placeholder name in
/// that case -- confirm before tightening these patterns.

#pragma omp begin declare target
int32_t x;     // 4 bytes
int32_t z[10]; // 40 bytes
int32_t *k;    // pointer; the k[0:5] section mapped through it is 20 bytes
#pragma omp end declare target

/// Initializes the globals on the host, pushes them to the device, runs one
/// target region that modifies them, pulls z back and verifies that the
/// kernel observed the host value of k. Always returns 0; failures are
/// reported through the printed output that FileCheck inspects.
int main() {
  int32_t *dev_k = nullptr;
  x = 3;
  int32_t y = -1;
  // Signed 32-bit indices avoid narrowing an unsigned size_t into int32_t.
  for (int32_t t = 0; t < 10; t++)
    z[t] = t;
  k = new int32_t[5];

  // %p requires a void * argument.
  printf("Host pointer for k = %p\n", static_cast<void *>(k));
  for (int32_t t = 0; t < 5; t++)
    k[t] = -t;

  /// "target update to" forces a copy between the host and device global,
  /// which we must execute to keep the two global copies consistent.
  // clang-format off
  // CHECK: Copying data from host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=40, Name={{.*}}
  // clang-format on
#pragma omp target update to(z[ : 10])

  /// target map with the always modifier (for x) forces a copy between the
  /// host and device global, which we must execute to keep the two global
  /// copies consistent. k's content (host address) is passed by-value to the
  /// kernel (Size=20 case). y, being a local variable, is also passed
  /// by-value to the kernel (Size=4 case).
  // clang-format off
  // CHECK: Return HstPtrBegin {{.*}} Size=4 for unified shared memory
  // CHECK: Return HstPtrBegin {{.*}} Size=20 for unified shared memory
  // CHECK: Copying data from host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=4, Name={{.*}}
  // clang-format on
#pragma omp target map(to : k[ : 5]) map(always, tofrom : x) map(tofrom : y)  \
    map(from : dev_k)
  {
    x++;
    y++;
    for (int32_t t = 0; t < 10; t++)
      z[t]++;
    // Hand the pointer value seen inside the kernel back to the host.
    dev_k = k;
  }
  /// The 20 bytes reachable through k must not be copied back, whereas x
  /// (always, tofrom) and z (explicit update) must be.
  // clang-format off
  // CHECK-NOT: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, Size=20, Name={{.*}}
  // CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, Size=4, Name={{.*}}
  // CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, Size=40, Name={{.*}}
  // clang-format on
#pragma omp target update from(z[ : 10])

  // CHECK-NOT: k pointer not correctly passed to kernel
  if (dev_k != k)
    printf("k pointer not correctly passed to kernel\n");

  delete[] k;
  return 0;
}