Many tests in the `offload` project have requirements defined by which targets are not supported rather than which platforms are supported. This patch aims to streamline the requirement definitions by adding four new feature tags: `host`, `gpu`, `amdgpu`, and `nvidiagpu`.
80 lines
2.6 KiB
C++
80 lines
2.6 KiB
C++
// clang-format off
|
|
// RUN: %libomptarget-compilexx-generic
|
|
// RUN: env OMPX_APU_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=60 %libomptarget-run-generic 2>&1 \
|
|
// RUN: | %fcheck-generic -check-prefix=CHECK
|
|
|
|
// REQUIRES: amdgpu
|
|
// REQUIRES: unified_shared_memory
|
|
|
|
// clang-format on
|
|
|
|
#include <cstdint>
|
|
#include <cstdio>
|
|
|
|
/// Test for globals under automatic zero-copy.
|
|
/// Because we are building without unified_shared_memory
|
|
/// requirement pragma, all globals are allocated in the device
|
|
/// memory of all used GPUs. To ensure those globals contain the intended
|
|
/// values, we need to execute H2D and D2H memory copies even if we are running
|
|
/// in automatic zero-copy. This only applies to globals. Local variables (their
|
|
/// host pointers) are passed to the kernels by-value, according to the
|
|
/// automatic zero-copy behavior.
|
|
|
|
#pragma omp begin declare target
|
|
int32_t x; // 4 bytes
|
|
int32_t z[10]; // 40 bytes
|
|
int32_t *k; // 20 bytes
|
|
#pragma omp end declare target
|
|
|
|
int main() {
|
|
int32_t *dev_k = nullptr;
|
|
x = 3;
|
|
int32_t y = -1;
|
|
for (size_t t = 0; t < 10; t++)
|
|
z[t] = t;
|
|
k = new int32_t[5];
|
|
|
|
printf("Host pointer for k = %p\n", k);
|
|
for (size_t t = 0; t < 5; t++)
|
|
k[t] = -t;
|
|
|
|
/// target update to forces a copy between host and device global, which we must
|
|
/// execute to keep the two global copies consistent. CHECK: Copying data from
|
|
/// host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=40, Name=z
|
|
#pragma omp target update to(z[ : 10])
|
|
|
|
/// target map with always modifier (for x) forces a copy between host and
|
|
/// device global, which we must execute to keep the two global copies
|
|
/// consistent. k's content (host address) is passed by-value to the kernel
|
|
/// (Size=20 case). y, being a local variable, is also passed by-value to the
|
|
/// kernel (Size=4 case) CHECK: Return HstPtrBegin {{.*}} Size=4 for unified
|
|
/// shared memory CHECK: Return HstPtrBegin {{.*}} Size=20 for unified shared
|
|
/// memory CHECK: Copying data from host to device, HstPtr={{.*}},
|
|
/// TgtPtr={{.*}}, Size=4, Name=x
|
|
#pragma omp target map(to : k[ : 5]) map(always, tofrom : x) map(tofrom : y) \
|
|
map(from : dev_k)
|
|
{
|
|
x++;
|
|
y++;
|
|
for (size_t t = 0; t < 10; t++)
|
|
z[t]++;
|
|
dev_k = k;
|
|
}
|
|
/// CHECK-NOT: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
|
|
/// Size=20, Name=k
|
|
|
|
/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
|
|
/// Size=4, Name=x
|
|
|
|
/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
|
|
/// Size=40, Name=z
|
|
#pragma omp target update from(z[ : 10])
|
|
|
|
/// CHECK-NOT: k pointer not correctly passed to kernel
|
|
if (dev_k != k)
|
|
printf("k pointer not correctly passed to kernel\n");
|
|
|
|
delete[] k;
|
|
return 0;
|
|
}
|