Files
clang-p2996/openmp/libomptarget/test/mapping/lambda_mapping.cpp
Johannes Doerfert d3921e4670 [OpenMP] Basic BumpAllocator for (AMD)GPUs (#69806)
The patch contains a basic BumpAllocator for (AMD)GPUs to allow us to
run more tests. The allocator implements `malloc`, both internally and
externally, while we continue to default to the NVIDIA `malloc` when we
target NVIDIA GPUs. Once we have smarter or customizable allocators we
should consider this choice, for now, this allocator is better than
none. It traps if it is out of memory, making it easy to debug. Heap
size is configured via `LIBOMPTARGET_HEAP_SIZE` and defaults to 512MB.
It allows tracking allocation statistics via
`LIBOMPTARGET_DEVICE_RTL_DEBUG=8` (together with
`-fopenmp-target-debug=8`). Two tests were added, and one was enabled.

This is the next step towards fixing
 https://github.com/llvm/llvm-project/issues/66708
2023-10-21 14:49:30 -07:00

54 lines
1.2 KiB
C++

// Unoptimized, we need 24000000 bytes heap
// RUN: %libomptarget-compilexx-generic
// RUN: env LIBOMPTARGET_HEAP_SIZE=24000000 \
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
// RUN: %libomptarget-compileoptxx-run-and-check-generic
#include <iostream>
/// Invokes LoopBody(I) once for every I in the half-open range [Begin, End).
///
/// The loop carries an OpenMP `target parallel for` directive with a static
/// schedule. LoopBody is taken by value; nothing is invoked when
/// Begin >= End.
template <typename LOOP_BODY>
inline void forall(int Begin, int End, LOOP_BODY LoopBody) {
#pragma omp target parallel for schedule(static)
  for (int Idx = Begin; Idx < End; ++Idx)
    LoopBody(Idx);
}
// Element count of the arrays A, B and C declared in main(); also used there
// as the loop bound and as the length of the mapped array sections.
#define N (1000)
//
// Demonstration of the RAJA abstraction using lambdas
// Requires data mapping onto the target section
//
int main() {
  double A[N];
  double B[N];
  double C[N];

  // Seed the inputs so that A[K] + B[K] == 1 for every K; C starts at -9.
  for (int K = 0; K < N; ++K) {
    A[K] = K + 1;
    B[K] = -K;
    C[K] = -9;
  }

#pragma omp target data map(tofrom : C[0 : N]) map(to : A[0 : N], B[0 : N])
  {
    // The lambda captures A, B and C by reference and is run by forall over
    // the index range [0, N).
    forall(0, N, [&](int I) { C[I] += A[I] + B[I]; });
  }

  // Every element should now be -9 + 1 == -8; report each one that is not.
  bool AnyMismatch = false;
  for (int K = 0; K < N; ++K) {
    if (C[K] == -8)
      continue;
    std::cout << "Failed at " << K << " with val " << C[K] << std::endl;
    AnyMismatch = true;
  }

  // CHECK: Succeeded
  std::cout << (AnyMismatch ? "Failed" : "Succeeded") << std::endl;
  return 0;
}