The patch contains a basic BumpAllocator for (AMD)GPUs to allow us to run more tests. The allocator implements `malloc`, both internally and externally, while we continue to default to the NVIDIA `malloc` when we target NVIDIA GPUs. Once we have smarter or customizable allocators we should reconsider this choice; for now, this allocator is better than none. It traps if it is out of memory, making it easy to debug. Heap size is configured via `LIBOMPTARGET_HEAP_SIZE` and defaults to 512MB. It allows tracking allocation statistics via `LIBOMPTARGET_DEVICE_RTL_DEBUG=8` (together with `-fopenmp-target-debug=8`). Two tests were added, and one was enabled. This is the next step towards fixing https://github.com/llvm/llvm-project/issues/66708
54 lines
1.2 KiB
C++
54 lines
1.2 KiB
C++
// Unoptimized, we need 24000000 bytes heap
|
|
// RUN: %libomptarget-compilexx-generic
|
|
// RUN: env LIBOMPTARGET_HEAP_SIZE=24000000 \
|
|
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
|
|
// RUN: %libomptarget-compileoptxx-run-and-check-generic
|
|
|
|
#include <iostream>
|
|
|
|
// Invokes LoopBody(Idx) once for every Idx in the half-open range
// [Begin, End). The loop is annotated as an OpenMP `target parallel for`
// with a static schedule; the callable is taken by value.
template <typename LoopBodyT>
inline void forall(int Begin, int End, LoopBodyT LoopBody) {
#pragma omp target parallel for schedule(static)
  for (int Idx = Begin; Idx < End; Idx++)
    LoopBody(Idx);
}
|
|
|
|
#define N (1000)
|
|
|
|
//
|
|
// Demonstration of the RAJA abstraction using lambdas
|
|
// Requires data mapping onto the target section
|
|
//
|
|
int main() {
|
|
double A[N], B[N], C[N];
|
|
|
|
for (int I = 0; I < N; I++) {
|
|
A[I] = I + 1;
|
|
B[I] = -I;
|
|
C[I] = -9;
|
|
}
|
|
|
|
#pragma omp target data map(tofrom : C[0 : N]) map(to : A[0 : N], B[0 : N])
|
|
{
|
|
forall(0, N, [&](int I) { C[I] += A[I] + B[I]; });
|
|
}
|
|
|
|
int Fail = 0;
|
|
for (int I = 0; I < N; I++) {
|
|
if (C[I] != -8) {
|
|
std::cout << "Failed at " << I << " with val " << C[I] << std::endl;
|
|
Fail = 1;
|
|
}
|
|
}
|
|
|
|
// CHECK: Succeeded
|
|
if (Fail) {
|
|
std::cout << "Failed" << std::endl;
|
|
} else {
|
|
std::cout << "Succeeded" << std::endl;
|
|
}
|
|
|
|
return 0;
|
|
}
|