Files
clang-p2996/openmp/libomptarget/test/offloading/bug49021.cpp
Johannes Doerfert e5a3d5ba88 [OpenMP][NFC] Enable more runtime tests and also run them with O3
The tests run fine on my AMD GPU machine; we should verify them on others
too and put them into our regular testing. Not testing O1/2/3 is really
bad, and not testing all architectures is similarly problematic.

Differential Revision: https://reviews.llvm.org/D148576
2023-07-31 15:45:53 -07:00

83 lines
2.1 KiB
C++

// RUN: %libomptarget-compilexx-generic -O3 && %libomptarget-run-generic
// RUN: %libomptarget-compilexx-generic -O3 -ffast-math && %libomptarget-run-generic
// RUN: %libomptarget-compileoptxx-generic -O3 && %libomptarget-run-generic
// RUN: %libomptarget-compileoptxx-generic -O3 -ffast-math && %libomptarget-run-generic
#include <iostream>
/// Round-trips a single value of type T through an OpenMP target region.
///
/// The value is initialized on the host from the literal 0.2, assigned to a
/// second variable inside the target region, and that variable is mapped back
/// with `map(from : ...)`. The copy must compare equal to the original.
///
/// \returns 0 when the copy matches, 1 (after printing a diagnostic) when it
/// does not.
template <typename T> int test_map() {
  std::cout << "map(T)" << std::endl;

  T host_value(0.2);
  T device_copy;
#pragma omp target map(from : device_copy)
  { device_copy = host_value; }

  const bool mismatch = device_copy != host_value;
  if (!mismatch)
    return 0;

  std::cout << " wrong results";
  return 1;
}
/// Checks that `+` reductions computed in OpenMP target regions match a
/// reference sum computed on the host, for element type T.
///
/// The input is an array holding 0, 1, ..., size-1. Two offload shapes are
/// exercised:
///   - "flat parallelism": one combined
///     `target teams distribute parallel for` with a reduction clause;
///   - "hierarchical parallelism": `target teams distribute` over blocks, a
///     nested `parallel for` reduction inside each block, and a final sum of
///     the per-block results on the host.
///
/// A mismatch in either phase is reported on std::cout and makes the function
/// return non-zero. Both phases always run, so a failure in the flat phase
/// does not hide the result of the hierarchical one.
///
/// \returns 0 if both phases match the host sum, 1 otherwise.
template <typename T> int test_reduction() {
  int ret = 0;

  std::cout << "flat parallelism" << std::endl;
  T sum(0), sum_host(0);
  const int size = 100;
  T array[size];
  // Fill the input and accumulate the host reference sum in the same pass.
  for (int i = 0; i < size; i++) {
    array[i] = i;
    sum_host += array[i];
  }

#pragma omp target teams distribute parallel for map(to : array[ : size])      \
    reduction(+ : sum)
  for (int i = 0; i < size; i++)
    sum += array[i];

  if (sum != sum_host) {
    std::cout << " wrong results " << sum << " host " << sum_host << std::endl;
    // Record the failure instead of only printing it, but keep going so the
    // hierarchical case below is still exercised.
    ret = 1;
  }

  std::cout << "hierarchical parallelism" << std::endl;
  const int nblock(10), block_size(10);
  T block_sum[nblock];

#pragma omp target teams distribute map(to : array[ : size])                   \
    map(from : block_sum[ : nblock])
  for (int ib = 0; ib < nblock; ib++) {
    T partial_sum = 0;
    // Each block covers the half-open index range [istart, iend).
    const int istart = ib * block_size;
    const int iend = (ib + 1) * block_size;
#pragma omp parallel for reduction(+ : partial_sum)
    for (int i = istart; i < iend; i++)
      partial_sum += array[i];
    block_sum[ib] = partial_sum;
  }

  // Combine the per-block partial sums on the host.
  sum = 0;
  for (int ib = 0; ib < nblock; ib++) {
    sum += block_sum[ib];
  }

  if (sum != sum_host) {
    std::cout << " wrong results " << sum << " host " << sum_host << std::endl;
    ret = 1;
  }

  return ret;
}
/// Runs every check in this file for one element type T.
///
/// Calls test_map<T>() and then test_reduction<T>(), in that order, and
/// returns the bitwise OR of their results, so the result is non-zero if
/// either check reported a failure.
template <typename T> int test_POD() {
  const int map_status = test_map<T>();
  const int reduction_status = test_reduction<T>();
  return map_status | reduction_status;
}
/// Test driver: runs the checks for float and then for double.
///
/// The process exit status is the bitwise OR of both results, so it is
/// non-zero if any check failed for either type.
int main() {
  std::cout << "Testing float" << std::endl;
  const int float_status = test_POD<float>();

  std::cout << "Testing double" << std::endl;
  const int double_status = test_POD<double>();

  return float_status | double_status;
}