Previously, the opt-in flag `-fopenmp-new-driver` was used to enable the new offloading driver. After passing tests for a few months, it should be sufficiently mature to flip the switch and make it the default. The new offloading driver is now enabled whenever OpenMP and OpenMP offloading are both in use, unless the new `-fno-openmp-new-driver` flag is passed.

The new offloading driver has three main benefits over the old method:
- Static library support
- Device-side LTO
- Unified clang driver stages

Depends on D122683

Differential Revision: https://reviews.llvm.org/D122831
86 lines
2.2 KiB
C++
86 lines
2.2 KiB
C++
// RUN: %libomptarget-compilexx-generic -O3 && %libomptarget-run-generic
|
|
|
|
// Hangs
|
|
// UNSUPPORTED: amdgcn-amd-amdhsa
|
|
// UNSUPPORTED: amdgcn-amd-amdhsa-oldDriver
|
|
|
|
#include <iostream>
|
|
|
|
/// Copies a value of type T inside an OpenMP target region and checks that
/// the copy brought back to the host equals the original.
///
/// Only the result variable appears in a `map(from : ...)` clause; the source
/// value is not listed in any clause on the directive.
///
/// @tparam T value type; must be constructible from a double literal and
///           comparable with `!=`.
/// @return 0 if the value read back matches, 1 otherwise.
template <typename T> int test_map() {
  std::cout << "map(complex<>)" << std::endl;

  T expected(0.2);
  T from_device;
#pragma omp target map(from : from_device)
  { from_device = expected; }

  // Guard clause: the success path leaves early, the failure path reports.
  if (!(from_device != expected))
    return 0;

  std::cout << " wrong results";
  return 1;
}
|
|
|
|
/// Checks that `+` reductions computed in OpenMP target regions agree with a
/// sum computed by a plain host loop over the same 100-element array.
///
/// Two shapes are exercised:
///  - "flat parallelism": a single combined
///    `target teams distribute parallel for` with `reduction(+ : sum)`;
///  - "hierarchical parallelism": `target teams distribute` over 10 blocks,
///    each block reduced by a nested `parallel for` into `block_sum[ib]`,
///    with the per-block results then added up on the host.
///
/// A mismatch in either shape is reported on stdout and makes the function
/// return non-zero. A mismatch in the flat reduction used to be printed only,
/// so the test could still report success.
///
/// @tparam T element/accumulator type; must be constructible from int and
///           support `+=`, `!=` and streaming to `std::ostream`.
/// @return 0 if both reductions match the host sum, 1 otherwise.
template <typename T> int test_reduction() {
  int ret = 0;

  std::cout << "flat parallelism" << std::endl;
  T sum(0), sum_host(0);
  const int size = 100;
  T array[size];
  // Fill with 0..size-1 and accumulate the reference sum on the host.
  for (int i = 0; i < size; i++) {
    array[i] = i;
    sum_host += array[i];
  }

#pragma omp target teams distribute parallel for map(to : array[:size]) \
    reduction(+ : sum)
  for (int i = 0; i < size; i++)
    sum += array[i];

  if (sum != sum_host) {
    std::cout << " wrong results " << sum << " host " << sum_host << std::endl;
    // Record the failure but keep going so the hierarchical case still runs.
    ret = 1;
  }

  std::cout << "hierarchical parallelism" << std::endl;
  const int nblock(10), block_size(10);
  T block_sum[nblock];
#pragma omp target teams distribute map(to : array[:size]) \
    map(from : block_sum[:nblock])
  for (int ib = 0; ib < nblock; ib++) {
    T partial_sum = 0;
    // Each block covers the half-open index range [istart, iend).
    const int istart = ib * block_size;
    const int iend = (ib + 1) * block_size;
#pragma omp parallel for reduction(+ : partial_sum)
    for (int i = istart; i < iend; i++)
      partial_sum += array[i];
    block_sum[ib] = partial_sum;
  }

  // Combine the per-block partial sums on the host.
  sum = 0;
  for (int ib = 0; ib < nblock; ib++) {
    sum += block_sum[ib];
  }

  if (sum != sum_host) {
    std::cout << " wrong results " << sum << " host " << sum_host << std::endl;
    ret = 1;
  }

  return ret;
}
|
|
|
|
/// Runs the mapping test followed by the reduction test for type T.
///
/// Both tests always run, in that order; their status codes are combined
/// with bitwise OR.
///
/// @tparam T type forwarded to `test_map<T>` and `test_reduction<T>`.
/// @return 0 if both tests returned 0, non-zero otherwise.
template <typename T> int test_complex() {
  const int map_status = test_map<T>();
  const int reduction_status = test_reduction<T>();
  return map_status | reduction_status;
}
|
|
|
|
/// Test driver: runs the full test set for `float`, then for `double`.
///
/// @return 0 if every test returned 0, non-zero otherwise (the two status
///         codes are combined with bitwise OR).
int main() {
  std::cout << "Testing float" << std::endl;
  const int float_status = test_complex<float>();

  std::cout << "Testing double" << std::endl;
  const int double_status = test_complex<double>();

  return float_status | double_status;
}
|