This revision simplifies Clang codegen for parallel regions in OpenMP GPU target offloading and makes the corresponding changes in libomptarget: SPMD and non-SPMD parallel calls are unified under a single `kmpc_parallel_51` runtime entry point for parallel regions (which will be shared between target and host-side parallel regions), and data sharing is internalized to the runtime. Tests have been auto-generated using `update_cc_test_checks.py`. The revision also contains changes to OpenMPOpt for remark creation on target offloading regions. Reviewed By: jdoerfert, Meinersbur Differential Revision: https://reviews.llvm.org/D95976
37 lines
716 B
C++
37 lines
716 B
C++
// RUN: %libomptarget-compilexx-run-and-check-aarch64-unknown-linux-gnu
// RUN: %libomptarget-compilexx-run-and-check-powerpc64-ibm-linux-gnu
// RUN: %libomptarget-compilexx-run-and-check-powerpc64le-ibm-linux-gnu
// RUN: %libomptarget-compilexx-run-and-check-x86_64-pc-linux-gnu
// RUN: %libomptarget-compilexx-run-and-check-nvptx64-nvidia-cuda
|
|
|
|
#include <cassert>
|
|
#include <iostream>
|
|
|
|
/// Increments the integer that \p C points to by one.
///
/// The increment is annotated with `omp atomic`, so concurrent callers that
/// share the same counter do not lose updates when OpenMP is enabled.
///
/// \param C pointer to the counter to bump; it is dereferenced unconditionally.
void work(int *C) {
  // The pragma must directly precede the update statement it applies to.
#pragma omp atomic
  ++(*C);
}
|
|
|
|
/// Runs work() on \p C from inside a parallel region that requests two
/// threads.
///
/// Every thread that executes the region calls work(C) once, so the counter
/// grows by the number of threads that actually ran the region.
///
/// \param C pointer to the shared counter forwarded to work().
void use(int *C) {
  // The pragma must directly precede the statement forming the region body.
#pragma omp parallel num_threads(2)
  work(C);
}
|
|
|
|
int main() {
|
|
int C = 0;
|
|
#pragma omp target map(C)
|
|
{
|
|
use(&C);
|
|
#pragma omp parallel num_threads(2)
|
|
use(&C);
|
|
}
|
|
|
|
assert(C >= 2 && C <= 6);
|
|
|
|
std::cout << "PASS\n";
|
|
|
|
return 0;
|
|
}
|
|
|
|
// CHECK: PASS
|