[OPENMP] Resolve lost LoopTripCnt for subsequent loops in same thread.
Remove loopTripCnt from the threaded device stack after consuming it. Add a libomptarget DP message to aid future debugging and to validate the added test case, which only runs in Debug builds.
Differential Revision: https://reviews.llvm.org/D64808
llvm-svn: 366349
This commit is contained in:
@@ -732,8 +732,11 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
|
||||
uint64_t ltc = 0;
|
||||
TblMapMtx.lock();
|
||||
auto I = Device.LoopTripCnt.find(__kmpc_global_thread_num(NULL));
|
||||
if (I != Device.LoopTripCnt.end())
|
||||
std::swap(ltc, I->second);
|
||||
if (I != Device.LoopTripCnt.end()) {
|
||||
ltc = I->second;
|
||||
Device.LoopTripCnt.erase(I);
|
||||
DP("loop trip count is %lu.\n", ltc);
|
||||
}
|
||||
TblMapMtx.unlock();
|
||||
|
||||
// Launch device execution.
|
||||
|
||||
36
openmp/libomptarget/test/offloading/looptripcnt.c
Normal file
36
openmp/libomptarget/test/offloading/looptripcnt.c
Normal file
@@ -0,0 +1,36 @@
|
||||
// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu -allow-empty -check-prefix=DEBUG
|
||||
// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
|
||||
// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
|
||||
// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu -allow-empty -check-prefix=DEBUG
|
||||
// REQUIRES: libomptarget-debug
|
||||
|
||||
/*
|
||||
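Test that the loop trip count is popped from the runtime stack once it has been consumed, so that each subsequent target loop on the same thread reports its own trip count.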
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <omp.h>
|
||||
int main()
{
  // Number of iterations executed by each offloaded loop below.
  int iterations = 128;
  // Capacity of the result arrays; only the first `iterations` slots are
  // written by the loops.
  int capacity = 1024;
  int num_threads[capacity];
  int num_teams[capacity];

  // First target region: combined construct with an explicit thread limit.
  printf("#pragma omp target teams distribute parallel for thread_limit(4)\n");
#pragma omp target teams distribute parallel for thread_limit(4)
  for (int i = 0; i < iterations; ++i) {
    num_threads[i] = omp_get_num_threads();
    num_teams[i] = omp_get_num_teams();
  }
  printf("num_threads %d num_teams %d\n", num_threads[0], num_teams[0]);
  // DEBUG: loop trip count is 128

  // Second target region on the same host thread, this time without a
  // thread limit; the check that follows expects the trip count to be
  // reported again for this loop.
  printf("#pragma omp target teams distribute parallel for\n");
#pragma omp target teams distribute parallel for
  for (int i = 0; i < iterations; ++i) {
    num_threads[i] = omp_get_num_threads();
    num_teams[i] = omp_get_num_teams();
  }
  printf("num_threads %d num_teams %d\n", num_threads[0], num_teams[0]);
  // DEBUG: loop trip count is 128

  return 0;
}
|
||||
Reference in New Issue
Block a user