The __ompt_get_task_info_internal function is adapted to support thread_num determination during the execution of multiple nested serialized parallel regions enclosed by a regular parallel region. Consider the following program that contains parallel region R1 executed by two threads. Let the worker thread T of region R1 execute a serialized parallel region R2 that encloses another serialized parallel region R3. Note that the thread T is the master thread of both the R2 and R3 regions. Assume that the __ompt_get_task_info_internal function is called with the argument "ancestor_level == 1" during the execution of region R3. The function should determine the "thread_num" of the thread T inside the team of region R2, whose implicit task is at level 1 inside the hierarchy of active tasks. Since the thread T is the master thread of region R2, one should expect that "thread_num" takes the value 0. After the while loop finishes, the following holds: "lwt != NULL", "prev_lwt == NULL", "prev_team" represents the team information about the innermost serialized parallel region R3. This results in executing the assignment "thread_num = prev_team->t.t_master_tid". Note that "prev_team->t.t_master_tid" was initialized at the moment of R2’s creation and represents the "thread_num" of the thread T inside the region R1 which encloses R2. Since the thread T is the worker thread of the region R1, the "thread_num" takes the value 1, which is a contradiction. This patch proposes to use "lwt" instead of "prev_lwt" when determining the "thread_num". If "lwt" exists, the task at the requested level belongs to the serialized parallel region. Since the serialized parallel region is executed by one thread only, the "thread_num" takes the value 0. Similarly, assume that the __ompt_get_task_info_internal function is called with the argument "ancestor_level == 2" during the execution of region R3. The function should determine the "thread_num" of the thread T inside the team of region R1. 
Since the thread is the worker inside the region R1, one should expect that "thread_num" takes the value 1. After the loop finishes, the following holds: "lwt == NULL", "prev_lwt != NULL", "prev_team" represents the team information about the innermost serialized parallel region R3. This leads to execution of the assignment "thread_num = 0", which causes a contradiction. Ignoring the "prev_lwt" leads to executing the assignment "thread_num = prev_team->t.t_master_tid" instead. From the previous explanation, it is obvious that "thread_num" takes the value 1. Note that the "prev_lwt" variable is marked as unnecessary and thus removed. This patch introduces the test case which represents the OpenMP program described earlier in the summary. Differential Revision: https://reviews.llvm.org/D110699
134 lines
4.9 KiB
C
134 lines
4.9 KiB
C
// RUN: %libomp-compile-and-run | FileCheck %s
|
|
// REQUIRES: ompt
|
|
|
|
#include "callback.h"
|
|
#include <omp.h>
|
|
|
|
|
|
__attribute__ ((noinline)) // workaround for bug in icc
|
|
void print_task_info_at(int ancestor_level, int id)
|
|
{
|
|
#pragma omp critical
|
|
{
|
|
int task_type;
|
|
char buffer[2048];
|
|
ompt_data_t *parallel_data;
|
|
ompt_data_t *task_data;
|
|
int thread_num;
|
|
ompt_get_task_info(ancestor_level, &task_type, &task_data, NULL,
|
|
¶llel_data, &thread_num);
|
|
format_task_type(task_type, buffer);
|
|
printf("%" PRIu64 ": ancestor_level=%d id=%d task_type=%s=%d "
|
|
"parallel_id=%" PRIu64 " task_id=%" PRIu64
|
|
" thread_num=%d\n",
|
|
ompt_get_thread_data()->value, ancestor_level, id, buffer,
|
|
task_type, parallel_data->value, task_data->value, thread_num);
|
|
}
|
|
};
|
|
|
|
// Print the task info for ancestor level 0 (the innermost task), tagged with
// `id`.
__attribute__ ((noinline)) // workaround for bug in icc
void print_innermost_task_info(int id)
{
  print_task_info_at(0, id);
}
|
|
|
|
|
|
// Test driver: a two-thread parallel region (region 0) in which the thread
// with omp_get_thread_num() == 1 opens two nested one-thread parallel regions
// (regions 1 and 2). Task info is printed at each nesting depth, and then from
// the innermost region for ancestor levels 0, 1 and 2. The FileCheck
// directives after the parallel region state the expected output.
int main()
{
  #pragma omp parallel num_threads(2)
  {
    // sync threads before checking the output
    #pragma omp barrier
    // region 0
    if (omp_get_thread_num() == 1) {
      // executed by worker thread only
      // assert that thread_num is 1
      print_innermost_task_info(1);

      #pragma omp parallel num_threads(1)
      {
        // serialized region 1
        // assert that thread_num is 0
        print_innermost_task_info(2);

        #pragma omp parallel num_threads(1)
        {
          // serialized region 2
          // assert that thread_num is 0
          print_innermost_task_info(3);

          // Check the value of thread_num while iterating over the hierarchy
          // of active tasks.
          print_task_info_at(0, 3);
          print_task_info_at(1, 2);
          print_task_info_at(2, 1);

        }

      }
    }
  }

  // Check if libomp supports the callbacks for this test.
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create'
  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'


  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_initial_task_begin: parallel_id=[[PARALLEL_ID_0:[0-9]+]], task_id=[[TASK_ID_0:[0-9]+]], actual_parallelism=1, index=1, flags=1

  // region 0
  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[TASK_ID_0]],
  // CHECK-SAME: parallel_id=[[PARALLEL_ID_1:[0-9]+]]
  // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID_1]], task_id=[[TASK_ID_1:[0-9]+]]
  // CHECK-DAG: {{^}}[[WORKER_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID_1]], task_id=[[TASK_ID_2:[0-9]+]]
  // assert some info about implicit task executed by worker thread
  // thread_num is the most important
  // CHECK: {{^}}[[WORKER_ID]]: ancestor_level=0 id=1
  // CHECK-SAME: parallel_id=[[PARALLEL_ID_1]] task_id=[[TASK_ID_2]]
  // CHECK-SAME: thread_num=1

  // serialized region 1
  // CHECK: {{^}}[[WORKER_ID]]: ompt_event_parallel_begin: parent_task_id=[[TASK_ID_2]],
  // CHECK-SAME: parallel_id=[[PARALLEL_ID_2:[0-9]+]]
  // CHECK-DAG: {{^}}[[WORKER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID_2]], task_id=[[TASK_ID_3:[0-9]+]]
  // assert some information about the implicit task of the serialized region 1
  // pay attention that thread_num should take value 0
  // CHECK: {{^}}[[WORKER_ID]]: ancestor_level=0 id=2
  // CHECK-SAME: parallel_id=[[PARALLEL_ID_2]] task_id=[[TASK_ID_3]]
  // CHECK-SAME: thread_num=0

  // serialized region 2
  // CHECK: {{^}}[[WORKER_ID]]: ompt_event_parallel_begin: parent_task_id=[[TASK_ID_3]],
  // CHECK-SAME: parallel_id=[[PARALLEL_ID_3:[0-9]+]]
  // CHECK-DAG: {{^}}[[WORKER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID_3]], task_id=[[TASK_ID_4:[0-9]+]]
  // assert some information about the implicit task of the serialized region 2
  // pay attention that thread_num should take value 0
  // CHECK: {{^}}[[WORKER_ID]]: ancestor_level=0 id=3
  // CHECK-SAME: parallel_id=[[PARALLEL_ID_3]] task_id=[[TASK_ID_4]]
  // CHECK-SAME: thread_num=0

  // Check the value of thread_num argument while iterating over the hierarchy
  // of active tasks. The expected is that thread_num takes the value checked
  // above in the test case (0, 0, 1 - respectively).

  // Thread is the master thread of the region 2, so thread_num should be 0.
  // CHECK: {{^}}[[WORKER_ID]]: ancestor_level=0 id=3
  // CHECK-SAME: parallel_id=[[PARALLEL_ID_3]] task_id=[[TASK_ID_4]]
  // CHECK-SAME: thread_num=0

  // Thread is the master thread of the region 1, so thread_num should be 0.
  // CHECK: {{^}}[[WORKER_ID]]: ancestor_level=1 id=2
  // CHECK-SAME: parallel_id=[[PARALLEL_ID_2]] task_id=[[TASK_ID_3]]
  // CHECK-SAME: thread_num=0

  // Thread is the worker thread of the region 0, so thread_num should be 1.
  // CHECK: {{^}}[[WORKER_ID]]: ancestor_level=2 id=1
  // CHECK-SAME: parallel_id=[[PARALLEL_ID_1]] task_id=[[TASK_ID_2]]
  // CHECK-SAME: thread_num=1

  return 0;
}
|