The problem is a shortage of dispatch buffers when thousands of loops with nowait, about 10 iterations each, are executed by hundreds of threads. We have only 7 built-in dispatch buffers, but dozens or hundreds of buffers are needed. The problem can be fixed by setting KMP_MAX_DISP_BUF to a bigger value. To give users the same possibility, I changed the build-time control into a run-time one, adding an API just in case. This change adds an environment variable KMP_DISP_NUM_BUFFERS and a new API function kmp_set_disp_num_buffers(int num_buffers). The KMP_DISP_NUM_BUFFERS environment variable works only before serial initialization, because during serial initialization we already allocate buffers for the hot team, so it is too late to change the number of buffers later (otherwise we would need to reallocate buffers for all teams, which sounds too complicated). The kmp_set_defaults() routine does not work for this environment variable, because it calls serial initialization before reading the parameter string. So a new routine, kmp_set_disp_num_buffers(), is created so that it can set our internal global variable before the library initialization. If both the environment variable and the API are used, the environment variable wins. Differential Revision: http://reviews.llvm.org/D20697 llvm-svn: 271318
77 lines
1.9 KiB
C
77 lines
1.9 KiB
C
// RUN: %libomp-compile && env KMP_DISP_NUM_BUFFERS=0 %libomp-run
|
|
// RUN: env KMP_DISP_NUM_BUFFERS=1 %libomp-run && env KMP_DISP_NUM_BUFFERS=3 %libomp-run
|
|
// RUN: env KMP_DISP_NUM_BUFFERS=4 %libomp-run && env KMP_DISP_NUM_BUFFERS=7 %libomp-run
|
|
// RUN: %libomp-compile -DMY_SCHEDULE=guided && env KMP_DISP_NUM_BUFFERS=1 %libomp-run
|
|
// RUN: env KMP_DISP_NUM_BUFFERS=3 %libomp-run && env KMP_DISP_NUM_BUFFERS=4 %libomp-run
|
|
// RUN: env KMP_DISP_NUM_BUFFERS=7 %libomp-run
|
|
#include <stdio.h>
|
|
#include <omp.h>
|
|
#include <stdlib.h>
|
|
#include <limits.h>
|
|
#include "omp_testsuite.h"
|
|
|
|
// Iteration space shared by the parallel loops and the serial reference
// count: one loop walks upward from MY_MIN (exclusive of MY_MAX), the other
// walks downward from MY_MAX (inclusive of MY_MIN), both in steps of INCR.
#define MY_MIN -200
#define MY_MAX 200
#define INCR 7

// How many rounds of the two worksharing loops each parallel region runs.
#define NUM_LOOPS 100

// Schedule kind used by the worksharing loops. Defaults to dynamic; the RUN
// lines also build the test with -DMY_SCHEDULE=guided.
#ifndef MY_SCHEDULE
#define MY_SCHEDULE dynamic
#endif

// Counters incremented atomically inside the parallel region.
int a;
int b;
// Expected final counter values, computed serially in main().
int a_known_value;
int b_known_value;
|
|
|
|
int test_kmp_set_disp_num_buffers()
|
|
{
|
|
int success = 1;
|
|
a = 0;
|
|
b = 0;
|
|
// run many small dynamic loops to stress the dispatch buffer system
|
|
#pragma omp parallel
|
|
{
|
|
int i,j;
|
|
for (j = 0; j < NUM_LOOPS; j++) {
|
|
#pragma omp for schedule(MY_SCHEDULE) nowait
|
|
for (i = MY_MIN; i < MY_MAX; i+=INCR) {
|
|
#pragma omp atomic
|
|
a++;
|
|
}
|
|
#pragma omp for schedule(MY_SCHEDULE) nowait
|
|
for (i = MY_MAX; i >= MY_MIN; i-=INCR) {
|
|
#pragma omp atomic
|
|
b++;
|
|
}
|
|
}
|
|
}
|
|
// detect failure
|
|
if (a != a_known_value || b != b_known_value) {
|
|
success = 0;
|
|
printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value,
|
|
b, b_known_value);
|
|
}
|
|
return success;
|
|
}
|
|
|
|
int main(int argc, char** argv)
|
|
{
|
|
int i,j;
|
|
int num_failed=0;
|
|
|
|
// figure out the known values to compare with calculated result
|
|
a_known_value = 0;
|
|
b_known_value = 0;
|
|
|
|
for (j = 0; j < NUM_LOOPS; j++) {
|
|
for (i = MY_MIN; i < MY_MAX; i+=INCR)
|
|
a_known_value++;
|
|
for (i = MY_MAX; i >= MY_MIN; i-=INCR)
|
|
b_known_value++;
|
|
}
|
|
|
|
for(i = 0; i < REPETITIONS; i++) {
|
|
if(!test_kmp_set_disp_num_buffers()) {
|
|
num_failed++;
|
|
}
|
|
}
|
|
return num_failed;
|
|
}
|