Summary: Normally, the implementation of `puts` simply writes a second newline character after printing the first string. However, because the GPU does everything in batches of the SIMT group size, this will end up with very poor output where you get the strings printed and then 1-64 newline characters all in a row. Optimizations like to turn `printf` calls into `puts` so it's a good idea to make this produce the expected output. The least invasive way I could do this was to add a new opcode. It's a little bloated, but it avoids an unnecessary and slow send operation to configure this.
36 lines
596 B
C
36 lines
596 B
C
// RUN: %libomptarget-compile-run-and-check-generic
|
|
|
|
// REQUIRES: libc
|
|
|
|
#include <stdio.h>
|
|
|
|
// Mark `stdout` as a declare-target symbol so the target regions in main()
// can reference it when calling fputs from offloaded code.
#pragma omp declare target to(stdout)
|
|
|
|
// Test driver: prints "PASS" from several kinds of OpenMP target regions.
// The expected-output comments next to each region are matched, in order,
// against the program's standard output by the test harness.
int main() {
// A plain (synchronous) target region writing through fputs.
// CHECK: PASS
#pragma omp target
  { fputs("PASS\n", stdout); }

// Two deferred (nowait) target regions; the taskwait further down waits for
// both of them before the test continues.
// CHECK: PASS
#pragma omp target nowait
  { fputs("PASS\n", stdout); }

// CHECK: PASS
#pragma omp target nowait
  { fputs("PASS\n", stdout); }

#pragma omp taskwait

// The final region requests 4 teams of 2 threads each, so puts is expected to
// be called 8 times -- one expectation line per call. puts appends its own
// newline after the string, so each call emits "PASS" followed by an empty
// line; the expectations only look for the "PASS" text.
// CHECK: PASS
// CHECK: PASS
// CHECK: PASS
// CHECK: PASS
// CHECK: PASS
// CHECK: PASS
// CHECK: PASS
// CHECK: PASS
#pragma omp target teams num_teams(4)
#pragma omp parallel num_threads(2)
  { puts("PASS\n"); }
}
|