Files
clang-p2996/openmp/libomptarget/test/offloading/info.c
Joel E. Denny 48421ac441 [OpenMP] Improve ref count debug messages
For example, without this patch:

```
$ cat test.c
int main() {
  int x;
  #pragma omp target enter data map(alloc: x)
  #pragma omp target exit data map(release: x)
  ;
  return 0;
}
$ clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda test.c
$ LIBOMPTARGET_DEBUG=1 ./a.out |& grep 'Creating\|Mapping exists'
Libomptarget --> Creating new map entry with HstPtrBegin=0x00007ffcace8e448, TgtPtrBegin=0x00007f12ef600000, Size=4, Name=unknown
Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffcace8e448, TgtPtrBegin=0x00007f12ef600000, Size=4, updated RefCount=1
```

There are two problems in this example:

* `RefCount` is not reported when a mapping is created, but it might
  be 1 or infinite.  In this case, because it's created by `omp target
  enter data`, it's 1.  Seeing that would make later `RefCount`
  messages easier to understand.
* `RefCount` is still 1 at the `omp target exit data`, but it's
  reported as `updated`.  The reason it's still 1 is that, upon
  deletions, the reference count is generally not updated in
  `DeviceTy::getTgtPtrBegin`, where the report is produced.  Instead,
  it's zeroed later in `DeviceTy::deallocTgtPtr`, where it's actually
  removed from the mapping table.

This patch makes the following changes:

* Report the reference count when creating a mapping.
* Where an existing mapping is reported, always report a reference
  count action:
    * `update suppressed` when `UpdateRefCount=false`
    * `incremented`
    * `decremented`
    * `deferred final decrement`, which replaces the misleading
      `updated` in the above example
* Add comments to `DeviceTy::getTgtPtrBegin` to explain why it does
  not zero the reference count.  (Please advise if these comments miss
  the point.)
* For unified shared memory, don't report confusing messages like
  `RefCount=` or `RefCount= updated` given that reference counts are
  irrelevant in this case.  Instead, just report `for unified shared
  memory`.
* Use `INFO` not `DP` consistently for `Mapping exists` messages.
* Fix device table dumps to print `INF` instead of `-1` for an
  infinite reference count.

Reviewed By: jhuber6, grokos

Differential Revision: https://reviews.llvm.org/D104559
2021-06-23 09:57:19 -04:00

66 lines
4.2 KiB
C

// RUN: %libomptarget-compile-nvptx64-nvidia-cuda -gline-tables-only && env LIBOMPTARGET_INFO=63 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=INFO
// REQUIRES: nvptx64-nvidia-cuda
#include <stdio.h>
#include <omp.h>
#define N 64
#pragma omp declare target
int global;
#pragma omp end declare target
extern void __tgt_set_info_flag(unsigned);
int main() {
int A[N];
int B[N];
int C[N];
int val = 1;
// INFO: CUDA device 0 info: Device supports up to {{[0-9]+}} CUDA blocks and {{[0-9]+}} threads with a warp size of {{[0-9]+}}
// INFO: Libomptarget device 0 info: Entering OpenMP data region at info.c:{{[0-9]+}}:{{[0-9]+}} with 3 arguments:
// INFO: Libomptarget device 0 info: alloc(A[0:64])[256]
// INFO: Libomptarget device 0 info: tofrom(B[0:64])[256]
// INFO: Libomptarget device 0 info: to(C[0:64])[256]
// INFO: Libomptarget device 0 info: Creating new map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, RefCount=1, Name=A[0:64]
// INFO: Libomptarget device 0 info: Creating new map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, RefCount=1, Name=B[0:64]
// INFO: Libomptarget device 0 info: Copying data from host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=256, Name=B[0:64]
// INFO: Libomptarget device 0 info: Creating new map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, RefCount=1, Name=C[0:64]
// INFO: Libomptarget device 0 info: Copying data from host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=256, Name=C[0:64]
// INFO: Libomptarget device 0 info: OpenMP Host-Device pointer mappings after block at info.c:{{[0-9]+}}:{{[0-9]+}}:
// INFO: Libomptarget device 0 info: Host Ptr Target Ptr Size (B) RefCount Declaration
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 C[0:64] at info.c:{{[0-9]+}}:{{[0-9]+}}
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 B[0:64] at info.c:{{[0-9]+}}:{{[0-9]+}}
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 A[0:64] at info.c:{{[0-9]+}}:{{[0-9]+}}
// INFO: Libomptarget device 0 info: Entering OpenMP kernel at info.c:{{[0-9]+}}:{{[0-9]+}} with 1 arguments:
// INFO: Libomptarget device 0 info: firstprivate(val)[4]
// INFO: CUDA device 0 info: Launching kernel __omp_offloading_{{.*}}main{{.*}} with {{[0-9]+}} blocks and {{[0-9]+}} threads in Generic mode
// INFO: Libomptarget device 0 info: OpenMP Host-Device pointer mappings after block at info.c:{{[0-9]+}}:{{[0-9]+}}:
// INFO: Libomptarget device 0 info: Host Ptr Target Ptr Size (B) RefCount Declaration
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 C[0:64] at info.c:{{[0-9]+}}:{{[0-9]+}}
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 B[0:64] at info.c:{{[0-9]+}}:{{[0-9]+}}
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 A[0:64] at info.c:{{[0-9]+}}:{{[0-9]+}}
// INFO: Libomptarget device 0 info: Exiting OpenMP data region at info.c:{{[0-9]+}}:{{[0-9]+}} with 3 arguments:
// INFO: Libomptarget device 0 info: alloc(A[0:64])[256]
// INFO: Libomptarget device 0 info: tofrom(B[0:64])[256]
// INFO: Libomptarget device 0 info: to(C[0:64])[256]
// INFO: Libomptarget device 0 info: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, Size=256, Name=B[0:64]
// INFO: Libomptarget device 0 info: Removing map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=C[0:64]
// INFO: Libomptarget device 0 info: Removing map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=B[0:64]
// INFO: Libomptarget device 0 info: Removing map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=A[0:64]
// INFO: Libomptarget device 0 info: OpenMP Host-Device pointer mappings after block at info.c:[[#%u,]]:[[#%u,]]:
// INFO: Libomptarget device 0 info: Host Ptr Target Ptr Size (B) RefCount Declaration
// INFO: Libomptarget device 0 info: [[#%#x,]] [[#%#x,]] 4 INF unknown at unknown:0:0
#pragma omp target data map(alloc:A[0:N]) map(tofrom:B[0:N]) map(to:C[0:N])
#pragma omp target firstprivate(val)
{ val = 1; }
__tgt_set_info_flag(0x0);
// INFO-NOT: Libomptarget device 0 info: {{.*}}
#pragma omp target
{ }
return 0;
}