In kernel language mode, the user's grid and block sizes are used directly. No validity check is performed, so if the user's values are too large the launch will fail, similar to what CUDA and HIP currently do.
39 lines
1.0 KiB
C
39 lines
1.0 KiB
C
// RUN: %libomptarget-compile-generic
// RUN: env LIBOMPTARGET_INFO=63 %libomptarget-run-generic 2>&1 | %fcheck-generic
//
// UNSUPPORTED: x86_64-pc-linux-gnu
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
|
|
#include <assert.h>
|
|
#include <ompx.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
/*
 * Launches a bare-mode OpenMP target region with 64 teams of 64 threads.
 * Each thread computes its global index from the ompx block/thread queries
 * and stores it into the mapped buffer; the host then verifies every element.
 *
 * Returns 0 after printing "PASS", or EXIT_FAILURE when the host buffer
 * cannot be allocated.
 */
int main(int argc, char *argv[]) {
  const int num_blocks = 64;
  const int block_size = 64;
  const int N = num_blocks * block_size;

  /* Fail loudly instead of handing a null pointer to the map clause. */
  int *data = malloc(N * sizeof *data);
  if (data == NULL) {
    fprintf(stderr, "failed to allocate %d ints\n", N);
    return EXIT_FAILURE;
  }

  /*
   * The two long lines below are kept verbatim and on a single line each:
   * the comment is a FileCheck directive matched against the runtime's info
   * output, and the directive carries the launch configuration under test.
   */
  // clang-format off
  // CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with 64 blocks and 64 threads in SPMD mode
#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) map(from: data[0:N])
  {
    int bid = ompx_block_id_x();
    int bdim = ompx_block_dim_x();
    int tid = ompx_thread_id_x();
    /* Flattened global index of this thread across all blocks. */
    int idx = bid * bdim + tid;
    data[idx] = idx;
  }
  // clang-format on

  /* Every slot must hold its own index once the region has completed. */
  for (int i = 0; i < N; ++i) {
    assert(data[i] == i);
  }

  free(data);

  // CHECK: PASS
  printf("PASS\n");

  return 0;
}
|