Files
clang-p2996/offload/DeviceRTL/src/Tasking.cpp
Johannes Doerfert 330d8983d2 [Offload] Move /openmp/libomptarget to /offload (#75125)
In a nutshell, this moves our libomptarget code to populate the offload
subproject.

With this commit, users need to enable the new LLVM/Offload subproject
as a runtime in their cmake configuration.
No further changes are expected for downstream code.

Tests and other components still depend on OpenMP and have also not been
renamed. The results below are for a build in which OpenMP and Offload
are enabled runtimes. In addition to the pure `git mv`, we needed to
adjust some CMake files. Nothing is intended to change semantics.

```
ninja check-offload
```
Works with the X86 and AMDGPU offload tests

```
ninja check-openmp
```
Still works but doesn't build offload tests anymore.

```
ls install/lib
```
Shows all expected libraries, incl.
- `libomptarget.devicertl.a`
- `libomptarget-nvptx-sm_90.bc`
- `libomptarget.rtl.amdgpu.so` -> `libomptarget.rtl.amdgpu.so.18git`
- `libomptarget.so` -> `libomptarget.so.18git`

Fixes: https://github.com/llvm/llvm-project/issues/75124

---------

Co-authored-by: Saiyedul Islam <Saiyedul.Islam@amd.com>
2024-04-22 09:51:33 -07:00

108 lines
3.8 KiB
C++

//===-------- Tasking.cpp - NVPTX OpenMP tasks support ------------ C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Task implementation support.
//
// TODO: We should not allocate and execute the task in two steps. A new API is
// needed for that though.
//
//===----------------------------------------------------------------------===//
#include "Interface.h"
#include "State.h"
#include "Types.h"
#include "Utils.h"
using namespace ompx;
#pragma omp begin declare target device_type(nohost)
extern "C" {
/// Allocate and initialize the descriptor of an explicit task.
///
/// A single global allocation holds \p TaskSizeInclPrivateValues bytes
/// (rounded up to a multiple of the pointer size) followed by
/// \p SharedValuesSize bytes. The first three arguments are ignored.
///
/// \returns the new descriptor; its `Payload` member points just past the
/// padded private part and its `TaskFn` member is set to \p TaskFn.
TaskDescriptorTy *__kmpc_omp_task_alloc(IdentTy *, int32_t, int32_t,
                                        size_t TaskSizeInclPrivateValues,
                                        size_t SharedValuesSize,
                                        TaskFnTy TaskFn) {
  // Pad the private part so that the region after it starts on a
  // pointer-size boundary.
  auto PaddedPrivateSize =
      utils::roundUp(TaskSizeInclPrivateValues, uint64_t(sizeof(void *)));

  void *Storage = memory::allocGlobal(PaddedPrivateSize + SharedValuesSize,
                                      "explicit task descriptor");
  auto *Task = static_cast<TaskDescriptorTy *>(Storage);

  Task->TaskFn = TaskFn;
  Task->Payload = utils::advance(Task, PaddedPrivateSize);
  return Task;
}
/// Run the explicit task described by \p TaskDescriptor.
///
/// Thin wrapper around __kmpc_omp_task_with_deps: the four trailing
/// arguments of that function are passed as zero / null.
///
/// \returns whatever __kmpc_omp_task_with_deps returns.
int32_t __kmpc_omp_task(IdentTy *Loc, uint32_t TId,
                        TaskDescriptorTy *TaskDescriptor) {
  return __kmpc_omp_task_with_deps(Loc, TId, TaskDescriptor,
                                   /*int32_t=*/0, /*void *=*/nullptr,
                                   /*int32_t=*/0, /*void *=*/nullptr);
}
/// Execute the explicit task described by \p TaskDescriptor right away on the
/// calling thread and release the descriptor afterwards.
///
/// The four trailing (unnamed) arguments are ignored, and \p TId is not used.
///
/// \returns always 0.
int32_t __kmpc_omp_task_with_deps(IdentTy *Loc, uint32_t TId,
                                  TaskDescriptorTy *TaskDescriptor, int32_t,
                                  void *, int32_t, void *) {
  // Lives until the function returns, i.e. across both the task body and the
  // release of the descriptor.
  // NOTE(review): presumably enters/leaves a data environment for the task;
  // confirm against State.h.
  state::DateEnvironmentRAII TaskScope(Loc);

  // The task entry point gets 0 as its first argument and its own descriptor
  // as the second one.
  auto TaskEntry = TaskDescriptor->TaskFn;
  TaskEntry(0, TaskDescriptor);

  // The task has finished, its descriptor is no longer needed.
  memory::freeGlobal(TaskDescriptor, "explicit task descriptor");
  return 0;
}
/// Begin of a task that the caller executes inline: only enters a data
/// environment for \p Loc. \p TId and \p TaskDescriptor are not used here.
void __kmpc_omp_task_begin_if0(IdentTy *Loc, uint32_t TId,
                               TaskDescriptorTy *TaskDescriptor) {
  state::enterDataEnvironment(Loc);
}
/// End of a task that the caller executed inline: leaves the data
/// environment and then releases \p TaskDescriptor. \p Loc and \p TId are not
/// used here.
void __kmpc_omp_task_complete_if0(IdentTy *Loc, uint32_t TId,
                                  TaskDescriptorTy *TaskDescriptor) {
  // Leave the environment first, then give the descriptor memory back.
  state::exitDataEnvironment();
  memory::freeGlobal(TaskDescriptor, "explicit task descriptor");
}
/// Intentionally empty: all arguments are ignored and nothing is waited for.
/// NOTE(review): presumably sufficient because __kmpc_omp_task_with_deps runs
/// every task to completion before it returns.
void __kmpc_omp_wait_deps(IdentTy *Loc, uint32_t TId, int32_t, void *, int32_t,
                          void *) {
}
/// Intentionally empty: entering a taskgroup does nothing in this
/// implementation. Both arguments are ignored.
void __kmpc_taskgroup(IdentTy *Loc, uint32_t TId) {
}
/// Intentionally empty: leaving a taskgroup does nothing in this
/// implementation. Both arguments are ignored.
void __kmpc_end_taskgroup(IdentTy *Loc, uint32_t TId) {
}
/// Task yield entry point; performs no work and ignores all arguments.
///
/// \returns always 0.
int32_t __kmpc_omp_taskyield(IdentTy *Loc, uint32_t TId, int) {
  return 0;
}
/// Task wait entry point; performs no work and ignores both arguments.
/// NOTE(review): presumably sufficient because __kmpc_omp_task_with_deps runs
/// every task to completion before it returns.
///
/// \returns always 0.
int32_t __kmpc_omp_taskwait(IdentTy *Loc, uint32_t TId) {
  return 0;
}
/// Execute a taskloop as one single task on the calling thread.
///
/// \param Loc            Source location, forwarded to the task execution.
/// \param TId            Thread id, forwarded to the task execution.
/// \param TaskDescriptor Descriptor of the task that runs the whole loop. It
///                       is always released by this call, whether or not the
///                       task body is executed.
/// \param LowerBound     Pointer to the lower bound of the iteration space.
/// \param UpperBound     Pointer to the upper bound of the iteration space.
///
/// All remaining (unnamed) arguments are ignored.
void __kmpc_taskloop(IdentTy *Loc, uint32_t TId,
                     TaskDescriptorTy *TaskDescriptor, int,
                     uint64_t *LowerBound, uint64_t *UpperBound, int64_t, int,
                     int32_t, uint64_t, void *) {
  // Skip task entirely if empty iteration space. The descriptor still has to
  // be released here: on the regular path __kmpc_omp_task_with_deps frees it
  // after running the task, so returning without a free would leak it.
  if (*LowerBound > *UpperBound) {
    memory::freeGlobal(TaskDescriptor, "explicit task descriptor");
    return;
  }

  // The compiler has already stored lb and ub in the TaskDescriptorTy
  // structure. As a single task executes the entire loop, the initial task
  // descriptor can be used untouched.
  __kmpc_omp_task_with_deps(Loc, TId, TaskDescriptor, /*int32_t=*/0,
                            /*void *=*/nullptr, /*int32_t=*/0,
                            /*void *=*/nullptr);
}
/// Report whether the caller is inside a final task.
///
/// \returns always 1.
int omp_in_final(void) {
  // Every task is treated as final. The specification may expect the runtime
  // to track more precisely whether a task was actively made final by the
  // user, but this is not explicitly specified; we act as if the runtime is
  // free to turn a non-final task into a final one.
  constexpr int AlwaysFinal = 1;
  return AlwaysFinal;
}
/// Report the maximum task priority.
///
/// \returns always 0.
int omp_get_max_task_priority(void) {
  constexpr int MaxTaskPriority = 0;
  return MaxTaskPriority;
}
}
#pragma omp end declare target