Files
clang-p2996/offload/DeviceRTL/src/Allocator.cpp
Joseph Huber bb7ab2557c [OpenMP] Port the OpenMP device runtime to direct C++ compilation (#123673)
Summary:
This removes the use of OpenMP offloading to build the device runtime.
The main benefit here is that we no longer need to rely on offloading
semantics to build a device only runtime. Things like variants are now
no longer needed and can just be simple if-defs. In the future, I will
remove most of the special handling here and fold it into calls to the
`<gpuintrin.h>` functions instead. Additionally I will rework the
compilation to make this a separate runtime.

The current plan is to have this, but make including OpenMP and
offloading either automatically add it, or print a warning if it's
missing. This will allow us to use a normal CMake workflow and delete
all the weird 'let's pull the clang binary out of the build' business.
```
-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=offload
-DLLVM_RUNTIME_TARGETS=amdgcn-amd-amdhsa
```

After that, linking the OpenMP device runtime will be `-Xoffload-linker
-lomp`. I.e. no more fat binary business.

Only look at the most recent commit since this includes the two
dependencies
(fix to AMDGPUEmitPrintfBinding and the PointerToMember bug).
2025-02-05 08:18:52 -06:00

78 lines
2.4 KiB
C++

//===------ Allocator.cpp - OpenMP device memory allocator ------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "Shared/Environment.h"
#include "Allocator.h"
#include "Configuration.h"
#include "DeviceTypes.h"
#include "DeviceUtils.h"
#include "Mapping.h"
#include "Synchronization.h"
using namespace ompx;
/// Descriptor of the device memory pool. Its `Ptr` and `Size` members are
/// consumed by BumpAllocatorTy::alloc in this file.
///
/// The attributes keep the symbol alive even though nothing outside this file
/// is visibly referencing it (`used`, `retain`), make the definition weak, and
/// give it protected visibility.
/// NOTE(review): presumably the host-side runtime locates this symbol by name
/// and fills it in before any allocation happens - confirm against the plugin.
[[gnu::used, gnu::retain, gnu::weak,
gnu::visibility(
"protected")]] DeviceMemoryPoolTy __omp_rtl_device_memory_pool;
/// Allocation statistics (`NumAllocations`, `AllocationTotal`,
/// `AllocationMin`, `AllocationMax`). BumpAllocatorTy::alloc updates them
/// atomically, but only when the AllocationTracker debug kind is enabled.
///
/// Carries the same linkage attributes as the pool descriptor.
/// NOTE(review): presumably read back by the host for reporting - confirm.
[[gnu::used, gnu::retain, gnu::weak,
gnu::visibility("protected")]] DeviceMemoryPoolTrackingTy
__omp_rtl_device_memory_pool_tracker;
/// Bump allocator that hands out consecutive chunks of the memory described by
/// __omp_rtl_device_memory_pool.
///
/// The pool descriptor is treated as read-only: `Ptr` is the fixed base
/// address and `Size` the fixed capacity in bytes. The only mutable state is
/// `Offset`, which is advanced atomically so that concurrent callers receive
/// disjoint ranges.
///
/// Previously the allocator advanced `Ptr` in place and compared the result
/// against "address of the `Ptr` field + Size", i.e. against the location of
/// the descriptor rather than the end of the pool, so running out of memory
/// was not detected reliably. Keeping the base immutable makes the capacity
/// check a plain offset comparison.
/// NOTE(review): nothing else in this file reads `Ptr`; confirm no other
/// component relies on `Ptr` being advanced by the device.
struct BumpAllocatorTy final {
  /// Number of pool bytes reserved so far. Starts at zero through static
  /// initialization of the global instance below.
  uint64_t Offset = 0;

  /// Reserve \p Size bytes from the pool.
  ///
  /// \p Size is rounded up to a multiple of allocator::ALIGNMENT before the
  /// reservation is made. When the AllocationTracker debug kind is enabled the
  /// rounded size is also recorded in __omp_rtl_device_memory_pool_tracker.
  ///
  /// \returns the address of the reserved range. Traps (via __builtin_trap)
  /// if the rounded size overflows or the pool cannot satisfy the request.
  void *alloc(uint64_t Size) {
    const uint64_t Requested = Size;
    Size = utils::roundUp(Size, uint64_t(allocator::ALIGNMENT));
    // Rounding up a value close to UINT64_MAX wraps around to a tiny size;
    // such a request can never be satisfied.
    if (Size < Requested)
      __builtin_trap();

    if (config::isDebugMode(DeviceDebugKind::AllocationTracker)) {
      atomic::add(&__omp_rtl_device_memory_pool_tracker.NumAllocations, 1,
                  atomic::seq_cst);
      atomic::add(&__omp_rtl_device_memory_pool_tracker.AllocationTotal, Size,
                  atomic::seq_cst);
      atomic::min(&__omp_rtl_device_memory_pool_tracker.AllocationMin, Size,
                  atomic::seq_cst);
      atomic::max(&__omp_rtl_device_memory_pool_tracker.AllocationMax, Size,
                  atomic::seq_cst);
    }

    const uint64_t PoolSize = __omp_rtl_device_memory_pool.Size;

    // atomic::add yields the value held before the addition, which is the
    // start offset of the range reserved for this caller.
    const uint64_t OldOffset = atomic::add(&Offset, Size, atomic::seq_cst);

    // Written as a subtraction so the comparison itself cannot overflow.
    if (OldOffset > PoolSize || Size > PoolSize - OldOffset)
      __builtin_trap();

    const uint64_t Base =
        reinterpret_cast<uint64_t>(__omp_rtl_device_memory_pool.Ptr);
    return reinterpret_cast<void *>(Base + OldOffset);
  }

  /// Releasing memory is a no-op; reserved ranges are never reused.
  void free(void *) {}
};

/// The single allocator instance backing the allocator:: entry points.
BumpAllocatorTy BumpAllocator;
/// allocator namespace implementation
///
///{
/// Initialization hook of the allocator interface.
///
/// Currently a no-op: \p IsSPMD and \p KernelEnvironment are both ignored
/// because there is only one allocator to choose from (see the TODO below).
void allocator::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment) {
// TODO: Check KernelEnvironment for an allocator choice as soon as we have
// more than one.
}
/// Allocate \p Size bytes by forwarding the request to the file-level
/// BumpAllocator and handing its result back unchanged.
void *allocator::alloc(uint64_t Size) {
  void *Memory = BumpAllocator.alloc(Size);
  return Memory;
}
/// Release \p Ptr by forwarding to the file-level BumpAllocator. The bump
/// allocator's free() has an empty body, so no memory is actually reclaimed.
void allocator::free(void *Ptr) {
  BumpAllocator.free(Ptr);
  return;
}
///}