Summary: This removes the use of OpenMP offloading to build the device runtime. The main benefit here is that we no longer need to rely on offloading semantics to build a device-only runtime. Things like variants are now no longer needed and can just be simple if-defs. In the future, I will remove most of the special handling here and fold it into calls to the `<gpuintrin.h>` functions instead. Additionally, I will rework the compilation to make this a separate runtime. The current plan is to have this, but make including OpenMP and offloading either automatically add it, or print a warning if it's missing. This will allow us to use a normal CMake workflow and delete all the weird 'let's pull the clang binary out of the build' business. ``` -DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=offload -DLLVM_RUNTIME_TARGETS=amdgcn-amd-amdhsa ``` After that, linking the OpenMP device runtime will be `-Xoffload-linker -lomp`. I.e., no more fat binary business. Only look at the most recent commit since this includes the two dependencies (fix to AMDGPUEmitPrintfBinding and the PointerToMember bug).
49 lines
1.4 KiB
C++
49 lines
1.4 KiB
C++
//===------- LibC.cpp - Simple implementation of libc functions --- C++ ---===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "LibC.h"
|
|
|
|
// Select the `vprintf` that the printf wrappers in this file forward to.
#if !defined(__AMDGPU__) || defined(OMPTARGET_HAS_LIBC)
// Either this is not an AMDGPU build or OMPTARGET_HAS_LIBC is defined: only
// declare vprintf here, its definition has to come from somewhere else.
extern "C" int vprintf(const char *format, __builtin_va_list);
#else
// AMDGPU build without OMPTARGET_HAS_LIBC: define a stub that ignores both of
// its arguments and always returns -1.
extern "C" int vprintf(const char *format, __builtin_va_list) { return -1; }
#endif
|
|
|
|
extern "C" {
|
|
/// Weak byte-wise comparison of the first \p count bytes of \p lhs and \p rhs.
///
/// Bytes are compared as `unsigned char`. Returns 0 when all \p count bytes
/// match (including when \p count is 0); otherwise returns the difference
/// between the first pair of differing bytes (lhs byte minus rhs byte).
[[gnu::weak]] int memcmp(const void *lhs, const void *rhs, size_t count) {
  const auto *Left = static_cast<const unsigned char *>(lhs);
  const auto *Right = static_cast<const unsigned char *>(rhs);

  size_t Idx = 0;
  while (Idx != count) {
    const int Delta =
        static_cast<int>(Left[Idx]) - static_cast<int>(Right[Idx]);
    // A non-zero difference means the bytes at this position differ.
    if (Delta != 0)
      return Delta;
    ++Idx;
  }

  return 0;
}
|
|
|
|
/// Weak byte fill: stores the value \p C, converted to `char`, into each of
/// the first \p count bytes starting at \p dst.
///
/// Note that, unlike the standard C `memset`, this variant returns nothing.
[[gnu::weak]] void memset(void *dst, int C, size_t count) {
  const char Fill = static_cast<char>(C);
  char *Cursor = static_cast<char *>(dst);
  for (size_t Remaining = count; Remaining != 0; --Remaining)
    *Cursor++ = Fill;
}
|
|
|
|
/// Weak `printf`: packs the variadic arguments following \p Format into a
/// va_list and forwards them to `::vprintf`.
///
/// \param Format printf-style format string passed through unchanged.
/// \returns whatever `::vprintf` returns for this call.
[[gnu::weak]] int printf(const char *Format, ...) {
  __builtin_va_list Args;
  __builtin_va_start(Args, Format);
  const int Result = ::vprintf(Format, Args);
  // Every va_start must be matched by a va_end before the function returns.
  __builtin_va_end(Args);
  return Result;
}
|
|
}
|
|
|
|
namespace ompx {

/// `ompx::printf`: packs the variadic arguments following \p Format into a
/// va_list and forwards them to the global `::vprintf`.
///
/// The function carries `[[clang::no_builtin("printf")]]`, which asks clang
/// not to apply its builtin handling of `printf` inside this body.
///
/// \param Format printf-style format string passed through unchanged.
/// \returns whatever `::vprintf` returns for this call.
[[clang::no_builtin("printf")]] int printf(const char *Format, ...) {
  __builtin_va_list Args;
  __builtin_va_start(Args, Format);
  const int Result = ::vprintf(Format, Args);
  // Every va_start must be matched by a va_end before the function returns.
  __builtin_va_end(Args);
  return Result;
}

} // namespace ompx
|