[OpenMP][libc] Remove special handling for OpenMP printf (#98940)

Summary: Currently there are several layers to handle `printf`. Since we now have varargs and an implementation of `printf` this can be heavily simplified. 1. The frontend renames `printf` into `omp_vprintf` and gives it an argument buffer. Removing 1. triggered some code in the AMDGPU backend menat for HIP / OpenCL, so I hadded an exception to it. 2. Forward this to CUDA vprintf or ignore it. We no longer need special handling for it since we have varargs. So now we just forward this to CUDA vprintf if we have libc, otherwise just leave `printf` as an external function and expect that `libc` will be linked in.
2024-07-26 16:03:36 -05:00
parent 26d92826a5
commit 069e8bcd82
11 changed files with 17 additions and 184 deletions
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5986,8 +5986,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
        getTarget().getTriple().isAMDGCN() ||
        (getTarget().getTriple().isSPIRV() &&
         getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
-      if (getLangOpts().OpenMPIsTargetDevice)
-        return EmitOpenMPDevicePrintfCallExpr(E);
      if (getTarget().getTriple().isNVPTX())
        return EmitNVPTXDevicePrintfCallExpr(E);
      if ((getTarget().getTriple().isAMDGCN() ||
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -42,28 +42,6 @@ llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
      VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M);
 }

-llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) {
-  const char *Name = "__llvm_omp_vprintf";
-  llvm::Module &M = CGM.getModule();
-  llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()),
-                            llvm::PointerType::getUnqual(M.getContext()),
-                            llvm::Type::getInt32Ty(M.getContext())};
-  llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
-      llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
-
-  if (auto *F = M.getFunction(Name)) {
-    if (F->getFunctionType() != VprintfFuncType) {
-      CGM.Error(SourceLocation(),
-                "Invalid type declaration for __llvm_omp_vprintf");
-      return nullptr;
-    }
-    return F;
-  }
-
-  return llvm::Function::Create(
-      VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M);
-}
-
 // Transforms a call to printf into a call to the NVPTX vprintf syscall (which
 // isn't particularly special; it's invoked just like a regular function).
 // vprintf takes two args: A format string, and a pointer to a buffer containing
@@ -213,10 +191,3 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
  Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
  return RValue::get(Printf);
 }
-
-RValue CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr *E) {
-  assert(getTarget().getTriple().isNVPTX() ||
-         getTarget().getTriple().isAMDGCN());
-  return EmitDevicePrintfCallExpr(E, this, GetOpenMPVprintfDeclaration(CGM),
-                                  true);
-}
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4536,7 +4536,6 @@ public:

  RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E);
  RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E);
-  RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E);

  RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                         const CallExpr *E, ReturnValueSlot ReturnValue);
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -226,7 +226,6 @@ set(TARGET_LIBC_ENTRYPOINTS

    # gpu/rpc.h entrypoints
    libc.src.gpu.rpc_host_call
-    libc.src.gpu.rpc_fprintf
 )

 set(TARGET_LIBM_ENTRYPOINTS
--- a/libc/spec/gpu_ext.td
+++ b/libc/spec/gpu_ext.td
@@ -10,14 +10,6 @@ def GPUExtensions : StandardSpec<"GPUExtensions"> {
            RetValSpec<VoidType>,
            [ArgSpec<VoidPtr>, ArgSpec<VoidPtr>, ArgSpec<SizeTType>]
        >,
-        FunctionSpec<
-            "rpc_fprintf",
-            RetValSpec<IntType>,
-            [ArgSpec<FILERestrictedPtr>,
-             ArgSpec<ConstCharRestrictedPtr>,
-             ArgSpec<VoidPtr>,
-             ArgSpec<SizeTType>]
-        >,
    ]
  >;
  let Headers = [
--- a/libc/src/gpu/CMakeLists.txt
+++ b/libc/src/gpu/CMakeLists.txt
@@ -8,15 +8,3 @@ add_entrypoint_object(
    libc.src.__support.RPC.rpc_client
    libc.src.__support.GPU.utils
 )
-
-add_entrypoint_object(
-  rpc_fprintf
-  SRCS
-    rpc_fprintf.cpp
-  HDRS
-    rpc_fprintf.h
-  DEPENDS
-    libc.src.stdio.gpu.gpu_file
-    libc.src.__support.RPC.rpc_client
-    libc.src.__support.GPU.utils
-)
--- a/libc/src/gpu/rpc_fprintf.cpp
+++ b/libc/src/gpu/rpc_fprintf.cpp
@@ -1,75 +0,0 @@
-//===-- GPU implementation of fprintf -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "rpc_fprintf.h"
-
-#include "src/__support/CPP/string_view.h"
-#include "src/__support/GPU/utils.h"
-#include "src/__support/RPC/rpc_client.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-#include "src/stdio/gpu/file.h"
-
-namespace LIBC_NAMESPACE_DECL {
-
-template <uint16_t opcode>
-int fprintf_impl(::FILE *__restrict file, const char *__restrict format,
-                 size_t format_size, void *args, size_t args_size) {
-  uint64_t mask = gpu::get_lane_mask();
-  rpc::Client::Port port = rpc::client.open<opcode>();
-
-  if constexpr (opcode == RPC_PRINTF_TO_STREAM) {
-    port.send([&](rpc::Buffer *buffer) {
-      buffer->data[0] = reinterpret_cast<uintptr_t>(file);
-    });
-  }
-
-  port.send_n(format, format_size);
-  port.recv([&](rpc::Buffer *buffer) {
-    args_size = static_cast<size_t>(buffer->data[0]);
-  });
-  port.send_n(args, args_size);
-
-  uint32_t ret = 0;
-  for (;;) {
-    const char *str = nullptr;
-    port.recv([&](rpc::Buffer *buffer) {
-      ret = static_cast<uint32_t>(buffer->data[0]);
-      str = reinterpret_cast<const char *>(buffer->data[1]);
-    });
-    // If any lanes have a string argument it needs to be copied back.
-    if (!gpu::ballot(mask, str))
-      break;
-
-    uint64_t size = str ? internal::string_length(str) + 1 : 0;
-    port.send_n(str, size);
-  }
-
-  port.close();
-  return ret;
-}
-
-// TODO: Delete this and port OpenMP to use `printf`.
-// place of varargs. Once varargs support is added we will use that to
-// implement the real version.
-LLVM_LIBC_FUNCTION(int, rpc_fprintf,
-                   (::FILE *__restrict stream, const char *__restrict format,
-                    void *args, size_t size)) {
-  cpp::string_view str(format);
-  if (stream == stdout)
-    return fprintf_impl<RPC_PRINTF_TO_STDOUT>(stream, format, str.size() + 1,
-                                              args, size);
-  else if (stream == stderr)
-    return fprintf_impl<RPC_PRINTF_TO_STDERR>(stream, format, str.size() + 1,
-                                              args, size);
-  else
-    return fprintf_impl<RPC_PRINTF_TO_STREAM>(stream, format, str.size() + 1,
-                                              args, size);
-}
-
-} // namespace LIBC_NAMESPACE_DECL
--- a/libc/src/gpu/rpc_fprintf.h
+++ b/libc/src/gpu/rpc_fprintf.h
@@ -1,23 +0,0 @@
-//===-- Implementation header for RPC functions -----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
-#define LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
-
-#include "hdr/types/FILE.h"
-#include "src/__support/macros/config.h"
-#include <stddef.h>
-
-namespace LIBC_NAMESPACE_DECL {
-
-int rpc_fprintf(::FILE *__restrict stream, const char *__restrict format,
-                void *argc, size_t size);
-
-} // namespace LIBC_NAMESPACE_DECL
-
-#endif // LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -437,7 +437,8 @@ bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
    return false;

  auto PrintfFunction = M.getFunction("printf");
-  if (!PrintfFunction || !PrintfFunction->isDeclaration())
+  if (!PrintfFunction || !PrintfFunction->isDeclaration() ||
+      M.getModuleFlag("openmp"))
    return false;

  for (auto &U : PrintfFunction->uses()) {
--- a/offload/DeviceRTL/include/LibC.h
+++ b/offload/DeviceRTL/include/LibC.h
@@ -18,7 +18,6 @@ extern "C" {

 int memcmp(const void *lhs, const void *rhs, size_t count);
 void memset(void *dst, int C, size_t count);
-
 int printf(const char *format, ...);
 }

--- a/offload/DeviceRTL/src/LibC.cpp
+++ b/offload/DeviceRTL/src/LibC.cpp
@@ -11,44 +11,33 @@
 #pragma omp begin declare target device_type(nohost)

 namespace impl {
-int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t);
+int32_t omp_vprintf(const char *Format, __builtin_va_list vlist);
 }

+#ifndef OMPTARGET_HAS_LIBC
+namespace impl {
 #pragma omp begin declare variant match(                                       \
        device = {arch(nvptx, nvptx64)},                                       \
            implementation = {extension(match_any)})
-extern "C" int32_t vprintf(const char *, void *);
-namespace impl {
-int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
-  return vprintf(Format, Arguments);
+extern "C" int vprintf(const char *format, ...);
+int omp_vprintf(const char *Format, __builtin_va_list vlist) {
+  return vprintf(Format, vlist);
 }
-} // namespace impl
 #pragma omp end declare variant

 #pragma omp begin declare variant match(device = {arch(amdgcn)})
-
-#ifdef OMPTARGET_HAS_LIBC
-// TODO: Remove this handling once we have varargs support.
-extern "C" struct FILE *stdout;
-extern "C" int32_t rpc_fprintf(FILE *, const char *, void *, uint64_t);
-
-namespace impl {
-int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t Size) {
-  return rpc_fprintf(stdout, Format, Arguments, Size);
-}
-} // namespace impl
-#else
-// We do not have a vprintf implementation for AMD GPU so we use a stub.
-namespace impl {
-int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
-  return -1;
-}
-} // namespace impl
-#endif
+int omp_vprintf(const char *Format, __builtin_va_list) { return -1; }
 #pragma omp end declare variant
+} // namespace impl
+
+extern "C" int printf(const char *Format, ...) {
+  __builtin_va_list vlist;
+  __builtin_va_start(vlist, Format);
+  return impl::omp_vprintf(Format, vlist);
+}
+#endif // OMPTARGET_HAS_LIBC

 extern "C" {
-
 [[gnu::weak]] int memcmp(const void *lhs, const void *rhs, size_t count) {
  auto *L = reinterpret_cast<const unsigned char *>(lhs);
  auto *R = reinterpret_cast<const unsigned char *>(rhs);
@@ -65,11 +54,6 @@ extern "C" {
  for (size_t I = 0; I < count; ++I)
    dstc[I] = C;
 }
-
-/// printf() calls are rewritten by CGGPUBuiltin to __llvm_omp_vprintf
-int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) {
-  return impl::omp_vprintf(Format, Arguments, Size);
-}
 }

 #pragma omp end declare target