From 3c64a98180148340ed72aa2c19054ddfbcfa72e1 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Mon, 1 Jul 2024 06:30:15 -0500
Subject: [PATCH] [libc] Partially implement 'errno' on the GPU (#97107)

Summary:
The `errno` variable is expected to be `thread_local` by the standard.
However, the GPU targets do not support `thread_local` and implementing
that would be a large endeavor. Because of that, we previously didn't
provide the `errno` symbol at all. However, to build some programs we at
least need to be able to link against `errno`. Many things that would
normally set `errno` currently ignore it completely (e.g. stdio), but
correct C programs that reference `errno` still need to be able to link.

For this purpose this patch exports the `errno` symbol as a simple
global. Internally, this will be updated atomically so it's at least not
racy. Externally, avoiding races is the user's responsibility. I've
updated the documentation to say so. This is required to get `libc++` to
build.
---
 libc/docs/gpu/motivation.rst  |  2 +-
 libc/include/errno.h.def      |  9 +++++----
 libc/src/errno/libc_errno.cpp | 13 +++++++++----
 3 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/libc/docs/gpu/motivation.rst b/libc/docs/gpu/motivation.rst
index 7e5336dbbe5d..e637d67fad75 100644
--- a/libc/docs/gpu/motivation.rst
+++ b/libc/docs/gpu/motivation.rst
@@ -44,7 +44,7 @@ Limitations
 
 We only implement a subset of the standard C library. The GPU does not
 currently support thread local variables in all cases, so variables like
-``errno`` are not provided. Furthermore, the GPU under the OpenCL execution
+``errno`` are atomic and global. Furthermore, the GPU under the OpenCL execution
 model cannot safely provide a mutex interface. This means that features like
 file buffering are not implemented on the GPU. We can also not easily provide
 threading features on the GPU due to the execution model so these will be
diff --git a/libc/include/errno.h.def b/libc/include/errno.h.def
index 3ffcd3fe4c72..1f7120e63bfc 100644
--- a/libc/include/errno.h.def
+++ b/libc/include/errno.h.def
@@ -25,8 +25,9 @@
 #include "llvm-libc-macros/generic-error-number-macros.h"
 #endif
 
-#if !defined(__AMDGPU__) && !defined(__NVPTX__)
-
+#if defined(__AMDGPU__) || defined(__NVPTX__)
+extern int __llvmlibc_errno; // Not thread_local!
+#else
 #ifdef __cplusplus
 extern "C" {
 extern thread_local int __llvmlibc_errno;
@@ -34,8 +35,8 @@ extern thread_local int __llvmlibc_errno;
 #else
 extern _Thread_local int __llvmlibc_errno;
 #endif // __cplusplus
-
-#define errno __llvmlibc_errno
 #endif
 
+#define errno __llvmlibc_errno
+
 #endif // LLVM_LIBC_ERRNO_H
diff --git a/libc/src/errno/libc_errno.cpp b/libc/src/errno/libc_errno.cpp
index 64f9f522ca29..bd1438c22614 100644
--- a/libc/src/errno/libc_errno.cpp
+++ b/libc/src/errno/libc_errno.cpp
@@ -7,16 +7,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "libc_errno.h"
+#include "src/__support/CPP/atomic.h"
 
 #ifdef LIBC_TARGET_ARCH_IS_GPU
-// LIBC_THREAD_LOCAL on GPU currently does nothing.  So essentially this is just
+// LIBC_THREAD_LOCAL on GPU currently does nothing. So essentially this is just
 // a global errno for gpu to use for now.
 extern "C" {
-LIBC_THREAD_LOCAL int __llvmlibc_gpu_errno;
+LIBC_THREAD_LOCAL LIBC_NAMESPACE::cpp::Atomic<int> __llvmlibc_errno;
 }
 
-void LIBC_NAMESPACE::Errno::operator=(int a) { __llvmlibc_gpu_errno = a; }
-LIBC_NAMESPACE::Errno::operator int() { return __llvmlibc_gpu_errno; }
+void LIBC_NAMESPACE::Errno::operator=(int a) {
+  __llvmlibc_errno.store(a, cpp::MemoryOrder::RELAXED);
+}
+LIBC_NAMESPACE::Errno::operator int() {
+  return __llvmlibc_errno.load(cpp::MemoryOrder::RELAXED);
+}
 
 #elif !defined(LIBC_COPT_PUBLIC_PACKAGING)
 // This mode is for unit testing.  We just use our internal errno.