[CUDA] pass -fno-threadsafe-statics to GPU sub-compilations. (#117074)

We do not support thread-safe statics on the GPU side. However, we do sometimes end up with empty local static initializers, and those happen to trigger calls to `__cxa_guard*`, which breaks compilation. Partially addresses https://github.com/llvm/llvm-project/issues/117023
2024-11-22 10:19:59 -08:00
parent 4be09f0624
commit 689c532192
3 changed files with 17 additions and 3 deletions
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -856,8 +856,9 @@ void CudaToolChain::addClangTargetOptions(
          DeviceOffloadingKind == Action::OFK_Cuda) &&
         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");

-  CC1Args.append(
-      {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});
+  CC1Args.append({"-fcuda-is-device", "-mllvm",
+                  "-enable-memcpyopt-without-libcalls",
+                  "-fno-threadsafe-statics"});

  // Unsized function arguments used for variadics were introduced in CUDA-9.0
  // We still do not support generating code that actually uses variadic
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -238,7 +238,7 @@ void HIPAMDToolChain::addClangTargetOptions(
  assert(DeviceOffloadingKind == Action::OFK_HIP &&
         "Only HIP offloading kinds are supported for GPUs.");

-  CC1Args.push_back("-fcuda-is-device");
+  CC1Args.append({"-fcuda-is-device", "-fno-threadsafe-statics"});

  if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
                          false))
--- a/clang/test/Driver/cuda-no-threadsafe-statics.cu
+++ b/clang/test/Driver/cuda-no-threadsafe-statics.cu
@@ -0,0 +1,13 @@
+// Check that -fno-threadsafe-statics gets passed down to device-side
+// compilation only.
+//
+// RUN: %clang -### -x cuda --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 %s \
+// RUN:            -nocudainc -nocudalib 2>&1 | FileCheck %s
+
+// RUN: %clang -### -x hip --target=x86_64-linux-gnu -c --cuda-gpu-arch=gfx1010 %s \
+// RUN:            -nocudainc -nocudalib 2>&1 | FileCheck %s
+//
+// CHECK: "-fcuda-is-device"
+// CHECK-SAME: "-fno-threadsafe-statics"
+// CHECK: "-triple" "x86_64-unknown-linux-gnu"
+// CHECK-NOT: "-fno-threadsafe-statics"