[OpenMP] Replace AMDGPU fences with generic scoped fences (#119619)

Summary:
This is simpler and more common. I would have replaced the CUDA uses and
made this the same, but currently it does not codegen these fences fully
and just emits a full system-wide barrier as a fallback.
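
To make the change concrete, here is a minimal sketch (illustrative only,
not part of this commit) contrasting the two builtins; it assumes Clang's
__builtin_amdgcn_fence and __scoped_atomic_thread_fence:

    // Before: AMDGPU-specific builtin. Its arguments must be compile-time
    // constants, which is why the old code needed a switch per ordering.
    __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");

    // After: generic scoped fence. The ordering and scope may be runtime
    // values; Clang expands them to the right fence during codegen.
    __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);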
Joseph Huber
2024-12-12 07:54:51 -06:00
committed by GitHub
parent 86779da52b
commit f4ee5a673f
2 changed files with 13 additions and 39 deletions


@@ -26,6 +26,14 @@ enum OrderingTy {
   seq_cst = __ATOMIC_SEQ_CST,
 };
 
+enum ScopeTy {
+  system = __MEMORY_SCOPE_SYSTEM,
+  device_ = __MEMORY_SCOPE_DEVICE,
+  workgroup = __MEMORY_SCOPE_WRKGRP,
+  wavefront = __MEMORY_SCOPE_WVFRNT,
+  single = __MEMORY_SCOPE_SINGLE,
+};
+
 enum MemScopeTy {
   all,    // All threads on all devices
   device, // All threads on the device
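
The new ScopeTy enumerators are defined directly in terms of Clang's
__MEMORY_SCOPE_* macros, so a scope value can be forwarded to the generic
builtin without any translation. A sketch of that pattern (hypothetical
helper, not from this diff):

    // Forward both runtime values straight to the builtin; the enum's
    // underlying values already match what the builtin expects.
    void fence(atomic::OrderingTy Ordering, atomic::ScopeTy Scope) {
      __scoped_atomic_thread_fence(Ordering, Scope);
    }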


@@ -232,50 +232,16 @@ void namedBarrier() {
   fence::team(atomic::release);
 }
 
-// sema checking of amdgcn_fence is aggressive. Intention is to patch clang
-// so that it is usable within a template environment and so that a runtime
-// value of the memory order is expanded to this switch within clang/llvm.
 void fenceTeam(atomic::OrderingTy Ordering) {
-  switch (Ordering) {
-  default:
-    __builtin_unreachable();
-  case atomic::aquire:
-    return __builtin_amdgcn_fence(atomic::aquire, "workgroup");
-  case atomic::release:
-    return __builtin_amdgcn_fence(atomic::release, "workgroup");
-  case atomic::acq_rel:
-    return __builtin_amdgcn_fence(atomic::acq_rel, "workgroup");
-  case atomic::seq_cst:
-    return __builtin_amdgcn_fence(atomic::seq_cst, "workgroup");
-  }
+  return __scoped_atomic_thread_fence(Ordering, atomic::workgroup);
 }
+
 void fenceKernel(atomic::OrderingTy Ordering) {
-  switch (Ordering) {
-  default:
-    __builtin_unreachable();
-  case atomic::aquire:
-    return __builtin_amdgcn_fence(atomic::aquire, "agent");
-  case atomic::release:
-    return __builtin_amdgcn_fence(atomic::release, "agent");
-  case atomic::acq_rel:
-    return __builtin_amdgcn_fence(atomic::acq_rel, "agent");
-  case atomic::seq_cst:
-    return __builtin_amdgcn_fence(atomic::seq_cst, "agent");
-  }
+  return __scoped_atomic_thread_fence(Ordering, atomic::device_);
 }
+
 void fenceSystem(atomic::OrderingTy Ordering) {
-  switch (Ordering) {
-  default:
-    __builtin_unreachable();
-  case atomic::aquire:
-    return __builtin_amdgcn_fence(atomic::aquire, "");
-  case atomic::release:
-    return __builtin_amdgcn_fence(atomic::release, "");
-  case atomic::acq_rel:
-    return __builtin_amdgcn_fence(atomic::acq_rel, "");
-  case atomic::seq_cst:
-    return __builtin_amdgcn_fence(atomic::seq_cst, "");
-  }
+  return __scoped_atomic_thread_fence(Ordering, atomic::system);
 }
 
 void syncWarp(__kmpc_impl_lanemask_t) {
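
As a sanity check, a standalone snippet (my sketch, assuming the Clang
builtin and an AMDGPU target; the file name and flags are illustrative)
shows that the generic fence carries the same scope information the removed
switches encoded:

    // fence.c -- e.g. clang --target=amdgcn-amd-amdhsa -nogpulib -S -emit-llvm fence.c
    void team_release(void) {
      // Expected to lower to: fence syncscope("workgroup") release
      __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
    }
    void system_seq_cst(void) {
      // Expected to lower to: fence seq_cst (system scope is the IR default)
      __scoped_atomic_thread_fence(__ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM);
    }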