[OpenMP] Replace AMDGPU fences with generic scoped fences (#119619)

Summary:
This is simpler and more common. I would have replaced the CUDA uses and
made this the same, but currently it does not codegen these fences fully
and just emits a full system-wide barrier as a fallback.
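
To make the change concrete, here is a minimal sketch (illustrative only,
not part of this commit) contrasting the two builtins; it assumes Clang's
__builtin_amdgcn_fence and __scoped_atomic_thread_fence:

    // Before: AMDGPU-specific builtin. Its arguments must be compile-time
    // constants, which is why the old code needed a switch per ordering.
    __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");

    // After: generic scoped fence. The ordering and scope may be runtime
    // values; Clang expands them to the right fence during codegen.
    __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);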
Joseph Huber
2024-12-12 07:54:51 -06:00
committed by GitHub
parent 86779da52b
commit f4ee5a673f
2 changed files with 13 additions and 39 deletions


@@ -26,6 +26,14 @@ enum OrderingTy {
   seq_cst = __ATOMIC_SEQ_CST,
 };
 
+enum ScopeTy {
+  system = __MEMORY_SCOPE_SYSTEM,
+  device_ = __MEMORY_SCOPE_DEVICE,
+  workgroup = __MEMORY_SCOPE_WRKGRP,
+  wavefront = __MEMORY_SCOPE_WVFRNT,
+  single = __MEMORY_SCOPE_SINGLE,
+};
+
 enum MemScopeTy {
   all,    // All threads on all devices
   device, // All threads on the device
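
The new ScopeTy enumerators are defined directly in terms of Clang's
__MEMORY_SCOPE_* macros, so a scope value can be forwarded to the generic
builtin without any translation. A sketch of that pattern (hypothetical
helper, not from this diff):

    // Forward both runtime values straight to the builtin; the enum's
    // underlying values already match what the builtin expects.
    void fence(atomic::OrderingTy Ordering, atomic::ScopeTy Scope) {
      __scoped_atomic_thread_fence(Ordering, Scope);
    }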


@@ -232,50 +232,16 @@ void namedBarrier() {
   fence::team(atomic::release);
 }
 
-// sema checking of amdgcn_fence is aggressive. Intention is to patch clang
-// so that it is usable within a template environment and so that a runtime
-// value of the memory order is expanded to this switch within clang/llvm.
 void fenceTeam(atomic::OrderingTy Ordering) {
-  switch (Ordering) {
-  default:
-    __builtin_unreachable();
-  case atomic::aquire:
-    return __builtin_amdgcn_fence(atomic::aquire, "workgroup");
-  case atomic::release:
-    return __builtin_amdgcn_fence(atomic::release, "workgroup");
-  case atomic::acq_rel:
-    return __builtin_amdgcn_fence(atomic::acq_rel, "workgroup");
-  case atomic::seq_cst:
-    return __builtin_amdgcn_fence(atomic::seq_cst, "workgroup");
-  }
+  return __scoped_atomic_thread_fence(Ordering, atomic::workgroup);
 }
+
 void fenceKernel(atomic::OrderingTy Ordering) {
-  switch (Ordering) {
-  default:
-    __builtin_unreachable();
-  case atomic::aquire:
-    return __builtin_amdgcn_fence(atomic::aquire, "agent");
-  case atomic::release:
-    return __builtin_amdgcn_fence(atomic::release, "agent");
-  case atomic::acq_rel:
-    return __builtin_amdgcn_fence(atomic::acq_rel, "agent");
-  case atomic::seq_cst:
-    return __builtin_amdgcn_fence(atomic::seq_cst, "agent");
-  }
+  return __scoped_atomic_thread_fence(Ordering, atomic::device_);
 }
+
 void fenceSystem(atomic::OrderingTy Ordering) {
-  switch (Ordering) {
-  default:
-    __builtin_unreachable();
-  case atomic::aquire:
-    return __builtin_amdgcn_fence(atomic::aquire, "");
-  case atomic::release:
-    return __builtin_amdgcn_fence(atomic::release, "");
-  case atomic::acq_rel:
-    return __builtin_amdgcn_fence(atomic::acq_rel, "");
-  case atomic::seq_cst:
-    return __builtin_amdgcn_fence(atomic::seq_cst, "");
-  }
+  return __scoped_atomic_thread_fence(Ordering, atomic::system);
 }
 
 void syncWarp(__kmpc_impl_lanemask_t) {
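
As a sanity check, a standalone snippet (my sketch, assuming the Clang
builtin and an AMDGPU target; the file name and flags are illustrative)
shows that the generic fence carries the same scope information the removed
switches encoded:

    // fence.c -- e.g. clang --target=amdgcn-amd-amdhsa -nogpulib -S -emit-llvm fence.c
    void team_release(void) {
      // Expected to lower to: fence syncscope("workgroup") release
      __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
    }
    void system_seq_cst(void) {
      // Expected to lower to: fence seq_cst (system scope is the IR default)
      __scoped_atomic_thread_fence(__ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM);
    }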