[OpenMP] Replace AMDGPU fences with generic scoped fences (#119619)
Summary: This is simpler and more common. I would have replaced the CUDA uses as well so that both targets share the same code, but code generation for CUDA currently does not handle these fences fully and just emits a full system-wide barrier as a fallback.
This commit is contained in:
@@ -26,6 +26,14 @@ enum OrderingTy {
|
||||
seq_cst = __ATOMIC_SEQ_CST,
|
||||
};
|
||||
|
||||
// Memory-scope selectors. Each enumerator is an alias for the matching
// __MEMORY_SCOPE_* predefined macro; the fence helpers in this change pass
// these values as the scope argument of __scoped_atomic_thread_fence.
enum ScopeTy {
|
||||
system = __MEMORY_SCOPE_SYSTEM,
|
||||
// NOTE(review): the trailing underscore presumably avoids a clash with the
// `device` enumerator of MemScopeTy declared right after this enum - confirm.
device_ = __MEMORY_SCOPE_DEVICE,
|
||||
workgroup = __MEMORY_SCOPE_WRKGRP,
|
||||
wavefront = __MEMORY_SCOPE_WVFRNT,
|
||||
single = __MEMORY_SCOPE_SINGLE,
|
||||
};
|
||||
|
||||
enum MemScopeTy {
|
||||
all, // All threads on all devices
|
||||
device, // All threads on the device
|
||||
|
||||
@@ -232,50 +232,16 @@ void namedBarrier() {
|
||||
fence::team(atomic::release);
|
||||
}
|
||||
|
||||
// sema checking of amdgcn_fence is aggressive. Intention is to patch clang
|
||||
// so that it is usable within a template environment and so that a runtime
|
||||
// value of the memory order is expanded to this switch within clang/llvm.
|
||||
// Issues a memory fence with the requested ordering at workgroup scope.
//
// NOTE(review): this listing is a diff rendered without +/- markers, so both
// sides appear together. Going by the commit title, the switch over
// __builtin_amdgcn_fence is presumably the removed implementation and the
// single __scoped_atomic_thread_fence call its replacement - confirm against
// the checked-in file.
void fenceTeam(atomic::OrderingTy Ordering) {
|
||||
// One case per ordering: each call passes the ordering as a literal
// enumerator rather than forwarding the runtime value.
switch (Ordering) {
|
||||
default:
|
||||
// Any ordering other than the four listed below is declared unreachable.
__builtin_unreachable();
|
||||
case atomic::aquire:
|
||||
return __builtin_amdgcn_fence(atomic::aquire, "workgroup");
|
||||
case atomic::release:
|
||||
return __builtin_amdgcn_fence(atomic::release, "workgroup");
|
||||
case atomic::acq_rel:
|
||||
return __builtin_amdgcn_fence(atomic::acq_rel, "workgroup");
|
||||
case atomic::seq_cst:
|
||||
return __builtin_amdgcn_fence(atomic::seq_cst, "workgroup");
|
||||
}
|
||||
// Generic form: forwards the runtime ordering directly together with the
// workgroup scope enumerator.
return __scoped_atomic_thread_fence(Ordering, atomic::workgroup);
|
||||
}
|
||||
|
||||
// Issues a memory fence with the requested ordering at device scope (the
// AMDGPU builtin spells this scope "agent").
//
// NOTE(review): this listing is a diff rendered without +/- markers, so both
// sides appear together. Going by the commit title, the switch over
// __builtin_amdgcn_fence is presumably the removed implementation and the
// single __scoped_atomic_thread_fence call its replacement - confirm against
// the checked-in file.
void fenceKernel(atomic::OrderingTy Ordering) {
|
||||
// One case per ordering: each call passes the ordering as a literal
// enumerator rather than forwarding the runtime value.
switch (Ordering) {
|
||||
default:
|
||||
// Any ordering other than the four listed below is declared unreachable.
__builtin_unreachable();
|
||||
case atomic::aquire:
|
||||
return __builtin_amdgcn_fence(atomic::aquire, "agent");
|
||||
case atomic::release:
|
||||
return __builtin_amdgcn_fence(atomic::release, "agent");
|
||||
case atomic::acq_rel:
|
||||
return __builtin_amdgcn_fence(atomic::acq_rel, "agent");
|
||||
case atomic::seq_cst:
|
||||
return __builtin_amdgcn_fence(atomic::seq_cst, "agent");
|
||||
}
|
||||
// Generic form: forwards the runtime ordering directly together with the
// device scope enumerator.
return __scoped_atomic_thread_fence(Ordering, atomic::device_);
|
||||
}
|
||||
|
||||
// Issues a memory fence with the requested ordering at system scope.
//
// NOTE(review): this listing is a diff rendered without +/- markers, so both
// sides appear together. Going by the commit title, the switch over
// __builtin_amdgcn_fence is presumably the removed implementation and the
// single __scoped_atomic_thread_fence call its replacement - confirm against
// the checked-in file.
void fenceSystem(atomic::OrderingTy Ordering) {
|
||||
// One case per ordering: each call passes the ordering as a literal
// enumerator rather than forwarding the runtime value.
switch (Ordering) {
|
||||
default:
|
||||
// Any ordering other than the four listed below is declared unreachable.
__builtin_unreachable();
|
||||
// The scope string is empty in every case below; since the generic call at
// the end uses atomic::system, the empty string presumably selects system
// scope - confirm against the AMDGPU builtin documentation.
case atomic::aquire:
|
||||
return __builtin_amdgcn_fence(atomic::aquire, "");
|
||||
case atomic::release:
|
||||
return __builtin_amdgcn_fence(atomic::release, "");
|
||||
case atomic::acq_rel:
|
||||
return __builtin_amdgcn_fence(atomic::acq_rel, "");
|
||||
case atomic::seq_cst:
|
||||
return __builtin_amdgcn_fence(atomic::seq_cst, "");
|
||||
}
|
||||
// Generic form: forwards the runtime ordering directly together with the
// system scope enumerator.
return __scoped_atomic_thread_fence(Ordering, atomic::system);
|
||||
}
|
||||
|
||||
void syncWarp(__kmpc_impl_lanemask_t) {
|
||||
|
||||
Reference in New Issue
Block a user