[HIP] Add HIP scope atomic operations
Add an AtomicScopeModel for HIP and support for OpenCL builtins that are missing in HIP. Patch by: Michael Liao Revised by: Anshil Ghandi Reviewed by: Yaxun Liu Differential Revision: https://reviews.llvm.org/D113925
This commit is contained in:
@@ -6305,6 +6305,7 @@ public:
|
||||
bool isCmpXChg() const {
|
||||
return getOp() == AO__c11_atomic_compare_exchange_strong ||
|
||||
getOp() == AO__c11_atomic_compare_exchange_weak ||
|
||||
getOp() == AO__hip_atomic_compare_exchange_strong ||
|
||||
getOp() == AO__opencl_atomic_compare_exchange_strong ||
|
||||
getOp() == AO__opencl_atomic_compare_exchange_weak ||
|
||||
getOp() == AO__atomic_compare_exchange ||
|
||||
@@ -6341,7 +6342,10 @@ public:
|
||||
auto Kind =
|
||||
(Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max)
|
||||
? AtomicScopeModelKind::OpenCL
|
||||
: AtomicScopeModelKind::None;
|
||||
: (Op >= AO__hip_atomic_compare_exchange_strong &&
|
||||
Op <= AO__hip_atomic_fetch_max)
|
||||
? AtomicScopeModelKind::HIP
|
||||
: AtomicScopeModelKind::None;
|
||||
return AtomicScopeModel::create(Kind);
|
||||
}
|
||||
|
||||
|
||||
@@ -854,6 +854,18 @@ ATOMIC_BUILTIN(__opencl_atomic_fetch_max, "v.", "t")
|
||||
ATOMIC_BUILTIN(__atomic_fetch_min, "v.", "t")
|
||||
ATOMIC_BUILTIN(__atomic_fetch_max, "v.", "t")
|
||||
|
||||
// HIP atomic builtins.
|
||||
// FIXME: Is `__hip_atomic_compare_exchange_n` or
|
||||
// `__hip_atomic_compare_exchange_weak` needed?
|
||||
ATOMIC_BUILTIN(__hip_atomic_compare_exchange_strong, "v.", "t")
|
||||
ATOMIC_BUILTIN(__hip_atomic_exchange, "v.", "t")
|
||||
ATOMIC_BUILTIN(__hip_atomic_fetch_add, "v.", "t")
|
||||
ATOMIC_BUILTIN(__hip_atomic_fetch_and, "v.", "t")
|
||||
ATOMIC_BUILTIN(__hip_atomic_fetch_or, "v.", "t")
|
||||
ATOMIC_BUILTIN(__hip_atomic_fetch_xor, "v.", "t")
|
||||
ATOMIC_BUILTIN(__hip_atomic_fetch_min, "v.", "t")
|
||||
ATOMIC_BUILTIN(__hip_atomic_fetch_max, "v.", "t")
|
||||
|
||||
#undef ATOMIC_BUILTIN
|
||||
|
||||
// Non-overloaded atomic builtins.
|
||||
|
||||
@@ -40,6 +40,11 @@ namespace clang {
|
||||
/// Update getAsString.
|
||||
///
|
||||
enum class SyncScope {
|
||||
HIPSingleThread,
|
||||
HIPWavefront,
|
||||
HIPWorkgroup,
|
||||
HIPAgent,
|
||||
HIPSystem,
|
||||
OpenCLWorkGroup,
|
||||
OpenCLDevice,
|
||||
OpenCLAllSVMDevices,
|
||||
@@ -49,6 +54,16 @@ enum class SyncScope {
|
||||
|
||||
inline llvm::StringRef getAsString(SyncScope S) {
|
||||
switch (S) {
|
||||
case SyncScope::HIPSingleThread:
|
||||
return "hip_singlethread";
|
||||
case SyncScope::HIPWavefront:
|
||||
return "hip_wavefront";
|
||||
case SyncScope::HIPWorkgroup:
|
||||
return "hip_workgroup";
|
||||
case SyncScope::HIPAgent:
|
||||
return "hip_agent";
|
||||
case SyncScope::HIPSystem:
|
||||
return "hip_system";
|
||||
case SyncScope::OpenCLWorkGroup:
|
||||
return "opencl_workgroup";
|
||||
case SyncScope::OpenCLDevice:
|
||||
@@ -62,7 +77,7 @@ inline llvm::StringRef getAsString(SyncScope S) {
|
||||
}
|
||||
|
||||
/// Defines the kind of atomic scope models.
|
||||
enum class AtomicScopeModelKind { None, OpenCL };
|
||||
enum class AtomicScopeModelKind { None, OpenCL, HIP };
|
||||
|
||||
/// Defines the interface for synch scope model.
|
||||
class AtomicScopeModel {
|
||||
@@ -138,6 +153,58 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
/// Defines the synch scope model for HIP.
|
||||
class AtomicScopeHIPModel : public AtomicScopeModel {
|
||||
public:
|
||||
/// The enum values match the pre-defined macros
|
||||
/// __HIP_MEMORY_SCOPE_*, which are used to define memory_scope_*
|
||||
/// enums in hip-c.h.
|
||||
enum ID {
|
||||
SingleThread = 1,
|
||||
Wavefront = 2,
|
||||
Workgroup = 3,
|
||||
Agent = 4,
|
||||
System = 5,
|
||||
Last = System
|
||||
};
|
||||
|
||||
AtomicScopeHIPModel() {}
|
||||
|
||||
SyncScope map(unsigned S) const override {
|
||||
switch (static_cast<ID>(S)) {
|
||||
case SingleThread:
|
||||
return SyncScope::HIPSingleThread;
|
||||
case Wavefront:
|
||||
return SyncScope::HIPWavefront;
|
||||
case Workgroup:
|
||||
return SyncScope::HIPWorkgroup;
|
||||
case Agent:
|
||||
return SyncScope::HIPAgent;
|
||||
case System:
|
||||
return SyncScope::HIPSystem;
|
||||
}
|
||||
llvm_unreachable("Invalid language synch scope value");
|
||||
}
|
||||
|
||||
bool isValid(unsigned S) const override {
|
||||
return S >= static_cast<unsigned>(SingleThread) &&
|
||||
S <= static_cast<unsigned>(Last);
|
||||
}
|
||||
|
||||
ArrayRef<unsigned> getRuntimeValues() const override {
|
||||
static_assert(Last == System, "Does not include all synch scopes");
|
||||
static const unsigned Scopes[] = {
|
||||
static_cast<unsigned>(SingleThread), static_cast<unsigned>(Wavefront),
|
||||
static_cast<unsigned>(Workgroup), static_cast<unsigned>(Agent),
|
||||
static_cast<unsigned>(System)};
|
||||
return llvm::makeArrayRef(Scopes);
|
||||
}
|
||||
|
||||
unsigned getFallBackValue() const override {
|
||||
return static_cast<unsigned>(System);
|
||||
}
|
||||
};
|
||||
|
||||
inline std::unique_ptr<AtomicScopeModel>
|
||||
AtomicScopeModel::create(AtomicScopeModelKind K) {
|
||||
switch (K) {
|
||||
@@ -145,9 +212,11 @@ AtomicScopeModel::create(AtomicScopeModelKind K) {
|
||||
return std::unique_ptr<AtomicScopeModel>{};
|
||||
case AtomicScopeModelKind::OpenCL:
|
||||
return std::make_unique<AtomicScopeOpenCLModel>();
|
||||
case AtomicScopeModelKind::HIP:
|
||||
return std::make_unique<AtomicScopeHIPModel>();
|
||||
}
|
||||
llvm_unreachable("Invalid atomic scope model kind");
|
||||
}
|
||||
}
|
||||
} // namespace clang
|
||||
|
||||
#endif
|
||||
|
||||
@@ -4713,6 +4713,13 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
|
||||
case AO__atomic_fetch_max:
|
||||
return 3;
|
||||
|
||||
case AO__hip_atomic_exchange:
|
||||
case AO__hip_atomic_fetch_add:
|
||||
case AO__hip_atomic_fetch_and:
|
||||
case AO__hip_atomic_fetch_or:
|
||||
case AO__hip_atomic_fetch_xor:
|
||||
case AO__hip_atomic_fetch_min:
|
||||
case AO__hip_atomic_fetch_max:
|
||||
case AO__opencl_atomic_store:
|
||||
case AO__opencl_atomic_exchange:
|
||||
case AO__opencl_atomic_fetch_add:
|
||||
@@ -4728,7 +4735,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
|
||||
case AO__c11_atomic_compare_exchange_strong:
|
||||
case AO__c11_atomic_compare_exchange_weak:
|
||||
return 5;
|
||||
|
||||
case AO__hip_atomic_compare_exchange_strong:
|
||||
case AO__opencl_atomic_compare_exchange_strong:
|
||||
case AO__opencl_atomic_compare_exchange_weak:
|
||||
case AO__atomic_compare_exchange:
|
||||
|
||||
@@ -524,6 +524,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
|
||||
llvm_unreachable("Already handled!");
|
||||
|
||||
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
|
||||
emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
|
||||
FailureOrder, Size, Order, Scope);
|
||||
@@ -586,6 +587,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
|
||||
}
|
||||
|
||||
case AtomicExpr::AO__c11_atomic_exchange:
|
||||
case AtomicExpr::AO__hip_atomic_exchange:
|
||||
case AtomicExpr::AO__opencl_atomic_exchange:
|
||||
case AtomicExpr::AO__atomic_exchange_n:
|
||||
case AtomicExpr::AO__atomic_exchange:
|
||||
@@ -597,6 +599,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
|
||||
: llvm::Instruction::Add;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AtomicExpr::AO__c11_atomic_fetch_add:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_add:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_add:
|
||||
case AtomicExpr::AO__atomic_fetch_add:
|
||||
Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FAdd
|
||||
@@ -618,6 +621,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
|
||||
PostOpMinMax = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AtomicExpr::AO__c11_atomic_fetch_min:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_min:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_min:
|
||||
case AtomicExpr::AO__atomic_fetch_min:
|
||||
Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min
|
||||
@@ -628,6 +632,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
|
||||
PostOpMinMax = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AtomicExpr::AO__c11_atomic_fetch_max:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_max:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_max:
|
||||
case AtomicExpr::AO__atomic_fetch_max:
|
||||
Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max
|
||||
@@ -638,6 +643,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
|
||||
PostOp = llvm::Instruction::And;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AtomicExpr::AO__c11_atomic_fetch_and:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_and:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_and:
|
||||
case AtomicExpr::AO__atomic_fetch_and:
|
||||
Op = llvm::AtomicRMWInst::And;
|
||||
@@ -647,6 +653,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
|
||||
PostOp = llvm::Instruction::Or;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AtomicExpr::AO__c11_atomic_fetch_or:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_or:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_or:
|
||||
case AtomicExpr::AO__atomic_fetch_or:
|
||||
Op = llvm::AtomicRMWInst::Or;
|
||||
@@ -656,6 +663,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
|
||||
PostOp = llvm::Instruction::Xor;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AtomicExpr::AO__c11_atomic_fetch_xor:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_xor:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_xor:
|
||||
case AtomicExpr::AO__atomic_fetch_xor:
|
||||
Op = llvm::AtomicRMWInst::Xor;
|
||||
@@ -857,6 +865,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
|
||||
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
|
||||
case AtomicExpr::AO__atomic_compare_exchange_n:
|
||||
case AtomicExpr::AO__atomic_compare_exchange:
|
||||
@@ -873,6 +882,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
|
||||
case AtomicExpr::AO__c11_atomic_fetch_add:
|
||||
case AtomicExpr::AO__c11_atomic_fetch_sub:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_add:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_add:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_sub:
|
||||
if (MemTy->isPointerType()) {
|
||||
@@ -902,6 +912,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
case AtomicExpr::AO__c11_atomic_exchange:
|
||||
case AtomicExpr::AO__opencl_atomic_store:
|
||||
case AtomicExpr::AO__opencl_atomic_exchange:
|
||||
case AtomicExpr::AO__hip_atomic_exchange:
|
||||
case AtomicExpr::AO__atomic_store_n:
|
||||
case AtomicExpr::AO__atomic_exchange_n:
|
||||
case AtomicExpr::AO__c11_atomic_fetch_and:
|
||||
@@ -916,8 +927,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_min:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_max:
|
||||
case AtomicExpr::AO__atomic_fetch_and:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_and:
|
||||
case AtomicExpr::AO__atomic_fetch_or:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_or:
|
||||
case AtomicExpr::AO__atomic_fetch_xor:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_xor:
|
||||
case AtomicExpr::AO__atomic_fetch_nand:
|
||||
case AtomicExpr::AO__atomic_and_fetch:
|
||||
case AtomicExpr::AO__atomic_or_fetch:
|
||||
@@ -926,7 +940,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
case AtomicExpr::AO__atomic_max_fetch:
|
||||
case AtomicExpr::AO__atomic_min_fetch:
|
||||
case AtomicExpr::AO__atomic_fetch_max:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_max:
|
||||
case AtomicExpr::AO__atomic_fetch_min:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_min:
|
||||
Val1 = EmitValToTemp(*this, E->getVal1());
|
||||
break;
|
||||
}
|
||||
@@ -968,11 +984,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
case AtomicExpr::AO__c11_atomic_fetch_add:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_add:
|
||||
case AtomicExpr::AO__atomic_fetch_add:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_add:
|
||||
case AtomicExpr::AO__c11_atomic_fetch_and:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_and:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_and:
|
||||
case AtomicExpr::AO__atomic_fetch_and:
|
||||
case AtomicExpr::AO__c11_atomic_fetch_or:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_or:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_or:
|
||||
case AtomicExpr::AO__atomic_fetch_or:
|
||||
case AtomicExpr::AO__c11_atomic_fetch_nand:
|
||||
case AtomicExpr::AO__atomic_fetch_nand:
|
||||
@@ -984,6 +1003,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_min:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_max:
|
||||
case AtomicExpr::AO__atomic_fetch_xor:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_xor:
|
||||
case AtomicExpr::AO__c11_atomic_fetch_max:
|
||||
case AtomicExpr::AO__c11_atomic_fetch_min:
|
||||
case AtomicExpr::AO__atomic_add_fetch:
|
||||
@@ -993,7 +1013,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
case AtomicExpr::AO__atomic_sub_fetch:
|
||||
case AtomicExpr::AO__atomic_xor_fetch:
|
||||
case AtomicExpr::AO__atomic_fetch_max:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_max:
|
||||
case AtomicExpr::AO__atomic_fetch_min:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_min:
|
||||
case AtomicExpr::AO__atomic_max_fetch:
|
||||
case AtomicExpr::AO__atomic_min_fetch:
|
||||
// For these, only library calls for certain sizes exist.
|
||||
@@ -1014,9 +1036,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
case AtomicExpr::AO__c11_atomic_exchange:
|
||||
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
|
||||
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__opencl_atomic_load:
|
||||
case AtomicExpr::AO__opencl_atomic_store:
|
||||
case AtomicExpr::AO__opencl_atomic_exchange:
|
||||
case AtomicExpr::AO__hip_atomic_exchange:
|
||||
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
|
||||
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__atomic_load_n:
|
||||
@@ -1080,6 +1104,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
|
||||
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__atomic_compare_exchange:
|
||||
case AtomicExpr::AO__atomic_compare_exchange_n:
|
||||
LibCallName = "__atomic_compare_exchange";
|
||||
@@ -1101,6 +1126,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
case AtomicExpr::AO__opencl_atomic_exchange:
|
||||
case AtomicExpr::AO__atomic_exchange_n:
|
||||
case AtomicExpr::AO__atomic_exchange:
|
||||
case AtomicExpr::AO__hip_atomic_exchange:
|
||||
LibCallName = "__atomic_exchange";
|
||||
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
|
||||
MemTy, E->getExprLoc(), TInfo.Width);
|
||||
@@ -1133,6 +1159,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
case AtomicExpr::AO__c11_atomic_fetch_add:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_add:
|
||||
case AtomicExpr::AO__atomic_fetch_add:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_add:
|
||||
LibCallName = "__atomic_fetch_add";
|
||||
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
|
||||
LoweredMemTy, E->getExprLoc(), TInfo.Width);
|
||||
@@ -1144,6 +1171,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
LLVM_FALLTHROUGH;
|
||||
case AtomicExpr::AO__c11_atomic_fetch_and:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_and:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_and:
|
||||
case AtomicExpr::AO__atomic_fetch_and:
|
||||
LibCallName = "__atomic_fetch_and";
|
||||
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
|
||||
@@ -1156,6 +1184,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
LLVM_FALLTHROUGH;
|
||||
case AtomicExpr::AO__c11_atomic_fetch_or:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_or:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_or:
|
||||
case AtomicExpr::AO__atomic_fetch_or:
|
||||
LibCallName = "__atomic_fetch_or";
|
||||
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
|
||||
@@ -1180,6 +1209,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
LLVM_FALLTHROUGH;
|
||||
case AtomicExpr::AO__c11_atomic_fetch_xor:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_xor:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_xor:
|
||||
case AtomicExpr::AO__atomic_fetch_xor:
|
||||
LibCallName = "__atomic_fetch_xor";
|
||||
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
|
||||
@@ -1190,6 +1220,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
LLVM_FALLTHROUGH;
|
||||
case AtomicExpr::AO__c11_atomic_fetch_min:
|
||||
case AtomicExpr::AO__atomic_fetch_min:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_min:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_min:
|
||||
LibCallName = E->getValueType()->isSignedIntegerType()
|
||||
? "__atomic_fetch_min"
|
||||
@@ -1202,6 +1233,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
|
||||
LLVM_FALLTHROUGH;
|
||||
case AtomicExpr::AO__c11_atomic_fetch_max:
|
||||
case AtomicExpr::AO__atomic_fetch_max:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_max:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_max:
|
||||
LibCallName = E->getValueType()->isSignedIntegerType()
|
||||
? "__atomic_fetch_max"
|
||||
|
||||
@@ -9339,17 +9339,28 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
|
||||
llvm::LLVMContext &Ctx) const {
|
||||
std::string Name;
|
||||
switch (Scope) {
|
||||
case SyncScope::HIPSingleThread:
|
||||
Name = "singlethread";
|
||||
break;
|
||||
case SyncScope::HIPWavefront:
|
||||
case SyncScope::OpenCLSubGroup:
|
||||
Name = "wavefront";
|
||||
break;
|
||||
case SyncScope::HIPWorkgroup:
|
||||
case SyncScope::OpenCLWorkGroup:
|
||||
Name = "workgroup";
|
||||
break;
|
||||
case SyncScope::HIPAgent:
|
||||
case SyncScope::OpenCLDevice:
|
||||
Name = "agent";
|
||||
break;
|
||||
case SyncScope::HIPSystem:
|
||||
case SyncScope::OpenCLAllSVMDevices:
|
||||
Name = "";
|
||||
break;
|
||||
case SyncScope::OpenCLSubGroup:
|
||||
Name = "wavefront";
|
||||
default:
|
||||
assert(false && "NOT IMPLEMENTED");
|
||||
break;
|
||||
}
|
||||
|
||||
if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
|
||||
|
||||
@@ -505,6 +505,11 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
|
||||
if (LangOpts.HIP) {
|
||||
Builder.defineMacro("__HIP__");
|
||||
Builder.defineMacro("__HIPCC__");
|
||||
Builder.defineMacro("__HIP_MEMORY_SCOPE_SINGLETHREAD", "1");
|
||||
Builder.defineMacro("__HIP_MEMORY_SCOPE_WAVEFRONT", "2");
|
||||
Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3");
|
||||
Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4");
|
||||
Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
|
||||
if (LangOpts.CUDAIsDevice)
|
||||
Builder.defineMacro("__HIP_DEVICE_COMPILE__");
|
||||
}
|
||||
|
||||
@@ -5380,6 +5380,8 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
|
||||
"need to update code for modified C11 atomics");
|
||||
bool IsOpenCL = Op >= AtomicExpr::AO__opencl_atomic_init &&
|
||||
Op <= AtomicExpr::AO__opencl_atomic_fetch_max;
|
||||
bool IsHIP = Op >= AtomicExpr::AO__hip_atomic_compare_exchange_strong &&
|
||||
Op <= AtomicExpr::AO__hip_atomic_fetch_max;
|
||||
bool IsC11 = (Op >= AtomicExpr::AO__c11_atomic_init &&
|
||||
Op <= AtomicExpr::AO__c11_atomic_fetch_min) ||
|
||||
IsOpenCL;
|
||||
@@ -5411,7 +5413,9 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
|
||||
case AtomicExpr::AO__atomic_store_n:
|
||||
Form = Copy;
|
||||
break;
|
||||
|
||||
case AtomicExpr::AO__hip_atomic_fetch_add:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_min:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_max:
|
||||
case AtomicExpr::AO__c11_atomic_fetch_add:
|
||||
case AtomicExpr::AO__c11_atomic_fetch_sub:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_add:
|
||||
@@ -5426,6 +5430,9 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
|
||||
case AtomicExpr::AO__c11_atomic_fetch_and:
|
||||
case AtomicExpr::AO__c11_atomic_fetch_or:
|
||||
case AtomicExpr::AO__c11_atomic_fetch_xor:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_and:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_or:
|
||||
case AtomicExpr::AO__hip_atomic_fetch_xor:
|
||||
case AtomicExpr::AO__c11_atomic_fetch_nand:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_and:
|
||||
case AtomicExpr::AO__opencl_atomic_fetch_or:
|
||||
@@ -5452,6 +5459,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
|
||||
break;
|
||||
|
||||
case AtomicExpr::AO__c11_atomic_exchange:
|
||||
case AtomicExpr::AO__hip_atomic_exchange:
|
||||
case AtomicExpr::AO__opencl_atomic_exchange:
|
||||
case AtomicExpr::AO__atomic_exchange_n:
|
||||
Form = Xchg;
|
||||
@@ -5463,6 +5471,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
|
||||
|
||||
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
|
||||
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
|
||||
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
|
||||
Form = C11CmpXchg;
|
||||
@@ -5475,7 +5484,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
|
||||
}
|
||||
|
||||
unsigned AdjustedNumArgs = NumArgs[Form];
|
||||
if (IsOpenCL && Op != AtomicExpr::AO__opencl_atomic_init)
|
||||
if ((IsOpenCL || IsHIP) && Op != AtomicExpr::AO__opencl_atomic_init)
|
||||
++AdjustedNumArgs;
|
||||
// Check we have the right number of arguments.
|
||||
if (Args.size() < AdjustedNumArgs) {
|
||||
@@ -5614,7 +5623,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
|
||||
// arguments are actually passed as pointers.
|
||||
QualType ByValType = ValType; // 'CP'
|
||||
bool IsPassedByAddress = false;
|
||||
if (!IsC11 && !IsN) {
|
||||
if (!IsC11 && !IsHIP && !IsN) {
|
||||
ByValType = Ptr->getType();
|
||||
IsPassedByAddress = true;
|
||||
}
|
||||
|
||||
302
clang/test/CodeGenCUDA/atomic-ops.cu
Normal file
302
clang/test/CodeGenCUDA/atomic-ops.cu
Normal file
@@ -0,0 +1,302 @@
|
||||
// RUN: %clang_cc1 -x hip -std=c++11 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// CHECK-LABEL: @_Z24atomic32_op_singlethreadPiii
|
||||
// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
__device__ int atomic32_op_singlethread(int *ptr, int val, int desired) {
|
||||
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
return flag ? val : desired;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z25atomicu32_op_singlethreadPjjj
|
||||
// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
__device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned int val, unsigned int desired) {
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
return val;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z21atomic32_op_wavefrontPiii
|
||||
// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
__device__ int atomic32_op_wavefront(int *ptr, int val, int desired) {
|
||||
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
return flag ? val : desired;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z22atomicu32_op_wavefrontPjjj
|
||||
// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
__device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int val, unsigned int desired) {
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
return val;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z21atomic32_op_workgroupPiii
|
||||
// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
__device__ int atomic32_op_workgroup(int *ptr, int val, int desired) {
|
||||
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
return flag ? val : desired;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z22atomicu32_op_workgroupPjjj
|
||||
// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
__device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int val, unsigned int desired) {
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
return val;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z17atomic32_op_agentPiii
|
||||
// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
__device__ int atomic32_op_agent(int *ptr, int val, int desired) {
|
||||
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
return flag ? val : desired;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z18atomicu32_op_agentPjjj
|
||||
// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
__device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val, unsigned int desired) {
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
return val;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z18atomic32_op_systemPiii
|
||||
// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
|
||||
__device__ int atomic32_op_system(int *ptr, int val, int desired) {
|
||||
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
return flag ? val : desired;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z19atomicu32_op_systemPjjj
|
||||
// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
|
||||
__device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val, unsigned int desired) {
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
return val;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z24atomic64_op_singlethreadPxxx
|
||||
// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
__device__ long long atomic64_op_singlethread(long long *ptr, long long val, long long desired) {
|
||||
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
return flag ? val : desired;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z25atomicu64_op_singlethreadPyyy
|
||||
// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
|
||||
__device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, unsigned long long val, unsigned long long desired) {
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
|
||||
return val;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z21atomic64_op_wavefrontPxxx
|
||||
// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
__device__ long long atomic64_op_wavefront(long long *ptr, long long val, long long desired) {
|
||||
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
return flag ? val : desired;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z22atomicu64_op_wavefrontPyyy
|
||||
// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
|
||||
__device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, unsigned long long val, unsigned long long desired) {
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
|
||||
return val;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z21atomic64_op_workgroupPxxx
|
||||
// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
__device__ long long atomic64_op_workgroup(long long *ptr, long long val, long long desired) {
|
||||
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
return flag ? val : desired;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z22atomicu64_op_workgroupPyyy
|
||||
// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
|
||||
__device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, unsigned long long val, unsigned long long desired) {
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
return val;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z17atomic64_op_agentPxxx
|
||||
// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
__device__ long long atomic64_op_agent(long long *ptr, long long val, long long desired) {
|
||||
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
return flag ? val : desired;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z18atomicu64_op_agentPyyy
|
||||
// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
|
||||
__device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsigned long long val, unsigned long long desired) {
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
return val;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z18atomic64_op_systemPxxx
|
||||
// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
|
||||
__device__ long long atomic64_op_system(long long *ptr, long long val, long long desired) {
|
||||
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
return flag ? val : desired;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z19atomicu64_op_systemPyyy
|
||||
// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
|
||||
// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
|
||||
__device__ unsigned long long atomicu64_op_system(unsigned long long *ptr, unsigned long long val, unsigned long long desired) {
|
||||
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
return val;
|
||||
}
|
||||
Reference in New Issue
Block a user