[PowerPC] enable AtomicExpandImpl::expandAtomicCmpXchg for powerpc (#142395)

In PowerPC, the AtomicCmpXchgInst is lowered to
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS. However, this node does not handle
the weak attribute of AtomicCmpXchgInst. As a result, when compiling C++
atomic_compare_exchange_weak_explicit, the generated assembly includes a
"reservation lost" loop — i.e., it branches back and retries if the
stwcx. (store-conditional) fails. This differs from GCC’s codegen, which
does not include that loop for weak compare-exchange.

Since PowerPC uses LL/SC-style atomic instructions, the patch enables
AtomicExpandImpl::expandAtomicCmpXchg for PowerPC. With this, the weak
attribute is properly respected, and the "reservation lost" loop is
removed for weak operations.

---------

Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
This commit is contained in:
zhijian lin
2025-06-13 09:14:48 -04:00
committed by GitHub
parent a59e4acd75
commit 85a9f2e148
17 changed files with 3133 additions and 2111 deletions

View File

@@ -254,20 +254,20 @@ public:
/// support for these atomic instructions, and also have different options
/// w.r.t. what they should expand to.
enum class AtomicExpansionKind {
None, // Don't expand the instruction.
CastToInteger, // Cast the atomic instruction to another type, e.g. from
// floating-point to integer type.
None, // Don't expand the instruction.
CastToInteger, // Cast the atomic instruction to another type, e.g. from
// floating-point to integer type.
LLSC, // Expand the instruction into loadlinked/storeconditional; used
// by ARM/AArch64.
// by ARM/AArch64/PowerPC.
LLOnly, // Expand the (load) instruction into just a load-linked, which has
// greater atomic guarantees than a normal load.
CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
BitTestIntrinsic, // Use a target-specific intrinsic for special bit
// operations; used by X86.
CmpArithIntrinsic,// Use a target-specific intrinsic for special compare
// operations; used by X86.
Expand, // Generic expansion in terms of other atomic operations.
MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
BitTestIntrinsic, // Use a target-specific intrinsic for special bit
// operations; used by X86.
CmpArithIntrinsic, // Use a target-specific intrinsic for special compare
// operations; used by X86.
Expand, // Generic expansion in terms of other atomic operations.
// Rewrite to a non-atomic form for use in a known non-preemptible
// environment.

View File

@@ -1835,6 +1835,19 @@ let TargetPrefix = "ppc" in {
Intrinsic<[],[],[]>;
def int_ppc_iospace_eieio : ClangBuiltin<"__builtin_ppc_iospace_eieio">,
Intrinsic<[],[],[]>;
def int_ppc_lbarx :
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
def int_ppc_lharx :
Intrinsic<[llvm_i32_ty],[llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
def int_ppc_lwarx :
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
def int_ppc_ldarx :
Intrinsic<[llvm_i64_ty],[llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
def int_ppc_stdcx :
ClangBuiltin<"__builtin_ppc_stdcx">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i64_ty],
@@ -1844,7 +1857,7 @@ let TargetPrefix = "ppc" in {
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_sthcx :
Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty ],
Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty],
[IntrWriteMem, IntrArgMemOnly, IntrNoDuplicate]>;
def int_ppc_stbcx :
ClangBuiltin<"__builtin_ppc_stbcx">,

View File

@@ -1442,6 +1442,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
setMinFunctionAlignment(Align(4));
setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
auto CPUDirective = Subtarget.getCPUDirective();
switch (CPUDirective) {
@@ -12690,6 +12691,76 @@ static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
return Builder.CreateIntrinsic(Id, {});
}
// Emit a load-and-reserve from Addr using the PowerPC l[bhwd]arx intrinsic
// whose width matches ValueTy, and return the loaded value converted to
// ValueTy. Ord is not consulted here.
Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
                                         Value *Addr,
                                         AtomicOrdering Ord) const {
  const unsigned BitWidth = ValueTy->getPrimitiveSizeInBits();
  assert((BitWidth == 8 || BitWidth == 16 || BitWidth == 32 ||
          BitWidth == 64) &&
         "Only 8/16/32/64-bit atomic loads supported");

  // Select the intrinsic for this access width. The byte and halfword forms
  // are only valid when the subtarget has partword atomics.
  const Intrinsic::ID LarxID = [&]() -> Intrinsic::ID {
    switch (BitWidth) {
    case 64:
      return Intrinsic::ppc_ldarx;
    case 32:
      return Intrinsic::ppc_lwarx;
    case 16:
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      return Intrinsic::ppc_lharx;
    case 8:
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      return Intrinsic::ppc_lbarx;
    default:
      llvm_unreachable("Unexpected PrimitiveSize");
    }
  }();

  Value *Loaded =
      Builder.CreateIntrinsic(LarxID, Addr, /*FMFSource=*/nullptr, "larx");
  // The intrinsic's result may be wider than the requested type (the byte and
  // halfword intrinsics are declared as returning i32), so narrow it here.
  return Builder.CreateTruncOrBitCast(Loaded, ValueTy);
}
// Perform a store-conditional operation to Addr. Return the status of the
// store. This should be 0 if the store succeeded, non-zero otherwise.
//
// The st[bhwd]cx intrinsic is chosen from the bit width of Val's type; the
// byte and halfword forms require a subtarget with partword atomics. Ord is
// not consulted here.
Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
                                               Value *Val, Value *Addr,
                                               AtomicOrdering Ord) const {
  Type *Ty = Val->getType();
  unsigned SZ = Ty->getPrimitiveSizeInBits();
  assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
         "Only 8/16/32/64-bit atomic stores supported");
  Intrinsic::ID IntID;
  switch (SZ) {
  default:
    llvm_unreachable("Unexpected PrimitiveSize");
  case 8:
    IntID = Intrinsic::ppc_stbcx;
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 16:
    IntID = Intrinsic::ppc_sthcx;
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 32:
    IntID = Intrinsic::ppc_stwcx;
    break;
  case 64:
    IntID = Intrinsic::ppc_stdcx;
    break;
  }

  // The byte and halfword intrinsics are called with an i32 value operand, so
  // widen narrow values before emitting the call.
  if (SZ == 8 || SZ == 16)
    Val = Builder.CreateZExt(Val, Builder.getInt32Ty());

  Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
                                        /*FMFSource=*/nullptr, "stcx");
  // Flip the low bit of the intrinsic's i32 result so that the value handed
  // back follows the "0 on success" convention described above (presumably the
  // intrinsic itself yields 1 when the store succeeds -- confirm against the
  // st*cx selection patterns).
  return Builder.CreateXor(Call, Builder.getInt32(1));
}
// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
@@ -19651,7 +19722,7 @@ PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
if (shouldInlineQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
return AtomicExpansionKind::LLSC;
}
static Intrinsic::ID

View File

@@ -927,6 +927,12 @@ namespace llvm {
return true;
}
Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
AtomicOrdering Ord) const override;
Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,

View File

@@ -2023,6 +2023,8 @@ def SLBSYNC : XForm_0<31, 338, (outs), (ins), "slbsync", IIC_SprSLBSYNC, []>;
} // IsISA3_0
def : Pat<(int_ppc_ldarx ForceXForm:$ptr),
(LDARX ForceXForm:$ptr)>;
def : Pat<(int_ppc_stdcx ForceXForm:$dst, g8rc:$A),
(RLWINM (STDCX g8rc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, g8rc:$A, 8),

View File

@@ -5143,7 +5143,6 @@ def : Pat<(int_ppc_store2r gprc:$a, ForceXForm:$ptr),
def : Pat<(int_ppc_store4r gprc:$a, ForceXForm:$ptr),
(STWBRX gprc:$a, ForceXForm:$ptr)>;
// Fast 32-bit reverse bits algorithm:
// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA);
@@ -5324,10 +5323,14 @@ def CFENCE : PPCPostRAExpPseudo<(outs), (ins gprc:$cr), "#CFENCE", []>;
def : Pat<(i64 (bitreverse i64:$A)),
(OR8 (RLDICR DWBytes7654.DWord, 32, 31), DWBytes3210.DWord)>;
def : Pat<(int_ppc_lwarx ForceXForm:$ptr),
(LWARX ForceXForm:$ptr)>;
def : Pat<(int_ppc_stwcx ForceXForm:$dst, gprc:$A),
(RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 4),
(RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(int_ppc_lbarx ForceXForm:$ptr),
(LBARX ForceXForm:$ptr)>;
def : Pat<(int_ppc_stbcx ForceXForm:$dst, gprc:$A),
(RLWINM (STBCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 1),
@@ -5360,6 +5363,9 @@ def : Pat<(int_ppc_mtmsr gprc:$RS),
(MTMSR $RS, 0)>;
let Predicates = [IsISA2_07] in {
def : Pat<(int_ppc_lharx ForceXForm:$ptr),
(LHARX ForceXForm:$ptr)>;
def : Pat<(int_ppc_sthcx ForceXForm:$dst, gprc:$A),
(RLWINM (STHCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 2),

View File

@@ -15,50 +15,57 @@ define signext i32 @main() nounwind {
; CHECK-NEXT: stdu 1, -48(1)
; CHECK-NEXT: li 3, -32477
; CHECK-NEXT: std 0, 64(1)
; CHECK-NEXT: li 4, 234
; CHECK-NEXT: addi 6, 1, 46
; CHECK-NEXT: sth 3, 46(1)
; CHECK-NEXT: lis 3, 0
; CHECK-NEXT: addi 3, 1, 46
; CHECK-NEXT: lharx 4, 0, 3
; CHECK-NEXT: clrlwi 4, 4, 16
; CHECK-NEXT: cmplwi 4, 33059
; CHECK-NEXT: bne 0, .LBB0_4
; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-NEXT: sync
; CHECK-NEXT: ori 3, 3, 33059
; CHECK-NEXT: .LBB0_1: # %L.entry
; CHECK-NEXT: #
; CHECK-NEXT: lharx 5, 0, 6
; CHECK-NEXT: cmpw 5, 3
; CHECK-NEXT: bne 0, .LBB0_3
; CHECK-NEXT: # %bb.2: # %L.entry
; CHECK-NEXT: #
; CHECK-NEXT: sthcx. 4, 0, 6
; CHECK-NEXT: bne 0, .LBB0_1
; CHECK-NEXT: .LBB0_3: # %L.entry
; CHECK-NEXT: cmplwi 5, 33059
; CHECK-NEXT: li 4, 234
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_2: # %cmpxchg.trystore
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sthcx. 4, 0, 3
; CHECK-NEXT: beq 0, .LBB0_7
; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: lharx 5, 0, 3
; CHECK-NEXT: clrlwi 5, 5, 16
; CHECK-NEXT: cmplwi 5, 33059
; CHECK-NEXT: beq 0, .LBB0_2
; CHECK-NEXT: .LBB0_4: # %cmpxchg.nostore
; CHECK-NEXT: lwsync
; CHECK-NEXT: bne 0, .LBB0_6
; CHECK-NEXT: # %bb.4: # %L.B0000
; CHECK-NEXT: b .LBB0_8
; CHECK-NEXT: .LBB0_5: # %L.B0000
; CHECK-NEXT: lhz 3, 46(1)
; CHECK-NEXT: cmplwi 3, 234
; CHECK-NEXT: bne 0, .LBB0_7
; CHECK-NEXT: # %bb.5: # %L.B0001
; CHECK-NEXT: cmplwi 3, 234
; CHECK-NEXT: bne 0, .LBB0_9
; CHECK-NEXT: # %bb.6: # %L.B0001
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-NEXT: bl puts
; CHECK-NEXT: nop
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: b .LBB0_9
; CHECK-NEXT: .LBB0_6: # %L.B0003
; CHECK-NEXT: b .LBB0_11
; CHECK-NEXT: .LBB0_7: # %cmpxchg.success
; CHECK-NEXT: lwsync
; CHECK-NEXT: b .LBB0_5
; CHECK-NEXT: .LBB0_8: # %L.B0003
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-NEXT: addi 3, 3, 16
; CHECK-NEXT: b .LBB0_8
; CHECK-NEXT: .LBB0_7: # %L.B0005
; CHECK-NEXT: b .LBB0_10
; CHECK-NEXT: .LBB0_9: # %L.B0005
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-NEXT: addi 3, 3, 64
; CHECK-NEXT: .LBB0_8: # %L.B0003
; CHECK-NEXT: .LBB0_10: # %L.B0003
; CHECK-NEXT: bl puts
; CHECK-NEXT: nop
; CHECK-NEXT: li 3, 1
; CHECK-NEXT: .LBB0_9: # %L.B0003
; CHECK-NEXT: .LBB0_11: # %L.B0003
; CHECK-NEXT: addi 1, 1, 48
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: mtlr 0
@@ -69,64 +76,69 @@ define signext i32 @main() nounwind {
; CHECK-P7-NEXT: mflr 0
; CHECK-P7-NEXT: stdu 1, -48(1)
; CHECK-P7-NEXT: li 3, -32477
; CHECK-P7-NEXT: std 0, 64(1)
; CHECK-P7-NEXT: addi 4, 1, 46
; CHECK-P7-NEXT: li 6, 234
; CHECK-P7-NEXT: std 0, 64(1)
; CHECK-P7-NEXT: sth 3, 46(1)
; CHECK-P7-NEXT: lis 3, 0
; CHECK-P7-NEXT: rldicr 3, 4, 0, 61
; CHECK-P7-NEXT: rlwinm 4, 4, 3, 27, 27
; CHECK-P7-NEXT: lwarx 5, 0, 3
; CHECK-P7-NEXT: srw 6, 5, 4
; CHECK-P7-NEXT: clrlwi 6, 6, 16
; CHECK-P7-NEXT: cmplwi 6, 33059
; CHECK-P7-NEXT: bne 0, .LBB0_4
; CHECK-P7-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-P7-NEXT: lis 6, 0
; CHECK-P7-NEXT: li 7, 234
; CHECK-P7-NEXT: sync
; CHECK-P7-NEXT: ori 5, 3, 33059
; CHECK-P7-NEXT: rlwinm 3, 4, 3, 27, 27
; CHECK-P7-NEXT: rldicr 4, 4, 0, 61
; CHECK-P7-NEXT: slw 7, 5, 3
; CHECK-P7-NEXT: li 5, 0
; CHECK-P7-NEXT: slw 6, 6, 3
; CHECK-P7-NEXT: ori 5, 5, 65535
; CHECK-P7-NEXT: slw 5, 5, 3
; CHECK-P7-NEXT: and 6, 6, 5
; CHECK-P7-NEXT: and 7, 7, 5
; CHECK-P7-NEXT: .LBB0_1: # %L.entry
; CHECK-P7-NEXT: #
; CHECK-P7-NEXT: lwarx 9, 0, 4
; CHECK-P7-NEXT: and 8, 9, 5
; CHECK-P7-NEXT: cmpw 8, 7
; CHECK-P7-NEXT: bne 0, .LBB0_3
; CHECK-P7-NEXT: # %bb.2: # %L.entry
; CHECK-P7-NEXT: #
; CHECK-P7-NEXT: andc 9, 9, 5
; CHECK-P7-NEXT: or 9, 9, 6
; CHECK-P7-NEXT: stwcx. 9, 0, 4
; CHECK-P7-NEXT: bne 0, .LBB0_1
; CHECK-P7-NEXT: .LBB0_3: # %L.entry
; CHECK-P7-NEXT: srw 3, 8, 3
; CHECK-P7-NEXT: ori 6, 6, 65535
; CHECK-P7-NEXT: slw 7, 7, 4
; CHECK-P7-NEXT: slw 6, 6, 4
; CHECK-P7-NEXT: not 6, 6
; CHECK-P7-NEXT: .p2align 4
; CHECK-P7-NEXT: .LBB0_2: # %cmpxchg.trystore
; CHECK-P7-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-P7-NEXT: and 5, 5, 6
; CHECK-P7-NEXT: or 5, 5, 7
; CHECK-P7-NEXT: stwcx. 5, 0, 3
; CHECK-P7-NEXT: beq 0, .LBB0_7
; CHECK-P7-NEXT: # %bb.3: # %cmpxchg.releasedload
; CHECK-P7-NEXT: # in Loop: Header=BB0_2 Depth=1
; CHECK-P7-NEXT: lwarx 5, 0, 3
; CHECK-P7-NEXT: srw 8, 5, 4
; CHECK-P7-NEXT: clrlwi 8, 8, 16
; CHECK-P7-NEXT: cmplwi 8, 33059
; CHECK-P7-NEXT: beq 0, .LBB0_2
; CHECK-P7-NEXT: .LBB0_4: # %cmpxchg.nostore
; CHECK-P7-NEXT: lwsync
; CHECK-P7-NEXT: cmplwi 3, 33059
; CHECK-P7-NEXT: bne 0, .LBB0_6
; CHECK-P7-NEXT: # %bb.4: # %L.B0000
; CHECK-P7-NEXT: b .LBB0_8
; CHECK-P7-NEXT: .LBB0_5: # %L.B0000
; CHECK-P7-NEXT: lhz 3, 46(1)
; CHECK-P7-NEXT: cmplwi 3, 234
; CHECK-P7-NEXT: bne 0, .LBB0_7
; CHECK-P7-NEXT: # %bb.5: # %L.B0001
; CHECK-P7-NEXT: cmplwi 3, 234
; CHECK-P7-NEXT: bne 0, .LBB0_9
; CHECK-P7-NEXT: # %bb.6: # %L.B0001
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-P7-NEXT: bl puts
; CHECK-P7-NEXT: nop
; CHECK-P7-NEXT: li 3, 0
; CHECK-P7-NEXT: b .LBB0_9
; CHECK-P7-NEXT: .LBB0_6: # %L.B0003
; CHECK-P7-NEXT: b .LBB0_11
; CHECK-P7-NEXT: .LBB0_7: # %cmpxchg.success
; CHECK-P7-NEXT: lwsync
; CHECK-P7-NEXT: b .LBB0_5
; CHECK-P7-NEXT: .LBB0_8: # %L.B0003
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-P7-NEXT: addi 3, 3, 16
; CHECK-P7-NEXT: b .LBB0_8
; CHECK-P7-NEXT: .LBB0_7: # %L.B0005
; CHECK-P7-NEXT: b .LBB0_10
; CHECK-P7-NEXT: .LBB0_9: # %L.B0005
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-P7-NEXT: addi 3, 3, 64
; CHECK-P7-NEXT: .LBB0_8: # %L.B0003
; CHECK-P7-NEXT: .LBB0_10: # %L.B0003
; CHECK-P7-NEXT: bl puts
; CHECK-P7-NEXT: nop
; CHECK-P7-NEXT: li 3, 1
; CHECK-P7-NEXT: .LBB0_9: # %L.B0003
; CHECK-P7-NEXT: .LBB0_11: # %L.B0003
; CHECK-P7-NEXT: addi 1, 1, 48
; CHECK-P7-NEXT: ld 0, 16(1)
; CHECK-P7-NEXT: mtlr 0

File diff suppressed because it is too large Load Diff

View File

@@ -42,8 +42,8 @@ define i64 @exchange_and_cmp(ptr %mem) nounwind {
define i8 @exchange_and_cmp8(ptr %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp8:
; CHECK-BE: xori
; CHECK-LE-NOT: xori
; CHECK-BE: or r{{.*}} r{{.*}} r{{.*}}
; CHECK-LE-NOT: or r{{.*}} r{{.*}} r{{.*}}
; CHECK-P8U: lbarx
%tmppair = cmpxchg ptr %mem, i8 0, i8 1 monotonic monotonic
%tmp = extractvalue { i8, i1 } %tmppair, 0

View File

@@ -12,62 +12,60 @@
define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lwz r7, 0(r4)
; CHECK-NEXT: stw r3, -4(r1)
; CHECK-NEXT: stw r4, -8(r1)
; CHECK-NEXT: lwz r7, 0(r4)
; CHECK-NEXT: stw r5, -12(r1)
; CHECK-NEXT: stw r5, -16(r1)
; CHECK-NEXT: L..BB0_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lwarx r6, 0, r3
; CHECK-NEXT: cmpw cr1, r6, r7
; CHECK-NEXT: bne cr1, L..BB0_3
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: stwcx. r5, 0, r3
; CHECK-NEXT: bne cr0, L..BB0_1
; CHECK-NEXT: L..BB0_3: # %entry
; CHECK-NEXT: cmplw r6, r7
; CHECK-NEXT: bne cr0, L..BB0_2
; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-NEXT: stwcx. r5, 0, r3
; CHECK-NEXT: beq cr0, L..BB0_5
; CHECK-NEXT: # %bb.4: # %cmpxchg.store_expected
; CHECK-NEXT: L..BB0_2: # %cmpxchg.failure
; CHECK-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
; CHECK-NEXT: # %bb.3: # %cmpxchg.store_expected
; CHECK-NEXT: stw r6, 0(r4)
; CHECK-NEXT: L..BB0_5: # %cmpxchg.continue
; CHECK-NEXT: L..BB0_4: # %cmpxchg.continue
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: li r4, 1
; CHECK-NEXT: isel r3, r4, r3, 4*cr1+eq
; CHECK-NEXT: isel r3, r4, r3, 4*cr5+lt
; CHECK-NEXT: stb r3, -17(r1)
; CHECK-NEXT: blr
; CHECK-NEXT: L..BB0_5:
; CHECK-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
; CHECK-NEXT: b L..BB0_4
;
; CHECK64-LABEL: foo:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: lwz r7, 0(r4)
; CHECK64-NEXT: std r3, -8(r1)
; CHECK64-NEXT: std r4, -16(r1)
; CHECK64-NEXT: lwz r7, 0(r4)
; CHECK64-NEXT: stw r5, -20(r1)
; CHECK64-NEXT: stw r5, -24(r1)
; CHECK64-NEXT: L..BB0_1: # %entry
; CHECK64-NEXT: #
; CHECK64-NEXT: lwarx r6, 0, r3
; CHECK64-NEXT: cmpw cr1, r6, r7
; CHECK64-NEXT: bne cr1, L..BB0_3
; CHECK64-NEXT: # %bb.2: # %entry
; CHECK64-NEXT: #
; CHECK64-NEXT: stwcx. r5, 0, r3
; CHECK64-NEXT: bne cr0, L..BB0_1
; CHECK64-NEXT: L..BB0_3: # %entry
; CHECK64-NEXT: cmplw r6, r7
; CHECK64-NEXT: bne cr0, L..BB0_2
; CHECK64-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK64-NEXT: stwcx. r5, 0, r3
; CHECK64-NEXT: beq cr0, L..BB0_5
; CHECK64-NEXT: # %bb.4: # %cmpxchg.store_expected
; CHECK64-NEXT: L..BB0_2: # %cmpxchg.failure
; CHECK64-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
; CHECK64-NEXT: # %bb.3: # %cmpxchg.store_expected
; CHECK64-NEXT: stw r6, 0(r4)
; CHECK64-NEXT: L..BB0_5: # %cmpxchg.continue
; CHECK64-NEXT: L..BB0_4: # %cmpxchg.continue
; CHECK64-NEXT: li r3, 0
; CHECK64-NEXT: li r4, 1
; CHECK64-NEXT: isel r3, r4, r3, 4*cr1+eq
; CHECK64-NEXT: isel r3, r4, r3, 4*cr5+lt
; CHECK64-NEXT: li r4, 1
; CHECK64-NEXT: stb r3, -25(r1)
; CHECK64-NEXT: li r3, 0
; CHECK64-NEXT: isel r3, r4, r3, 4*cr1+eq
; CHECK64-NEXT: isel r3, r4, r3, 4*cr5+lt
; CHECK64-NEXT: blr
; CHECK64-NEXT: L..BB0_5:
; CHECK64-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
; CHECK64-NEXT: b L..BB0_4
entry:
%cp.addr = alloca ptr, align 4
%old.addr = alloca ptr, align 4

View File

@@ -9,33 +9,37 @@ define float @test_add(ptr %ptr, float %incr) {
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: sync
; CHECK-64-NEXT: lfs 0, 0(3)
; CHECK-64-NEXT: b .LBB0_2
; CHECK-64-NEXT: .LBB0_1: # %atomicrmw.start
; CHECK-64-NEXT: #
; CHECK-64-NEXT: stw 6, -4(1)
; CHECK-64-NEXT: cmplw 6, 4
; CHECK-64-NEXT: lfs 0, -4(1)
; CHECK-64-NEXT: beq 0, .LBB0_5
; CHECK-64-NEXT: .LBB0_2: # %atomicrmw.start
; CHECK-64-NEXT: # =>This Loop Header: Depth=1
; CHECK-64-NEXT: # Child Loop BB0_3 Depth 2
; CHECK-64-NEXT: b .LBB0_3
; CHECK-64-NEXT: .LBB0_1: # %cmpxchg.nostore
; CHECK-64-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-64-NEXT: crxor 20, 20, 20
; CHECK-64-NEXT: .LBB0_2: # %cmpxchg.end
; CHECK-64-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-64-NEXT: stw 4, -12(1)
; CHECK-64-NEXT: lfs 0, -12(1)
; CHECK-64-NEXT: bc 12, 20, .LBB0_7
; CHECK-64-NEXT: .LBB0_3: # %atomicrmw.start
; CHECK-64-NEXT: # =>This Loop Header: Depth=1
; CHECK-64-NEXT: # Child Loop BB0_4 Depth 2
; CHECK-64-NEXT: fadds 2, 0, 1
; CHECK-64-NEXT: stfs 2, -8(1)
; CHECK-64-NEXT: stfs 0, -12(1)
; CHECK-64-NEXT: lwz 5, -8(1)
; CHECK-64-NEXT: lwz 4, -12(1)
; CHECK-64-NEXT: .LBB0_3: # %atomicrmw.start
; CHECK-64-NEXT: # Parent Loop BB0_2 Depth=1
; CHECK-64-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-64-NEXT: lwarx 6, 0, 3
; CHECK-64-NEXT: cmpw 6, 4
; CHECK-64-NEXT: bne 0, .LBB0_1
; CHECK-64-NEXT: # %bb.4: # %atomicrmw.start
; CHECK-64-NEXT: #
; CHECK-64-NEXT: stfs 2, -4(1)
; CHECK-64-NEXT: stfs 0, -8(1)
; CHECK-64-NEXT: lwz 5, -4(1)
; CHECK-64-NEXT: lwz 6, -8(1)
; CHECK-64-NEXT: .LBB0_4: # %cmpxchg.start
; CHECK-64-NEXT: # Parent Loop BB0_3 Depth=1
; CHECK-64-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-64-NEXT: lwarx 4, 0, 3
; CHECK-64-NEXT: cmplw 4, 6
; CHECK-64-NEXT: bne 0, .LBB0_1
; CHECK-64-NEXT: # %bb.5: # %cmpxchg.fencedstore
; CHECK-64-NEXT: # in Loop: Header=BB0_4 Depth=2
; CHECK-64-NEXT: stwcx. 5, 0, 3
; CHECK-64-NEXT: bne 0, .LBB0_3
; CHECK-64-NEXT: b .LBB0_1
; CHECK-64-NEXT: .LBB0_5: # %atomicrmw.end
; CHECK-64-NEXT: bne 0, .LBB0_4
; CHECK-64-NEXT: # %bb.6: # in Loop: Header=BB0_3 Depth=1
; CHECK-64-NEXT: creqv 20, 20, 20
; CHECK-64-NEXT: b .LBB0_2
; CHECK-64-NEXT: .LBB0_7: # %atomicrmw.end
; CHECK-64-NEXT: fmr 1, 0
; CHECK-64-NEXT: lwsync
; CHECK-64-NEXT: blr
@@ -46,33 +50,37 @@ define float @test_add(ptr %ptr, float %incr) {
; CHECK-32-NEXT: .cfi_def_cfa_offset 32
; CHECK-32-NEXT: sync
; CHECK-32-NEXT: lfs 0, 0(3)
; CHECK-32-NEXT: b .LBB0_2
; CHECK-32-NEXT: .LBB0_1: # %atomicrmw.start
; CHECK-32-NEXT: #
; CHECK-32-NEXT: stw 6, 28(1)
; CHECK-32-NEXT: cmplw 6, 4
; CHECK-32-NEXT: lfs 0, 28(1)
; CHECK-32-NEXT: beq 0, .LBB0_5
; CHECK-32-NEXT: .LBB0_2: # %atomicrmw.start
; CHECK-32-NEXT: # =>This Loop Header: Depth=1
; CHECK-32-NEXT: # Child Loop BB0_3 Depth 2
; CHECK-32-NEXT: b .LBB0_3
; CHECK-32-NEXT: .LBB0_1: # %cmpxchg.nostore
; CHECK-32-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-32-NEXT: crxor 20, 20, 20
; CHECK-32-NEXT: .LBB0_2: # %cmpxchg.end
; CHECK-32-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-32-NEXT: stw 4, 20(1)
; CHECK-32-NEXT: lfs 0, 20(1)
; CHECK-32-NEXT: bc 12, 20, .LBB0_7
; CHECK-32-NEXT: .LBB0_3: # %atomicrmw.start
; CHECK-32-NEXT: # =>This Loop Header: Depth=1
; CHECK-32-NEXT: # Child Loop BB0_4 Depth 2
; CHECK-32-NEXT: fadds 2, 0, 1
; CHECK-32-NEXT: stfs 2, 24(1)
; CHECK-32-NEXT: stfs 0, 20(1)
; CHECK-32-NEXT: lwz 5, 24(1)
; CHECK-32-NEXT: lwz 4, 20(1)
; CHECK-32-NEXT: .LBB0_3: # %atomicrmw.start
; CHECK-32-NEXT: # Parent Loop BB0_2 Depth=1
; CHECK-32-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-32-NEXT: lwarx 6, 0, 3
; CHECK-32-NEXT: cmpw 6, 4
; CHECK-32-NEXT: bne 0, .LBB0_1
; CHECK-32-NEXT: # %bb.4: # %atomicrmw.start
; CHECK-32-NEXT: #
; CHECK-32-NEXT: stfs 2, 28(1)
; CHECK-32-NEXT: stfs 0, 24(1)
; CHECK-32-NEXT: lwz 5, 28(1)
; CHECK-32-NEXT: lwz 6, 24(1)
; CHECK-32-NEXT: .LBB0_4: # %cmpxchg.start
; CHECK-32-NEXT: # Parent Loop BB0_3 Depth=1
; CHECK-32-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-32-NEXT: lwarx 4, 0, 3
; CHECK-32-NEXT: cmplw 4, 6
; CHECK-32-NEXT: bne 0, .LBB0_1
; CHECK-32-NEXT: # %bb.5: # %cmpxchg.fencedstore
; CHECK-32-NEXT: # in Loop: Header=BB0_4 Depth=2
; CHECK-32-NEXT: stwcx. 5, 0, 3
; CHECK-32-NEXT: bne 0, .LBB0_3
; CHECK-32-NEXT: b .LBB0_1
; CHECK-32-NEXT: .LBB0_5: # %atomicrmw.end
; CHECK-32-NEXT: bne 0, .LBB0_4
; CHECK-32-NEXT: # %bb.6: # in Loop: Header=BB0_3 Depth=1
; CHECK-32-NEXT: creqv 20, 20, 20
; CHECK-32-NEXT: b .LBB0_2
; CHECK-32-NEXT: .LBB0_7: # %atomicrmw.end
; CHECK-32-NEXT: fmr 1, 0
; CHECK-32-NEXT: lwsync
; CHECK-32-NEXT: addi 1, 1, 32

View File

@@ -5,49 +5,47 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_usub_cond_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: mr 5, 3
; CHECK-NEXT: rlwinm 7, 5, 3, 27, 28
; CHECK-NEXT: lbz 3, 0(3)
; CHECK-NEXT: xori 7, 7, 24
; CHECK-NEXT: li 8, 255
; CHECK-NEXT: clrlwi 6, 4, 24
; CHECK-NEXT: rldicr 5, 5, 0, 61
; CHECK-NEXT: slw 8, 8, 7
; CHECK-NEXT: rldicr 5, 3, 0, 61
; CHECK-NEXT: not 3, 3
; CHECK-NEXT: li 6, 255
; CHECK-NEXT: lwz 8, 0(5)
; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28
; CHECK-NEXT: slw 6, 6, 3
; CHECK-NEXT: not 6, 6
; CHECK-NEXT: clrlwi 7, 4, 24
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: srw 3, 11, 7
; CHECK-NEXT: cmplw 3, 9
; CHECK-NEXT: beq 0, .LBB0_7
; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_5 Depth 2
; CHECK-NEXT: clrlwi 9, 3, 24
; CHECK-NEXT: cmplw 9, 6
; CHECK-NEXT: blt 0, .LBB0_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: sub 3, 3, 4
; CHECK-NEXT: .LBB0_4: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: slw 3, 3, 7
; CHECK-NEXT: slw 10, 9, 7
; CHECK-NEXT: and 3, 3, 8
; CHECK-NEXT: and 10, 10, 8
; CHECK-NEXT: .LBB0_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 12, 0, 5
; CHECK-NEXT: and 11, 12, 8
; CHECK-NEXT: cmpw 11, 10
; CHECK-NEXT: bne 0, .LBB0_1
; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: andc 12, 12, 8
; CHECK-NEXT: or 12, 12, 3
; CHECK-NEXT: stwcx. 12, 0, 5
; CHECK-NEXT: bne 0, .LBB0_5
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_7: # %atomicrmw.end
; CHECK-NEXT: .LBB0_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: mr 8, 9
; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_5 Depth 2
; CHECK-NEXT: srw 9, 8, 3
; CHECK-NEXT: clrlwi 10, 9, 24
; CHECK-NEXT: cmplw 10, 7
; CHECK-NEXT: blt 0, .LBB0_4
; CHECK-NEXT: # %bb.3: # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: sub 9, 9, 4
; CHECK-NEXT: .LBB0_4: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: clrlwi 9, 9, 24
; CHECK-NEXT: slw 9, 9, 3
; CHECK-NEXT: and 10, 8, 6
; CHECK-NEXT: or 10, 10, 9
; CHECK-NEXT: .LBB0_5: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 9, 0, 5
; CHECK-NEXT: cmplw 9, 8
; CHECK-NEXT: bne 0, .LBB0_1
; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=2
; CHECK-NEXT: stwcx. 10, 0, 5
; CHECK-NEXT: bne 0, .LBB0_5
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mr 8, 9
; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 9, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
@@ -58,50 +56,49 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_usub_cond_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: mr 5, 3
; CHECK-NEXT: li 8, 0
; CHECK-NEXT: lhz 3, 0(3)
; CHECK-NEXT: rlwinm 7, 5, 3, 27, 27
; CHECK-NEXT: xori 7, 7, 16
; CHECK-NEXT: ori 8, 8, 65535
; CHECK-NEXT: clrlwi 6, 4, 16
; CHECK-NEXT: rldicr 5, 5, 0, 61
; CHECK-NEXT: slw 8, 8, 7
; CHECK-NEXT: rldicr 5, 3, 0, 61
; CHECK-NEXT: clrlwi 3, 3, 30
; CHECK-NEXT: lis 6, 0
; CHECK-NEXT: xori 3, 3, 2
; CHECK-NEXT: lwz 8, 0(5)
; CHECK-NEXT: ori 6, 6, 65535
; CHECK-NEXT: slwi 3, 3, 3
; CHECK-NEXT: slw 6, 6, 3
; CHECK-NEXT: not 6, 6
; CHECK-NEXT: clrlwi 7, 4, 16
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: srw 3, 11, 7
; CHECK-NEXT: cmplw 3, 9
; CHECK-NEXT: beq 0, .LBB1_7
; CHECK-NEXT: .LBB1_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB1_5 Depth 2
; CHECK-NEXT: clrlwi 9, 3, 16
; CHECK-NEXT: cmplw 9, 6
; CHECK-NEXT: blt 0, .LBB1_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: sub 3, 3, 4
; CHECK-NEXT: .LBB1_4: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: slw 3, 3, 7
; CHECK-NEXT: slw 10, 9, 7
; CHECK-NEXT: and 3, 3, 8
; CHECK-NEXT: and 10, 10, 8
; CHECK-NEXT: .LBB1_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 12, 0, 5
; CHECK-NEXT: and 11, 12, 8
; CHECK-NEXT: cmpw 11, 10
; CHECK-NEXT: bne 0, .LBB1_1
; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: andc 12, 12, 8
; CHECK-NEXT: or 12, 12, 3
; CHECK-NEXT: stwcx. 12, 0, 5
; CHECK-NEXT: bne 0, .LBB1_5
; CHECK-NEXT: b .LBB1_1
; CHECK-NEXT: .LBB1_7: # %atomicrmw.end
; CHECK-NEXT: .LBB1_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: mr 8, 9
; CHECK-NEXT: .LBB1_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB1_5 Depth 2
; CHECK-NEXT: srw 9, 8, 3
; CHECK-NEXT: clrlwi 10, 9, 16
; CHECK-NEXT: cmplw 10, 7
; CHECK-NEXT: blt 0, .LBB1_4
; CHECK-NEXT: # %bb.3: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: sub 9, 9, 4
; CHECK-NEXT: .LBB1_4: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: clrlwi 9, 9, 16
; CHECK-NEXT: slw 9, 9, 3
; CHECK-NEXT: and 10, 8, 6
; CHECK-NEXT: or 10, 10, 9
; CHECK-NEXT: .LBB1_5: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 9, 0, 5
; CHECK-NEXT: cmplw 9, 8
; CHECK-NEXT: bne 0, .LBB1_1
; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB1_5 Depth=2
; CHECK-NEXT: stwcx. 10, 0, 5
; CHECK-NEXT: bne 0, .LBB1_5
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mr 8, 9
; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 9, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
@@ -114,34 +111,33 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: lwz 6, 0(3)
; CHECK-NEXT: b .LBB2_2
; CHECK-NEXT: .LBB2_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmplw 5, 6
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: beq 0, .LBB2_7
; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB2_5 Depth 2
; CHECK-NEXT: cmplw 6, 4
; CHECK-NEXT: .LBB2_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB2_5 Depth 2
; CHECK-NEXT: cmplw 6, 4
; CHECK-NEXT: bge 0, .LBB2_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: mr 7, 6
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT: mr 7, 6
; CHECK-NEXT: b .LBB2_5
; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: sub 7, 6, 4
; CHECK-NEXT: .LBB2_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB2_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: .LBB2_4: # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT: sub 7, 6, 4
; CHECK-NEXT: .LBB2_5: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB2_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 5, 0, 3
; CHECK-NEXT: cmpw 5, 6
; CHECK-NEXT: bne 0, .LBB2_1
; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmplw 5, 6
; CHECK-NEXT: bne 0, .LBB2_1
; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB2_5 Depth=2
; CHECK-NEXT: stwcx. 7, 0, 3
; CHECK-NEXT: bne 0, .LBB2_5
; CHECK-NEXT: b .LBB2_1
; CHECK-NEXT: .LBB2_7: # %atomicrmw.end
; CHECK-NEXT: bne 0, .LBB2_5
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -155,34 +151,33 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: ld 6, 0(3)
; CHECK-NEXT: b .LBB3_2
; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmpld 5, 6
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: beq 0, .LBB3_7
; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB3_5 Depth 2
; CHECK-NEXT: cmpld 6, 4
; CHECK-NEXT: .LBB3_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB3_5 Depth 2
; CHECK-NEXT: cmpld 6, 4
; CHECK-NEXT: bge 0, .LBB3_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: mr 7, 6
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
; CHECK-NEXT: mr 7, 6
; CHECK-NEXT: b .LBB3_5
; CHECK-NEXT: .LBB3_4:
; CHECK-NEXT: sub 7, 6, 4
; CHECK-NEXT: .LBB3_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: .LBB3_4: # in Loop: Header=BB3_2 Depth=1
; CHECK-NEXT: sub 7, 6, 4
; CHECK-NEXT: .LBB3_5: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
; CHECK-NEXT: cmpd 5, 6
; CHECK-NEXT: bne 0, .LBB3_1
; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmpld 5, 6
; CHECK-NEXT: bne 0, .LBB3_1
; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB3_5 Depth=2
; CHECK-NEXT: stdcx. 7, 0, 3
; CHECK-NEXT: bne 0, .LBB3_5
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_7: # %atomicrmw.end
; CHECK-NEXT: bne 0, .LBB3_5
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -194,52 +189,49 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_usub_sat_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: mr 5, 3
; CHECK-NEXT: rlwinm 6, 5, 3, 27, 28
; CHECK-NEXT: lbz 3, 0(3)
; CHECK-NEXT: xori 6, 6, 24
; CHECK-NEXT: li 7, 255
; CHECK-NEXT: clrlwi 4, 4, 24
; CHECK-NEXT: rldicr 5, 5, 0, 61
; CHECK-NEXT: slw 7, 7, 6
; CHECK-NEXT: rldicr 5, 3, 0, 61
; CHECK-NEXT: not 3, 3
; CHECK-NEXT: li 6, 255
; CHECK-NEXT: lwz 7, 0(5)
; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28
; CHECK-NEXT: slw 6, 6, 3
; CHECK-NEXT: not 6, 6
; CHECK-NEXT: clrlwi 4, 4, 24
; CHECK-NEXT: b .LBB4_2
; CHECK-NEXT: .LBB4_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: srw 3, 10, 6
; CHECK-NEXT: cmplw 3, 8
; CHECK-NEXT: beq 0, .LBB4_7
; CHECK-NEXT: .LBB4_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB4_5 Depth 2
; CHECK-NEXT: clrlwi 8, 3, 24
; CHECK-NEXT: sub 3, 8, 4
; CHECK-NEXT: cmplw 3, 8
; CHECK-NEXT: .LBB4_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
; CHECK-NEXT: mr 7, 8
; CHECK-NEXT: .LBB4_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB4_5 Depth 2
; CHECK-NEXT: srw 8, 7, 3
; CHECK-NEXT: clrlwi 9, 8, 24
; CHECK-NEXT: sub 8, 9, 4
; CHECK-NEXT: cmplw 8, 9
; CHECK-NEXT: li 9, 0
; CHECK-NEXT: bgt 0, .LBB4_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: mr 9, 3
; CHECK-NEXT: .LBB4_4: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: slw 3, 9, 6
; CHECK-NEXT: slw 9, 8, 6
; CHECK-NEXT: and 3, 3, 7
; CHECK-NEXT: and 9, 9, 7
; CHECK-NEXT: .LBB4_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB4_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 11, 0, 5
; CHECK-NEXT: and 10, 11, 7
; CHECK-NEXT: cmpw 10, 9
; CHECK-NEXT: bne 0, .LBB4_1
; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: andc 11, 11, 7
; CHECK-NEXT: or 11, 11, 3
; CHECK-NEXT: stwcx. 11, 0, 5
; CHECK-NEXT: bne 0, .LBB4_5
; CHECK-NEXT: b .LBB4_1
; CHECK-NEXT: .LBB4_7: # %atomicrmw.end
; CHECK-NEXT: bgt 0, .LBB4_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
; CHECK-NEXT: mr 9, 8
; CHECK-NEXT: .LBB4_4: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
; CHECK-NEXT: slw 8, 9, 3
; CHECK-NEXT: and 9, 7, 6
; CHECK-NEXT: or 9, 9, 8
; CHECK-NEXT: .LBB4_5: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB4_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 8, 0, 5
; CHECK-NEXT: cmplw 8, 7
; CHECK-NEXT: bne 0, .LBB4_1
; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB4_5 Depth=2
; CHECK-NEXT: stwcx. 9, 0, 5
; CHECK-NEXT: bne 0, .LBB4_5
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mr 7, 8
; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 8, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
@@ -250,53 +242,51 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_usub_sat_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: mr 5, 3
; CHECK-NEXT: li 7, 0
; CHECK-NEXT: lhz 3, 0(3)
; CHECK-NEXT: rlwinm 6, 5, 3, 27, 27
; CHECK-NEXT: xori 6, 6, 16
; CHECK-NEXT: ori 7, 7, 65535
; CHECK-NEXT: clrlwi 4, 4, 16
; CHECK-NEXT: rldicr 5, 5, 0, 61
; CHECK-NEXT: slw 7, 7, 6
; CHECK-NEXT: rldicr 5, 3, 0, 61
; CHECK-NEXT: clrlwi 3, 3, 30
; CHECK-NEXT: lis 6, 0
; CHECK-NEXT: xori 3, 3, 2
; CHECK-NEXT: lwz 7, 0(5)
; CHECK-NEXT: ori 6, 6, 65535
; CHECK-NEXT: slwi 3, 3, 3
; CHECK-NEXT: slw 6, 6, 3
; CHECK-NEXT: not 6, 6
; CHECK-NEXT: clrlwi 4, 4, 16
; CHECK-NEXT: b .LBB5_2
; CHECK-NEXT: .LBB5_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: srw 3, 10, 6
; CHECK-NEXT: cmplw 3, 8
; CHECK-NEXT: beq 0, .LBB5_7
; CHECK-NEXT: .LBB5_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB5_5 Depth 2
; CHECK-NEXT: clrlwi 8, 3, 16
; CHECK-NEXT: sub 3, 8, 4
; CHECK-NEXT: cmplw 3, 8
; CHECK-NEXT: .LBB5_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT: mr 7, 8
; CHECK-NEXT: .LBB5_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB5_5 Depth 2
; CHECK-NEXT: srw 8, 7, 3
; CHECK-NEXT: clrlwi 9, 8, 16
; CHECK-NEXT: sub 8, 9, 4
; CHECK-NEXT: cmplw 8, 9
; CHECK-NEXT: li 9, 0
; CHECK-NEXT: bgt 0, .LBB5_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: mr 9, 3
; CHECK-NEXT: .LBB5_4: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: slw 3, 9, 6
; CHECK-NEXT: slw 9, 8, 6
; CHECK-NEXT: and 3, 3, 7
; CHECK-NEXT: and 9, 9, 7
; CHECK-NEXT: .LBB5_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB5_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 11, 0, 5
; CHECK-NEXT: and 10, 11, 7
; CHECK-NEXT: cmpw 10, 9
; CHECK-NEXT: bne 0, .LBB5_1
; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: andc 11, 11, 7
; CHECK-NEXT: or 11, 11, 3
; CHECK-NEXT: stwcx. 11, 0, 5
; CHECK-NEXT: bne 0, .LBB5_5
; CHECK-NEXT: b .LBB5_1
; CHECK-NEXT: .LBB5_7: # %atomicrmw.end
; CHECK-NEXT: bgt 0, .LBB5_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT: mr 9, 8
; CHECK-NEXT: .LBB5_4: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT: slw 8, 9, 3
; CHECK-NEXT: and 9, 7, 6
; CHECK-NEXT: or 9, 9, 8
; CHECK-NEXT: .LBB5_5: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB5_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 8, 0, 5
; CHECK-NEXT: cmplw 8, 7
; CHECK-NEXT: bne 0, .LBB5_1
; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB5_5 Depth=2
; CHECK-NEXT: stwcx. 9, 0, 5
; CHECK-NEXT: bne 0, .LBB5_5
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mr 7, 8
; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 8, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
@@ -309,33 +299,32 @@ define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: lwz 6, 0(3)
; CHECK-NEXT: b .LBB6_2
; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmplw 5, 6
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: beq 0, .LBB6_6
; CHECK-NEXT: .LBB6_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB6_4 Depth 2
; CHECK-NEXT: sub 5, 6, 4
; CHECK-NEXT: cmplw 5, 6
; CHECK-NEXT: .LBB6_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: .LBB6_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB6_4 Depth 2
; CHECK-NEXT: sub 5, 6, 4
; CHECK-NEXT: cmplw 5, 6
; CHECK-NEXT: li 7, 0
; CHECK-NEXT: bgt 0, .LBB6_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: mr 7, 5
; CHECK-NEXT: .LBB6_4: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB6_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: bgt 0, .LBB6_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
; CHECK-NEXT: mr 7, 5
; CHECK-NEXT: .LBB6_4: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB6_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 5, 0, 3
; CHECK-NEXT: cmpw 5, 6
; CHECK-NEXT: bne 0, .LBB6_1
; CHECK-NEXT: # %bb.5: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmplw 5, 6
; CHECK-NEXT: bne 0, .LBB6_1
; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB6_4 Depth=2
; CHECK-NEXT: stwcx. 7, 0, 3
; CHECK-NEXT: bne 0, .LBB6_4
; CHECK-NEXT: b .LBB6_1
; CHECK-NEXT: .LBB6_6: # %atomicrmw.end
; CHECK-NEXT: bne 0, .LBB6_4
; CHECK-NEXT: # %bb.6:
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: # %bb.7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -349,33 +338,32 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: ld 6, 0(3)
; CHECK-NEXT: b .LBB7_2
; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmpld 5, 6
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: beq 0, .LBB7_6
; CHECK-NEXT: .LBB7_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB7_4 Depth 2
; CHECK-NEXT: subc 5, 6, 4
; CHECK-NEXT: .LBB7_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: .LBB7_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB7_4 Depth 2
; CHECK-NEXT: subc 5, 6, 4
; CHECK-NEXT: li 7, 0
; CHECK-NEXT: addze. 8, 7
; CHECK-NEXT: beq 0, .LBB7_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: mr 7, 5
; CHECK-NEXT: .LBB7_4: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB7_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: beq 0, .LBB7_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
; CHECK-NEXT: mr 7, 5
; CHECK-NEXT: .LBB7_4: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB7_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
; CHECK-NEXT: cmpd 5, 6
; CHECK-NEXT: bne 0, .LBB7_1
; CHECK-NEXT: # %bb.5: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmpld 5, 6
; CHECK-NEXT: bne 0, .LBB7_1
; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB7_4 Depth=2
; CHECK-NEXT: stdcx. 7, 0, 3
; CHECK-NEXT: bne 0, .LBB7_4
; CHECK-NEXT: b .LBB7_1
; CHECK-NEXT: .LBB7_6: # %atomicrmw.end
; CHECK-NEXT: bne 0, .LBB7_4
; CHECK-NEXT: # %bb.6:
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: # %bb.7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr

View File

@@ -5,51 +5,49 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: mr 5, 3
; CHECK-NEXT: rlwinm 6, 5, 3, 27, 28
; CHECK-NEXT: lbz 3, 0(3)
; CHECK-NEXT: xori 6, 6, 24
; CHECK-NEXT: li 7, 255
; CHECK-NEXT: clrlwi 4, 4, 24
; CHECK-NEXT: rldicr 5, 5, 0, 61
; CHECK-NEXT: slw 7, 7, 6
; CHECK-NEXT: rldicr 5, 3, 0, 61
; CHECK-NEXT: not 3, 3
; CHECK-NEXT: li 6, 255
; CHECK-NEXT: lwz 7, 0(5)
; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28
; CHECK-NEXT: slw 6, 6, 3
; CHECK-NEXT: not 6, 6
; CHECK-NEXT: clrlwi 4, 4, 24
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: srw 3, 10, 6
; CHECK-NEXT: cmplw 3, 8
; CHECK-NEXT: beq 0, .LBB0_7
; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_5 Depth 2
; CHECK-NEXT: clrlwi 8, 3, 24
; CHECK-NEXT: cmplw 8, 4
; CHECK-NEXT: .LBB0_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: mr 7, 8
; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_5 Depth 2
; CHECK-NEXT: srw 8, 7, 3
; CHECK-NEXT: clrlwi 9, 8, 24
; CHECK-NEXT: cmplw 9, 4
; CHECK-NEXT: li 9, 0
; CHECK-NEXT: bge 0, .LBB0_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: addi 9, 3, 1
; CHECK-NEXT: .LBB0_4: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: slw 3, 9, 6
; CHECK-NEXT: slw 9, 8, 6
; CHECK-NEXT: and 3, 3, 7
; CHECK-NEXT: and 9, 9, 7
; CHECK-NEXT: .LBB0_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 11, 0, 5
; CHECK-NEXT: and 10, 11, 7
; CHECK-NEXT: cmpw 10, 9
; CHECK-NEXT: bne 0, .LBB0_1
; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: andc 11, 11, 7
; CHECK-NEXT: or 11, 11, 3
; CHECK-NEXT: stwcx. 11, 0, 5
; CHECK-NEXT: bne 0, .LBB0_5
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_7: # %atomicrmw.end
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: addi 9, 8, 1
; CHECK-NEXT: .LBB0_4: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: clrlwi 8, 9, 24
; CHECK-NEXT: slw 8, 8, 3
; CHECK-NEXT: and 9, 7, 6
; CHECK-NEXT: or 9, 9, 8
; CHECK-NEXT: .LBB0_5: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 8, 0, 5
; CHECK-NEXT: cmplw 8, 7
; CHECK-NEXT: bne 0, .LBB0_1
; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=2
; CHECK-NEXT: stwcx. 9, 0, 5
; CHECK-NEXT: bne 0, .LBB0_5
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mr 7, 8
; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 8, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
@@ -60,52 +58,51 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: mr 5, 3
; CHECK-NEXT: li 7, 0
; CHECK-NEXT: lhz 3, 0(3)
; CHECK-NEXT: rlwinm 6, 5, 3, 27, 27
; CHECK-NEXT: xori 6, 6, 16
; CHECK-NEXT: ori 7, 7, 65535
; CHECK-NEXT: clrlwi 4, 4, 16
; CHECK-NEXT: rldicr 5, 5, 0, 61
; CHECK-NEXT: slw 7, 7, 6
; CHECK-NEXT: rldicr 5, 3, 0, 61
; CHECK-NEXT: clrlwi 3, 3, 30
; CHECK-NEXT: lis 6, 0
; CHECK-NEXT: xori 3, 3, 2
; CHECK-NEXT: lwz 7, 0(5)
; CHECK-NEXT: ori 6, 6, 65535
; CHECK-NEXT: slwi 3, 3, 3
; CHECK-NEXT: slw 6, 6, 3
; CHECK-NEXT: not 6, 6
; CHECK-NEXT: clrlwi 4, 4, 16
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: srw 3, 10, 6
; CHECK-NEXT: cmplw 3, 8
; CHECK-NEXT: beq 0, .LBB1_7
; CHECK-NEXT: .LBB1_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB1_5 Depth 2
; CHECK-NEXT: clrlwi 8, 3, 16
; CHECK-NEXT: cmplw 8, 4
; CHECK-NEXT: .LBB1_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: mr 7, 8
; CHECK-NEXT: .LBB1_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB1_5 Depth 2
; CHECK-NEXT: srw 8, 7, 3
; CHECK-NEXT: clrlwi 9, 8, 16
; CHECK-NEXT: cmplw 9, 4
; CHECK-NEXT: li 9, 0
; CHECK-NEXT: bge 0, .LBB1_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: addi 9, 3, 1
; CHECK-NEXT: .LBB1_4: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: slw 3, 9, 6
; CHECK-NEXT: slw 9, 8, 6
; CHECK-NEXT: and 3, 3, 7
; CHECK-NEXT: and 9, 9, 7
; CHECK-NEXT: .LBB1_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 11, 0, 5
; CHECK-NEXT: and 10, 11, 7
; CHECK-NEXT: cmpw 10, 9
; CHECK-NEXT: bne 0, .LBB1_1
; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: andc 11, 11, 7
; CHECK-NEXT: or 11, 11, 3
; CHECK-NEXT: stwcx. 11, 0, 5
; CHECK-NEXT: bne 0, .LBB1_5
; CHECK-NEXT: b .LBB1_1
; CHECK-NEXT: .LBB1_7: # %atomicrmw.end
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: addi 9, 8, 1
; CHECK-NEXT: .LBB1_4: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: clrlwi 8, 9, 16
; CHECK-NEXT: slw 8, 8, 3
; CHECK-NEXT: and 9, 7, 6
; CHECK-NEXT: or 9, 9, 8
; CHECK-NEXT: .LBB1_5: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 8, 0, 5
; CHECK-NEXT: cmplw 8, 7
; CHECK-NEXT: bne 0, .LBB1_1
; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB1_5 Depth=2
; CHECK-NEXT: stwcx. 9, 0, 5
; CHECK-NEXT: bne 0, .LBB1_5
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mr 7, 8
; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 8, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
@@ -118,32 +115,31 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: lwz 6, 0(3)
; CHECK-NEXT: b .LBB2_2
; CHECK-NEXT: .LBB2_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmplw 5, 6
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: beq 0, .LBB2_6
; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB2_4 Depth 2
; CHECK-NEXT: cmplw 6, 4
; CHECK-NEXT: .LBB2_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB2_4 Depth 2
; CHECK-NEXT: cmplw 6, 4
; CHECK-NEXT: li 7, 0
; CHECK-NEXT: bge 0, .LBB2_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT: addi 7, 6, 1
; CHECK-NEXT: .LBB2_4: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB2_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: .LBB2_4: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB2_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 5, 0, 3
; CHECK-NEXT: cmpw 5, 6
; CHECK-NEXT: bne 0, .LBB2_1
; CHECK-NEXT: # %bb.5: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmplw 5, 6
; CHECK-NEXT: bne 0, .LBB2_1
; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB2_4 Depth=2
; CHECK-NEXT: stwcx. 7, 0, 3
; CHECK-NEXT: bne 0, .LBB2_4
; CHECK-NEXT: b .LBB2_1
; CHECK-NEXT: .LBB2_6: # %atomicrmw.end
; CHECK-NEXT: bne 0, .LBB2_4
; CHECK-NEXT: # %bb.6:
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: # %bb.7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -157,32 +153,31 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: ld 6, 0(3)
; CHECK-NEXT: b .LBB3_2
; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmpld 5, 6
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: beq 0, .LBB3_6
; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB3_4 Depth 2
; CHECK-NEXT: cmpld 6, 4
; CHECK-NEXT: .LBB3_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB3_4 Depth 2
; CHECK-NEXT: cmpld 6, 4
; CHECK-NEXT: li 7, 0
; CHECK-NEXT: bge 0, .LBB3_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
; CHECK-NEXT: addi 7, 6, 1
; CHECK-NEXT: .LBB3_4: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: .LBB3_4: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
; CHECK-NEXT: cmpd 5, 6
; CHECK-NEXT: bne 0, .LBB3_1
; CHECK-NEXT: # %bb.5: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmpld 5, 6
; CHECK-NEXT: bne 0, .LBB3_1
; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB3_4 Depth=2
; CHECK-NEXT: stdcx. 7, 0, 3
; CHECK-NEXT: bne 0, .LBB3_4
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_6: # %atomicrmw.end
; CHECK-NEXT: bne 0, .LBB3_4
; CHECK-NEXT: # %bb.6:
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: # %bb.7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -194,52 +189,50 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: mr 5, 3
; CHECK-NEXT: rlwinm 7, 5, 3, 27, 28
; CHECK-NEXT: lbz 3, 0(3)
; CHECK-NEXT: xori 7, 7, 24
; CHECK-NEXT: li 8, 255
; CHECK-NEXT: clrlwi 6, 4, 24
; CHECK-NEXT: rldicr 5, 5, 0, 61
; CHECK-NEXT: slw 8, 8, 7
; CHECK-NEXT: rldicr 5, 3, 0, 61
; CHECK-NEXT: not 3, 3
; CHECK-NEXT: li 6, 255
; CHECK-NEXT: lwz 8, 0(5)
; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28
; CHECK-NEXT: slw 6, 6, 3
; CHECK-NEXT: not 6, 6
; CHECK-NEXT: clrlwi 7, 4, 24
; CHECK-NEXT: b .LBB4_2
; CHECK-NEXT: .LBB4_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: srw 3, 11, 7
; CHECK-NEXT: cmplw 3, 9
; CHECK-NEXT: beq 0, .LBB4_7
; CHECK-NEXT: .LBB4_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB4_5 Depth 2
; CHECK-NEXT: andi. 9, 3, 255
; CHECK-NEXT: cmplw 1, 9, 6
; CHECK-NEXT: .LBB4_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
; CHECK-NEXT: mr 8, 9
; CHECK-NEXT: .LBB4_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB4_5 Depth 2
; CHECK-NEXT: srw 9, 8, 3
; CHECK-NEXT: andi. 10, 9, 255
; CHECK-NEXT: cmplw 1, 10, 7
; CHECK-NEXT: cror 20, 2, 5
; CHECK-NEXT: mr 10, 4
; CHECK-NEXT: mr 10, 4
; CHECK-NEXT: bc 12, 20, .LBB4_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: addi 10, 3, -1
; CHECK-NEXT: .LBB4_4: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: slw 3, 10, 7
; CHECK-NEXT: slw 10, 9, 7
; CHECK-NEXT: and 3, 3, 8
; CHECK-NEXT: and 10, 10, 8
; CHECK-NEXT: .LBB4_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB4_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 12, 0, 5
; CHECK-NEXT: and 11, 12, 8
; CHECK-NEXT: cmpw 11, 10
; CHECK-NEXT: bne 0, .LBB4_1
; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: andc 12, 12, 8
; CHECK-NEXT: or 12, 12, 3
; CHECK-NEXT: stwcx. 12, 0, 5
; CHECK-NEXT: bne 0, .LBB4_5
; CHECK-NEXT: b .LBB4_1
; CHECK-NEXT: .LBB4_7: # %atomicrmw.end
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
; CHECK-NEXT: addi 10, 9, -1
; CHECK-NEXT: .LBB4_4: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
; CHECK-NEXT: clrlwi 9, 10, 24
; CHECK-NEXT: slw 9, 9, 3
; CHECK-NEXT: and 10, 8, 6
; CHECK-NEXT: or 10, 10, 9
; CHECK-NEXT: .LBB4_5: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB4_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 9, 0, 5
; CHECK-NEXT: cmplw 9, 8
; CHECK-NEXT: bne 0, .LBB4_1
; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB4_5 Depth=2
; CHECK-NEXT: stwcx. 10, 0, 5
; CHECK-NEXT: bne 0, .LBB4_5
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mr 8, 9
; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 9, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
@@ -250,53 +243,52 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: mr 5, 3
; CHECK-NEXT: li 8, 0
; CHECK-NEXT: lhz 3, 0(3)
; CHECK-NEXT: rlwinm 7, 5, 3, 27, 27
; CHECK-NEXT: xori 7, 7, 16
; CHECK-NEXT: ori 8, 8, 65535
; CHECK-NEXT: clrlwi 6, 4, 16
; CHECK-NEXT: rldicr 5, 5, 0, 61
; CHECK-NEXT: slw 8, 8, 7
; CHECK-NEXT: rldicr 5, 3, 0, 61
; CHECK-NEXT: clrlwi 3, 3, 30
; CHECK-NEXT: lis 6, 0
; CHECK-NEXT: xori 3, 3, 2
; CHECK-NEXT: lwz 8, 0(5)
; CHECK-NEXT: ori 6, 6, 65535
; CHECK-NEXT: slwi 3, 3, 3
; CHECK-NEXT: slw 6, 6, 3
; CHECK-NEXT: not 6, 6
; CHECK-NEXT: clrlwi 7, 4, 16
; CHECK-NEXT: b .LBB5_2
; CHECK-NEXT: .LBB5_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: srw 3, 11, 7
; CHECK-NEXT: cmplw 3, 9
; CHECK-NEXT: beq 0, .LBB5_7
; CHECK-NEXT: .LBB5_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB5_5 Depth 2
; CHECK-NEXT: andi. 9, 3, 65535
; CHECK-NEXT: cmplw 1, 9, 6
; CHECK-NEXT: .LBB5_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT: mr 8, 9
; CHECK-NEXT: .LBB5_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB5_5 Depth 2
; CHECK-NEXT: srw 9, 8, 3
; CHECK-NEXT: andi. 10, 9, 65535
; CHECK-NEXT: cmplw 1, 10, 7
; CHECK-NEXT: cror 20, 2, 5
; CHECK-NEXT: mr 10, 4
; CHECK-NEXT: mr 10, 4
; CHECK-NEXT: bc 12, 20, .LBB5_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: addi 10, 3, -1
; CHECK-NEXT: .LBB5_4: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: slw 3, 10, 7
; CHECK-NEXT: slw 10, 9, 7
; CHECK-NEXT: and 3, 3, 8
; CHECK-NEXT: and 10, 10, 8
; CHECK-NEXT: .LBB5_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB5_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 12, 0, 5
; CHECK-NEXT: and 11, 12, 8
; CHECK-NEXT: cmpw 11, 10
; CHECK-NEXT: bne 0, .LBB5_1
; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: andc 12, 12, 8
; CHECK-NEXT: or 12, 12, 3
; CHECK-NEXT: stwcx. 12, 0, 5
; CHECK-NEXT: bne 0, .LBB5_5
; CHECK-NEXT: b .LBB5_1
; CHECK-NEXT: .LBB5_7: # %atomicrmw.end
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT: addi 10, 9, -1
; CHECK-NEXT: .LBB5_4: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT: clrlwi 9, 10, 16
; CHECK-NEXT: slw 9, 9, 3
; CHECK-NEXT: and 10, 8, 6
; CHECK-NEXT: or 10, 10, 9
; CHECK-NEXT: .LBB5_5: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB5_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 9, 0, 5
; CHECK-NEXT: cmplw 9, 8
; CHECK-NEXT: bne 0, .LBB5_1
; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB5_5 Depth=2
; CHECK-NEXT: stwcx. 10, 0, 5
; CHECK-NEXT: bne 0, .LBB5_5
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mr 8, 9
; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 9, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
@@ -309,37 +301,36 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: lwz 6, 0(3)
; CHECK-NEXT: b .LBB6_2
; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmplw 5, 6
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: beq 0, .LBB6_7
; CHECK-NEXT: .LBB6_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB6_5 Depth 2
; CHECK-NEXT: cmpwi 6, 0
; CHECK-NEXT: mr 7, 4
; CHECK-NEXT: .LBB6_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: .LBB6_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB6_5 Depth 2
; CHECK-NEXT: cmpwi 6, 0
; CHECK-NEXT: mr 7, 4
; CHECK-NEXT: bc 12, 2, .LBB6_5
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmplw 6, 4
; CHECK-NEXT: mr 7, 4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
; CHECK-NEXT: cmplw 6, 4
; CHECK-NEXT: mr 7, 4
; CHECK-NEXT: bc 12, 1, .LBB6_5
; CHECK-NEXT: # %bb.4: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: # %bb.4: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
; CHECK-NEXT: addi 7, 6, -1
; CHECK-NEXT: .LBB6_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB6_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: .LBB6_5: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB6_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 5, 0, 3
; CHECK-NEXT: cmpw 5, 6
; CHECK-NEXT: bne 0, .LBB6_1
; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmplw 5, 6
; CHECK-NEXT: bne 0, .LBB6_1
; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB6_5 Depth=2
; CHECK-NEXT: stwcx. 7, 0, 3
; CHECK-NEXT: bne 0, .LBB6_5
; CHECK-NEXT: b .LBB6_1
; CHECK-NEXT: .LBB6_7: # %atomicrmw.end
; CHECK-NEXT: bne 0, .LBB6_5
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -353,38 +344,37 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: ld 6, 0(3)
; CHECK-NEXT: b .LBB7_2
; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmpld 5, 6
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: beq 0, .LBB7_7
; CHECK-NEXT: .LBB7_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB7_5 Depth 2
; CHECK-NEXT: cmpdi 6, 0
; CHECK-NEXT: mr 7, 4
; CHECK-NEXT: .LBB7_1: # %cmpxchg.nostore
; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: .LBB7_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB7_5 Depth 2
; CHECK-NEXT: cmpdi 6, 0
; CHECK-NEXT: mr 7, 4
; CHECK-NEXT: bc 12, 2, .LBB7_5
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmpld 6, 4
; CHECK-NEXT: mr 7, 4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
; CHECK-NEXT: cmpld 6, 4
; CHECK-NEXT: mr 7, 4
; CHECK-NEXT: bc 12, 1, .LBB7_5
; CHECK-NEXT: # %bb.4: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: # %bb.4: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
; CHECK-NEXT: addi 7, 6, -1
; CHECK-NEXT: .LBB7_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB7_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: .LBB7_5: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB7_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
; CHECK-NEXT: cmpd 5, 6
; CHECK-NEXT: bne 0, .LBB7_1
; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
; CHECK-NEXT: cmpld 5, 6
; CHECK-NEXT: bne 0, .LBB7_1
; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
; CHECK-NEXT: # in Loop: Header=BB7_5 Depth=2
; CHECK-NEXT: stdcx. 7, 0, 3
; CHECK-NEXT: bne 0, .LBB7_5
; CHECK-NEXT: b .LBB7_1
; CHECK-NEXT: .LBB7_7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: bne 0, .LBB7_5
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mr 6, 5
; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst

File diff suppressed because it is too large Load Diff

View File

@@ -139,59 +139,67 @@ define void @store_i64_seq_cst(ptr %mem) {
define i8 @cas_strong_i8_sc_sc(ptr %mem) {
; PPC32-LABEL: cas_strong_i8_sc_sc:
; PPC32: # %bb.0:
; PPC32-NEXT: rlwinm r8, r3, 3, 27, 28
; PPC32-NEXT: li r5, 1
; PPC32-NEXT: li r6, 0
; PPC32-NEXT: li r7, 255
; PPC32-NEXT: rlwinm r4, r3, 0, 0, 29
; PPC32-NEXT: xori r3, r8, 24
; PPC32-NEXT: slw r8, r5, r3
; PPC32-NEXT: slw r9, r6, r3
; PPC32-NEXT: slw r5, r7, r3
; PPC32-NEXT: and r6, r8, r5
; PPC32-NEXT: and r7, r9, r5
; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29
; PPC32-NEXT: lwarx r4, 0, r5
; PPC32-NEXT: not r3, r3
; PPC32-NEXT: rlwinm r3, r3, 3, 27, 28
; PPC32-NEXT: srw r6, r4, r3
; PPC32-NEXT: andi. r6, r6, 255
; PPC32-NEXT: bne cr0, .LBB8_4
; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC32-NEXT: li r6, 255
; PPC32-NEXT: li r7, 1
; PPC32-NEXT: slw r6, r6, r3
; PPC32-NEXT: not r6, r6
; PPC32-NEXT: slw r7, r7, r3
; PPC32-NEXT: sync
; PPC32-NEXT: .LBB8_1:
; PPC32-NEXT: lwarx r9, 0, r4
; PPC32-NEXT: and r8, r9, r5
; PPC32-NEXT: cmpw r8, r7
; PPC32-NEXT: bne cr0, .LBB8_3
; PPC32-NEXT: # %bb.2:
; PPC32-NEXT: andc r9, r9, r5
; PPC32-NEXT: or r9, r9, r6
; PPC32-NEXT: stwcx. r9, 0, r4
; PPC32-NEXT: bne cr0, .LBB8_1
; PPC32-NEXT: .LBB8_3:
; PPC32-NEXT: srw r3, r8, r3
; PPC32-NEXT: .LBB8_2: # %cmpxchg.trystore
; PPC32-NEXT: # =>This Inner Loop Header: Depth=1
; PPC32-NEXT: and r8, r4, r6
; PPC32-NEXT: or r8, r8, r7
; PPC32-NEXT: stwcx. r8, 0, r5
; PPC32-NEXT: beq cr0, .LBB8_4
; PPC32-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC32-NEXT: # in Loop: Header=BB8_2 Depth=1
; PPC32-NEXT: lwarx r4, 0, r5
; PPC32-NEXT: srw r8, r4, r3
; PPC32-NEXT: andi. r8, r8, 255
; PPC32-NEXT: beq cr0, .LBB8_2
; PPC32-NEXT: .LBB8_4: # %cmpxchg.nostore
; PPC32-NEXT: srw r3, r4, r3
; PPC32-NEXT: lwsync
; PPC32-NEXT: blr
;
; PPC64-LABEL: cas_strong_i8_sc_sc:
; PPC64: # %bb.0:
; PPC64-NEXT: rlwinm r8, r3, 3, 27, 28
; PPC64-NEXT: li r5, 1
; PPC64-NEXT: li r6, 0
; PPC64-NEXT: li r7, 255
; PPC64-NEXT: rldicr r4, r3, 0, 61
; PPC64-NEXT: xori r3, r8, 24
; PPC64-NEXT: slw r8, r5, r3
; PPC64-NEXT: slw r9, r6, r3
; PPC64-NEXT: slw r5, r7, r3
; PPC64-NEXT: and r6, r8, r5
; PPC64-NEXT: and r7, r9, r5
; PPC64-NEXT: rldicr r5, r3, 0, 61
; PPC64-NEXT: not r3, r3
; PPC64-NEXT: lwarx r4, 0, r5
; PPC64-NEXT: rlwinm r3, r3, 3, 27, 28
; PPC64-NEXT: srw r6, r4, r3
; PPC64-NEXT: andi. r6, r6, 255
; PPC64-NEXT: bne cr0, .LBB8_4
; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64-NEXT: li r6, 255
; PPC64-NEXT: li r7, 1
; PPC64-NEXT: slw r6, r6, r3
; PPC64-NEXT: not r6, r6
; PPC64-NEXT: slw r7, r7, r3
; PPC64-NEXT: sync
; PPC64-NEXT: .LBB8_1:
; PPC64-NEXT: lwarx r9, 0, r4
; PPC64-NEXT: and r8, r9, r5
; PPC64-NEXT: cmpw r8, r7
; PPC64-NEXT: bne cr0, .LBB8_3
; PPC64-NEXT: # %bb.2:
; PPC64-NEXT: andc r9, r9, r5
; PPC64-NEXT: or r9, r9, r6
; PPC64-NEXT: stwcx. r9, 0, r4
; PPC64-NEXT: bne cr0, .LBB8_1
; PPC64-NEXT: .LBB8_3:
; PPC64-NEXT: srw r3, r8, r3
; PPC64-NEXT: .LBB8_2: # %cmpxchg.trystore
; PPC64-NEXT: # =>This Inner Loop Header: Depth=1
; PPC64-NEXT: and r8, r4, r6
; PPC64-NEXT: or r8, r8, r7
; PPC64-NEXT: stwcx. r8, 0, r5
; PPC64-NEXT: beq cr0, .LBB8_4
; PPC64-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64-NEXT: # in Loop: Header=BB8_2 Depth=1
; PPC64-NEXT: lwarx r4, 0, r5
; PPC64-NEXT: srw r8, r4, r3
; PPC64-NEXT: andi. r8, r8, 255
; PPC64-NEXT: beq cr0, .LBB8_2
; PPC64-NEXT: .LBB8_4: # %cmpxchg.nostore
; PPC64-NEXT: srw r3, r4, r3
; PPC64-NEXT: lwsync
; PPC64-NEXT: blr
%val = cmpxchg ptr %mem, i8 0, i8 1 seq_cst seq_cst
@@ -201,57 +209,53 @@ define i8 @cas_strong_i8_sc_sc(ptr %mem) {
define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
; PPC32-LABEL: cas_weak_i16_acquire_acquire:
; PPC32: # %bb.0:
; PPC32-NEXT: li r6, 0
; PPC32-NEXT: rlwinm r4, r3, 3, 27, 27
; PPC32-NEXT: li r5, 1
; PPC32-NEXT: ori r7, r6, 65535
; PPC32-NEXT: xori r4, r4, 16
; PPC32-NEXT: slw r8, r5, r4
; PPC32-NEXT: slw r9, r6, r4
; PPC32-NEXT: slw r5, r7, r4
; PPC32-NEXT: rlwinm r3, r3, 0, 0, 29
; PPC32-NEXT: and r6, r8, r5
; PPC32-NEXT: and r7, r9, r5
; PPC32-NEXT: .LBB9_1:
; PPC32-NEXT: lwarx r9, 0, r3
; PPC32-NEXT: and r8, r9, r5
; PPC32-NEXT: cmpw r8, r7
; PPC32-NEXT: bne cr0, .LBB9_3
; PPC32-NEXT: # %bb.2:
; PPC32-NEXT: andc r9, r9, r5
; PPC32-NEXT: or r9, r9, r6
; PPC32-NEXT: stwcx. r9, 0, r3
; PPC32-NEXT: bne cr0, .LBB9_1
; PPC32-NEXT: .LBB9_3:
; PPC32-NEXT: srw r3, r8, r4
; PPC32-NEXT: rlwinm r4, r3, 0, 0, 29
; PPC32-NEXT: lwarx r5, 0, r4
; PPC32-NEXT: clrlwi r3, r3, 30
; PPC32-NEXT: xori r3, r3, 2
; PPC32-NEXT: slwi r6, r3, 3
; PPC32-NEXT: srw r3, r5, r6
; PPC32-NEXT: andi. r7, r3, 65535
; PPC32-NEXT: beq cr0, .LBB9_2
; PPC32-NEXT: # %bb.1: # %cmpxchg.failure
; PPC32-NEXT: lwsync
; PPC32-NEXT: blr
; PPC32-NEXT: .LBB9_2: # %cmpxchg.fencedstore
; PPC32-NEXT: lis r7, 0
; PPC32-NEXT: ori r7, r7, 65535
; PPC32-NEXT: slw r7, r7, r6
; PPC32-NEXT: li r8, 1
; PPC32-NEXT: not r7, r7
; PPC32-NEXT: slw r6, r8, r6
; PPC32-NEXT: and r5, r5, r7
; PPC32-NEXT: or r5, r5, r6
; PPC32-NEXT: stwcx. r5, 0, r4
; PPC32-NEXT: lwsync
; PPC32-NEXT: blr
;
; PPC64-LABEL: cas_weak_i16_acquire_acquire:
; PPC64: # %bb.0:
; PPC64-NEXT: li r6, 0
; PPC64-NEXT: rlwinm r4, r3, 3, 27, 27
; PPC64-NEXT: li r5, 1
; PPC64-NEXT: ori r7, r6, 65535
; PPC64-NEXT: xori r4, r4, 16
; PPC64-NEXT: slw r8, r5, r4
; PPC64-NEXT: slw r9, r6, r4
; PPC64-NEXT: slw r5, r7, r4
; PPC64-NEXT: rldicr r3, r3, 0, 61
; PPC64-NEXT: and r6, r8, r5
; PPC64-NEXT: and r7, r9, r5
; PPC64-NEXT: .LBB9_1:
; PPC64-NEXT: lwarx r9, 0, r3
; PPC64-NEXT: and r8, r9, r5
; PPC64-NEXT: cmpw r8, r7
; PPC64-NEXT: bne cr0, .LBB9_3
; PPC64-NEXT: # %bb.2:
; PPC64-NEXT: andc r9, r9, r5
; PPC64-NEXT: or r9, r9, r6
; PPC64-NEXT: stwcx. r9, 0, r3
; PPC64-NEXT: bne cr0, .LBB9_1
; PPC64-NEXT: .LBB9_3:
; PPC64-NEXT: srw r3, r8, r4
; PPC64-NEXT: rldicr r4, r3, 0, 61
; PPC64-NEXT: clrlwi r3, r3, 30
; PPC64-NEXT: lwarx r5, 0, r4
; PPC64-NEXT: xori r3, r3, 2
; PPC64-NEXT: slwi r6, r3, 3
; PPC64-NEXT: srw r3, r5, r6
; PPC64-NEXT: andi. r7, r3, 65535
; PPC64-NEXT: beq cr0, .LBB9_2
; PPC64-NEXT: # %bb.1: # %cmpxchg.failure
; PPC64-NEXT: lwsync
; PPC64-NEXT: blr
; PPC64-NEXT: .LBB9_2: # %cmpxchg.fencedstore
; PPC64-NEXT: lis r7, 0
; PPC64-NEXT: ori r7, r7, 65535
; PPC64-NEXT: slw r7, r7, r6
; PPC64-NEXT: li r8, 1
; PPC64-NEXT: not r7, r7
; PPC64-NEXT: slw r6, r8, r6
; PPC64-NEXT: and r5, r5, r7
; PPC64-NEXT: or r5, r5, r6
; PPC64-NEXT: stwcx. r5, 0, r4
; PPC64-NEXT: lwsync
; PPC64-NEXT: blr
%val = cmpxchg weak ptr %mem, i16 0, i16 1 acquire acquire
@@ -261,17 +265,23 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) {
; CHECK-LABEL: cas_strong_i32_acqrel_acquire:
; CHECK: # %bb.0:
; CHECK-NEXT: mr r4, r3
; CHECK-NEXT: lwarx r3, 0, r3
; CHECK-NEXT: cmplwi r3, 0
; CHECK-NEXT: bne cr0, .LBB10_4
; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-NEXT: li r5, 1
; CHECK-NEXT: lwsync
; CHECK-NEXT: .LBB10_1:
; CHECK-NEXT: lwarx r4, 0, r3
; CHECK-NEXT: cmpwi r4, 0
; CHECK-NEXT: bne cr0, .LBB10_3
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: stwcx. r5, 0, r3
; CHECK-NEXT: bne cr0, .LBB10_1
; CHECK-NEXT: .LBB10_3:
; CHECK-NEXT: mr r3, r4
; CHECK-NEXT: .LBB10_2: # %cmpxchg.trystore
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: stwcx. r5, 0, r4
; CHECK-NEXT: beq cr0, .LBB10_4
; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload
; CHECK-NEXT: # in Loop: Header=BB10_2 Depth=1
; CHECK-NEXT: lwarx r3, 0, r4
; CHECK-NEXT: cmplwi r3, 0
; CHECK-NEXT: beq cr0, .LBB10_2
; CHECK-NEXT: .LBB10_4: # %cmpxchg.nostore
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%val = cmpxchg ptr %mem, i32 0, i32 1 acq_rel acquire
@@ -304,17 +314,14 @@ define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
;
; PPC64-LABEL: cas_weak_i64_release_monotonic:
; PPC64: # %bb.0:
; PPC64-NEXT: mr r4, r3
; PPC64-NEXT: ldarx r3, 0, r3
; PPC64-NEXT: cmpldi r3, 0
; PPC64-NEXT: bnelr cr0
; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64-NEXT: li r5, 1
; PPC64-NEXT: lwsync
; PPC64-NEXT: .LBB11_1:
; PPC64-NEXT: ldarx r4, 0, r3
; PPC64-NEXT: cmpdi r4, 0
; PPC64-NEXT: bne cr0, .LBB11_3
; PPC64-NEXT: # %bb.2:
; PPC64-NEXT: stdcx. r5, 0, r3
; PPC64-NEXT: bne cr0, .LBB11_1
; PPC64-NEXT: .LBB11_3:
; PPC64-NEXT: mr r3, r4
; PPC64-NEXT: stdcx. r5, 0, r4
; PPC64-NEXT: blr
%val = cmpxchg weak ptr %mem, i64 0, i64 1 release monotonic
%loaded = extractvalue { i64, i1} %val, 0

View File

@@ -4,12 +4,17 @@
define void @test(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: .LBB0_1:
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB0_1: # %cmpxchg.start
; PPC64LE-NEXT: # =>This Inner Loop Header: Depth=1
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmpw 6, 4
; PPC64LE-NEXT: clrlwi 6, 6, 24
; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
; PPC64LE-NEXT: # %bb.2:
; PPC64LE-NEXT: # in Loop: Header=BB0_1 Depth=1
; PPC64LE-NEXT: stbcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB0_1
; PPC64LE-NEXT: # %bb.3:

View File

@@ -7,19 +7,51 @@ define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) {
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] monotonic monotonic, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: %loaded = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], %cmpxchg.end ]
; CHECK-NEXT: %new = fadd float %loaded, %value
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float %new to i32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float %loaded to i32
; CHECK-NEXT: br label %cmpxchg.start
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.start: ; preds = %cmpxchg.trystore, %atomicrmw.start
; CHECK-NEXT: %larx = call i32 @llvm.ppc.lwarx(ptr %ptr)
; CHECK-NEXT: %should_store = icmp eq i32 %larx, [[TMP3]]
; CHECK-NEXT: br i1 %should_store, label %cmpxchg.fencedstore, label %cmpxchg.nostore
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.fencedstore: ; preds = %cmpxchg.start
; CHECK-NEXT: br label %cmpxchg.trystore
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.trystore: ; preds = %cmpxchg.fencedstore
; CHECK-NEXT: %loaded.trystore = phi i32 [ %larx, %cmpxchg.fencedstore ]
; CHECK-NEXT: %stcx = call i32 @llvm.ppc.stwcx(ptr %ptr, i32 [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = xor i32 %stcx, 1
; CHECK-NEXT: %success1 = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 %success1, label %cmpxchg.success, label %cmpxchg.start
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.releasedload: ; No predecessors!
; CHECK-NEXT: unreachable
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.success: ; preds = %cmpxchg.trystore
; CHECK-NEXT: br label %cmpxchg.end
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.nostore: ; preds = %cmpxchg.start
; CHECK-NEXT: %loaded.nostore = phi i32 [ %larx, %cmpxchg.start ]
; CHECK-NEXT: br label %cmpxchg.failure
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.failure: ; preds = %cmpxchg.nostore
; CHECK-NEXT: %loaded.failure = phi i32 [ %loaded.nostore, %cmpxchg.nostore ]
; CHECK-NEXT: br label %cmpxchg.end
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.end: ; preds = %cmpxchg.failure, %cmpxchg.success
; CHECK-NEXT: %loaded.exit = phi i32 [ %loaded.trystore, %cmpxchg.success ], [ %loaded.failure, %cmpxchg.failure ]
; CHECK-NEXT: %success2 = phi i1 [ true, %cmpxchg.success ], [ false, %cmpxchg.failure ]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 %loaded.exit to float
; CHECK-NEXT: br i1 %success2, label %atomicrmw.end, label %atomicrmw.start
; CHECK-EMPTY:
; CHECK-NEXT: atomicrmw.end: ; preds = %cmpxchg.end
; CHECK-NEXT: call void @llvm.ppc.lwsync()
; CHECK-NEXT: ret float [[TMP5]]
;
; CHECK-NEXT: }
%res = atomicrmw fadd ptr %ptr, float %value seq_cst
ret float %res
}
@@ -28,22 +60,56 @@ define float @test_atomicrmw_fsub_f32(ptr %ptr, float %value) {
; CHECK-LABEL: @test_atomicrmw_fsub_f32(
; CHECK-NEXT: call void @llvm.ppc.sync()
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] monotonic monotonic, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: br label %atomicrmw.start
; CHECK-EMPTY:
; CHECK-NEXT: atomicrmw.start:
; CHECK-NEXT: %loaded = phi float [ [[TMP1]], %0 ], [ [[TMP5:%.*]], %cmpxchg.end ]
; CHECK-NEXT: %new = fsub float %loaded, %value
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float %new to i32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float %loaded to i32
; CHECK-NEXT: br label %cmpxchg.start
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.start:
; CHECK-NEXT: %larx = call i32 @llvm.ppc.lwarx(ptr %ptr)
; CHECK-NEXT: %should_store = icmp eq i32 %larx, [[TMP3]]
; CHECK-NEXT: br i1 %should_store, label %cmpxchg.fencedstore, label %cmpxchg.nostore
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.fencedstore: ; preds = %cmpxchg.start
; CHECK-NEXT: br label %cmpxchg.trystore
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.trystore: ; preds = %cmpxchg.fencedstore
; CHECK-NEXT: %loaded.trystore = phi i32 [ %larx, %cmpxchg.fencedstore ]
; CHECK-NEXT: %stcx = call i32 @llvm.ppc.stwcx(ptr %ptr, i32 %2)
; CHECK-NEXT: [[TMP4:%.*]] = xor i32 %stcx, 1
; CHECK-NEXT: %success1 = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 %success1, label %cmpxchg.success, label %cmpxchg.start
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.releasedload: ; No predecessors!
; CHECK-NEXT: unreachable
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.success: ; preds = %cmpxchg.trystore
; CHECK-NEXT: br label %cmpxchg.end
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.nostore: ; preds = %cmpxchg.start
; CHECK-NEXT: %loaded.nostore = phi i32 [ %larx, %cmpxchg.start ]
; CHECK-NEXT: br label %cmpxchg.failure
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.failure: ; preds = %cmpxchg.nostore
; CHECK-NEXT: %loaded.failure = phi i32 [ %loaded.nostore, %cmpxchg.nostore ]
; CHECK-NEXT: br label %cmpxchg.end
; CHECK-EMPTY:
; CHECK-NEXT: cmpxchg.end: ; preds = %cmpxchg.failure, %cmpxchg.success
; CHECK-NEXT: %loaded.exit = phi i32 [ %loaded.trystore, %cmpxchg.success ], [ %loaded.failure, %cmpxchg.failure ]
; CHECK-NEXT: %success2 = phi i1 [ true, %cmpxchg.success ], [ false, %cmpxchg.failure ]
; CHECK-NEXT: [[TMP5]] = bitcast i32 %loaded.exit to float
; CHECK-NEXT: br i1 %success2, label %atomicrmw.end, label %atomicrmw.start
; CHECK-EMPTY:
; CHECK-NEXT: atomicrmw.end: ; preds = %cmpxchg.end
; CHECK-NEXT: call void @llvm.ppc.lwsync()
; CHECK-NEXT: ret float [[TMP5]]
;
%res = atomicrmw fsub ptr %ptr, float %value seq_cst
; CHECK-NEXT: }
%res = atomicrmw fsub ptr %ptr, float %value seq_cst
ret float %res
}