; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
; This is already checked for in Atomics-64.ll
; RUN: llc -verify-machineinstrs < %s -mcpu=ppc -mtriple=powerpc64-unknown-linux-gnu -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64

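; Both RUN lines use the shared CHECK prefix; output that differs between the
; 32-bit and 64-bit targets is matched with the PPC32 and PPC64 prefixes.
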
; FIXME: we don't currently check for the operations themselves with CHECK-NEXT,
; because they are implemented in a very messy way with lwarx/stwcx.
; It should be fixed soon in another patch.

; We first check loads, for all sizes from i8 to i64.
; We also vary orderings to check for barriers.
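; As the checks below show, an acquire load is a plain load followed by an
; artificial control dependency: the loaded value is compared against itself
; and a never-taken bne- to .+4 feeds an isync, which is cheaper than a full
; barrier. A seq_cst load additionally places a sync (hwsync) before the load.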
define i8 @load_i8_unordered(ptr %mem) {
; CHECK-LABEL: load_i8_unordered:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lbz r3, 0(r3)
; CHECK-NEXT:    blr
  %val = load atomic i8, ptr %mem unordered, align 1
  ret i8 %val
}

define i16 @load_i16_monotonic(ptr %mem) {
; CHECK-LABEL: load_i16_monotonic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lhz r3, 0(r3)
; CHECK-NEXT:    blr
  %val = load atomic i16, ptr %mem monotonic, align 2
  ret i16 %val
}

define i32 @load_i32_acquire(ptr %mem) {
; PPC32-LABEL: load_i32_acquire:
; PPC32:       # %bb.0:
; PPC32-NEXT:    lwz r3, 0(r3)
; PPC32-NEXT:    cmpw cr7, r3, r3
; PPC32-NEXT:    bne- cr7, .+4
; PPC32-NEXT:    isync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_i32_acquire:
; PPC64:       # %bb.0:
; PPC64-NEXT:    lwz r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    blr
  %val = load atomic i32, ptr %mem acquire, align 4
  ret i32 %val
}

define i64 @load_i64_seq_cst(ptr %mem) {
; PPC32-LABEL: load_i64_seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r4, 5
; PPC32-NEXT:    bl __atomic_load_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_i64_seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    sync
; PPC64-NEXT:    ld r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    blr
  %val = load atomic i64, ptr %mem seq_cst, align 8
  ret i64 %val
}

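; Note: the 32-bit target has no 8-byte lwarx/stwcx. pair, so i64 atomics
; lower to libatomic calls such as __atomic_load_8 above; li r4, 5 passes the
; C ABI memory-order argument (__ATOMIC_SEQ_CST == 5).
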
; Stores
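; A release store is preceded by lwsync and a seq_cst store by sync, while
; unordered and monotonic stores need no barrier at all.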
define void @store_i8_unordered(ptr %mem) {
; CHECK-LABEL: store_i8_unordered:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r4, 42
; CHECK-NEXT:    stb r4, 0(r3)
; CHECK-NEXT:    blr
  store atomic i8 42, ptr %mem unordered, align 1
  ret void
}

define void @store_i16_monotonic(ptr %mem) {
; CHECK-LABEL: store_i16_monotonic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r4, 42
; CHECK-NEXT:    sth r4, 0(r3)
; CHECK-NEXT:    blr
  store atomic i16 42, ptr %mem monotonic, align 2
  ret void
}

define void @store_i32_release(ptr %mem) {
; CHECK-LABEL: store_i32_release:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r4, 42
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    stw r4, 0(r3)
; CHECK-NEXT:    blr
  store atomic i32 42, ptr %mem release, align 4
  ret void
}

define void @store_i64_seq_cst(ptr %mem) {
; PPC32-LABEL: store_i64_seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r5, 0
; PPC32-NEXT:    li r6, 42
; PPC32-NEXT:    li r7, 5
; PPC32-NEXT:    bl __atomic_store_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_i64_seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r4, 42
; PPC64-NEXT:    sync
; PPC64-NEXT:    std r4, 0(r3)
; PPC64-NEXT:    blr
  store atomic i64 42, ptr %mem seq_cst, align 8
  ret void
}

; Atomic CmpXchg
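; i8 and i16 compare-and-swap have no native instruction, so they are expanded
; onto the containing aligned word: rlwinm/xori compute the lane's bit shift
; and the word-aligned address, slw builds shifted copies of the new value,
; the expected value, and a lane mask, and a lwarx/stwcx. loop compares only
; the masked lane and splices the new byte or halfword in with andc/or.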
define i8 @cas_strong_i8_sc_sc(ptr %mem) {
; PPC32-LABEL: cas_strong_i8_sc_sc:
; PPC32:       # %bb.0:
; PPC32-NEXT:    rlwinm r8, r3, 3, 27, 28
; PPC32-NEXT:    li r5, 1
; PPC32-NEXT:    li r6, 0
; PPC32-NEXT:    li r7, 255
; PPC32-NEXT:    rlwinm r4, r3, 0, 0, 29
; PPC32-NEXT:    xori r3, r8, 24
; PPC32-NEXT:    slw r8, r5, r3
; PPC32-NEXT:    slw r9, r6, r3
; PPC32-NEXT:    slw r5, r7, r3
; PPC32-NEXT:    and r6, r8, r5
; PPC32-NEXT:    and r7, r9, r5
; PPC32-NEXT:    sync
; PPC32-NEXT:  .LBB8_1:
; PPC32-NEXT:    lwarx r9, 0, r4
; PPC32-NEXT:    and r8, r9, r5
; PPC32-NEXT:    cmpw r8, r7
; PPC32-NEXT:    bne cr0, .LBB8_3
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    andc r9, r9, r5
; PPC32-NEXT:    or r9, r9, r6
; PPC32-NEXT:    stwcx. r9, 0, r4
; PPC32-NEXT:    bne cr0, .LBB8_1
; PPC32-NEXT:  .LBB8_3:
; PPC32-NEXT:    srw r3, r8, r3
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: cas_strong_i8_sc_sc:
; PPC64:       # %bb.0:
; PPC64-NEXT:    rlwinm r8, r3, 3, 27, 28
; PPC64-NEXT:    li r5, 1
; PPC64-NEXT:    li r6, 0
; PPC64-NEXT:    li r7, 255
; PPC64-NEXT:    rldicr r4, r3, 0, 61
; PPC64-NEXT:    xori r3, r8, 24
; PPC64-NEXT:    slw r8, r5, r3
; PPC64-NEXT:    slw r9, r6, r3
; PPC64-NEXT:    slw r5, r7, r3
; PPC64-NEXT:    and r6, r8, r5
; PPC64-NEXT:    and r7, r9, r5
; PPC64-NEXT:    sync
; PPC64-NEXT:  .LBB8_1:
; PPC64-NEXT:    lwarx r9, 0, r4
; PPC64-NEXT:    and r8, r9, r5
; PPC64-NEXT:    cmpw r8, r7
; PPC64-NEXT:    bne cr0, .LBB8_3
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    andc r9, r9, r5
; PPC64-NEXT:    or r9, r9, r6
; PPC64-NEXT:    stwcx. r9, 0, r4
; PPC64-NEXT:    bne cr0, .LBB8_1
; PPC64-NEXT:  .LBB8_3:
; PPC64-NEXT:    srw r3, r8, r3
; PPC64-NEXT:    lwsync
; PPC64-NEXT:    blr
  %val = cmpxchg ptr %mem, i8 0, i8 1 seq_cst seq_cst
  %loaded = extractvalue { i8, i1 } %val, 0
  ret i8 %loaded
}

define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
; PPC32-LABEL: cas_weak_i16_acquire_acquire:
; PPC32:       # %bb.0:
; PPC32-NEXT:    li r6, 0
; PPC32-NEXT:    rlwinm r4, r3, 3, 27, 27
; PPC32-NEXT:    li r5, 1
; PPC32-NEXT:    ori r7, r6, 65535
; PPC32-NEXT:    xori r4, r4, 16
; PPC32-NEXT:    slw r8, r5, r4
; PPC32-NEXT:    slw r9, r6, r4
; PPC32-NEXT:    slw r5, r7, r4
; PPC32-NEXT:    rlwinm r3, r3, 0, 0, 29
; PPC32-NEXT:    and r6, r8, r5
; PPC32-NEXT:    and r7, r9, r5
; PPC32-NEXT:  .LBB9_1:
; PPC32-NEXT:    lwarx r9, 0, r3
; PPC32-NEXT:    and r8, r9, r5
; PPC32-NEXT:    cmpw r8, r7
; PPC32-NEXT:    bne cr0, .LBB9_3
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    andc r9, r9, r5
; PPC32-NEXT:    or r9, r9, r6
; PPC32-NEXT:    stwcx. r9, 0, r3
; PPC32-NEXT:    bne cr0, .LBB9_1
; PPC32-NEXT:  .LBB9_3:
; PPC32-NEXT:    srw r3, r8, r4
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: cas_weak_i16_acquire_acquire:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r6, 0
; PPC64-NEXT:    rlwinm r4, r3, 3, 27, 27
; PPC64-NEXT:    li r5, 1
; PPC64-NEXT:    ori r7, r6, 65535
; PPC64-NEXT:    xori r4, r4, 16
; PPC64-NEXT:    slw r8, r5, r4
; PPC64-NEXT:    slw r9, r6, r4
; PPC64-NEXT:    slw r5, r7, r4
; PPC64-NEXT:    rldicr r3, r3, 0, 61
; PPC64-NEXT:    and r6, r8, r5
; PPC64-NEXT:    and r7, r9, r5
; PPC64-NEXT:  .LBB9_1:
; PPC64-NEXT:    lwarx r9, 0, r3
; PPC64-NEXT:    and r8, r9, r5
; PPC64-NEXT:    cmpw r8, r7
; PPC64-NEXT:    bne cr0, .LBB9_3
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    andc r9, r9, r5
; PPC64-NEXT:    or r9, r9, r6
; PPC64-NEXT:    stwcx. r9, 0, r3
; PPC64-NEXT:    bne cr0, .LBB9_1
; PPC64-NEXT:  .LBB9_3:
; PPC64-NEXT:    srw r3, r8, r4
; PPC64-NEXT:    lwsync
; PPC64-NEXT:    blr
  %val = cmpxchg weak ptr %mem, i16 0, i16 1 acquire acquire
  %loaded = extractvalue { i16, i1 } %val, 0
  ret i16 %loaded
}

define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) {
; CHECK-LABEL: cas_strong_i32_acqrel_acquire:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r5, 1
; CHECK-NEXT:    lwsync
; CHECK-NEXT:  .LBB10_1:
; CHECK-NEXT:    lwarx r4, 0, r3
; CHECK-NEXT:    cmpwi r4, 0
; CHECK-NEXT:    bne cr0, .LBB10_3
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    stwcx. r5, 0, r3
; CHECK-NEXT:    bne cr0, .LBB10_1
; CHECK-NEXT:  .LBB10_3:
; CHECK-NEXT:    mr r3, r4
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    blr
  %val = cmpxchg ptr %mem, i32 0, i32 1 acq_rel acquire
  %loaded = extractvalue { i32, i1 } %val, 0
  ret i32 %loaded
}

define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
; PPC32-LABEL: cas_weak_i64_release_monotonic:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r4, 0
; PPC32-NEXT:    stw r4, 12(r1)
; PPC32-NEXT:    li r5, 0
; PPC32-NEXT:    stw r4, 8(r1)
; PPC32-NEXT:    addi r4, r1, 8
; PPC32-NEXT:    li r6, 1
; PPC32-NEXT:    li r7, 3
; PPC32-NEXT:    li r8, 0
; PPC32-NEXT:    bl __atomic_compare_exchange_8
; PPC32-NEXT:    lwz r4, 12(r1)
; PPC32-NEXT:    lwz r3, 8(r1)
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: cas_weak_i64_release_monotonic:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r5, 1
; PPC64-NEXT:    lwsync
; PPC64-NEXT:  .LBB11_1:
; PPC64-NEXT:    ldarx r4, 0, r3
; PPC64-NEXT:    cmpdi r4, 0
; PPC64-NEXT:    bne cr0, .LBB11_3
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    stdcx. r5, 0, r3
; PPC64-NEXT:    bne cr0, .LBB11_1
; PPC64-NEXT:  .LBB11_3:
; PPC64-NEXT:    mr r3, r4
; PPC64-NEXT:    blr
  %val = cmpxchg weak ptr %mem, i64 0, i64 1 release monotonic
  %loaded = extractvalue { i64, i1 } %val, 0
  ret i64 %loaded
}

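; Note: in the PPC32 libcall above, r4 points at the in-memory expected value,
; r5:r6 hold the 64-bit desired value, and li r7, 3 / li r8, 0 pass the C ABI
; success and failure orderings (__ATOMIC_RELEASE == 3, __ATOMIC_RELAXED == 0).
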
; AtomicRMW
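; RMW operations expand to the same lwarx/stwcx. retry loop, with the
; operation applied between the load and the store. Subword RMW again works
; on the containing word under a lane mask; the old value is recovered with
; srw and zero-extended with clrlwi before being returned.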
define i8 @add_i8_monotonic(ptr %mem, i8 %operand) {
; PPC32-LABEL: add_i8_monotonic:
; PPC32:       # %bb.0:
; PPC32-NEXT:    rlwinm r7, r3, 3, 27, 28
; PPC32-NEXT:    li r6, 255
; PPC32-NEXT:    rlwinm r5, r3, 0, 0, 29
; PPC32-NEXT:    xori r3, r7, 24
; PPC32-NEXT:    slw r4, r4, r3
; PPC32-NEXT:    slw r6, r6, r3
; PPC32-NEXT:  .LBB12_1:
; PPC32-NEXT:    lwarx r7, 0, r5
; PPC32-NEXT:    add r8, r4, r7
; PPC32-NEXT:    andc r9, r7, r6
; PPC32-NEXT:    and r8, r8, r6
; PPC32-NEXT:    or r8, r8, r9
; PPC32-NEXT:    stwcx. r8, 0, r5
; PPC32-NEXT:    bne cr0, .LBB12_1
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    srw r3, r7, r3
; PPC32-NEXT:    clrlwi r3, r3, 24
; PPC32-NEXT:    blr
;
; PPC64-LABEL: add_i8_monotonic:
; PPC64:       # %bb.0:
; PPC64-NEXT:    rlwinm r7, r3, 3, 27, 28
; PPC64-NEXT:    li r6, 255
; PPC64-NEXT:    rldicr r5, r3, 0, 61
; PPC64-NEXT:    xori r3, r7, 24
; PPC64-NEXT:    slw r4, r4, r3
; PPC64-NEXT:    slw r6, r6, r3
; PPC64-NEXT:  .LBB12_1:
; PPC64-NEXT:    lwarx r7, 0, r5
; PPC64-NEXT:    add r8, r4, r7
; PPC64-NEXT:    andc r9, r7, r6
; PPC64-NEXT:    and r8, r8, r6
; PPC64-NEXT:    or r8, r8, r9
; PPC64-NEXT:    stwcx. r8, 0, r5
; PPC64-NEXT:    bne cr0, .LBB12_1
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    srw r3, r7, r3
; PPC64-NEXT:    clrlwi r3, r3, 24
; PPC64-NEXT:    blr
  %val = atomicrmw add ptr %mem, i8 %operand monotonic
  ret i8 %val
}

define i16 @xor_i16_seq_cst(ptr %mem, i16 %operand) {
; PPC32-LABEL: xor_i16_seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    li r5, 0
; PPC32-NEXT:    rlwinm r6, r3, 3, 27, 27
; PPC32-NEXT:    ori r7, r5, 65535
; PPC32-NEXT:    xori r5, r6, 16
; PPC32-NEXT:    rlwinm r3, r3, 0, 0, 29
; PPC32-NEXT:    slw r4, r4, r5
; PPC32-NEXT:    slw r6, r7, r5
; PPC32-NEXT:    sync
; PPC32-NEXT:  .LBB13_1:
; PPC32-NEXT:    lwarx r7, 0, r3
; PPC32-NEXT:    xor r8, r4, r7
; PPC32-NEXT:    andc r9, r7, r6
; PPC32-NEXT:    and r8, r8, r6
; PPC32-NEXT:    or r8, r8, r9
; PPC32-NEXT:    stwcx. r8, 0, r3
; PPC32-NEXT:    bne cr0, .LBB13_1
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    srw r3, r7, r5
; PPC32-NEXT:    clrlwi r3, r3, 16
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: xor_i16_seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r5, 0
; PPC64-NEXT:    rlwinm r6, r3, 3, 27, 27
; PPC64-NEXT:    ori r7, r5, 65535
; PPC64-NEXT:    xori r5, r6, 16
; PPC64-NEXT:    rldicr r3, r3, 0, 61
; PPC64-NEXT:    slw r4, r4, r5
; PPC64-NEXT:    slw r6, r7, r5
; PPC64-NEXT:    sync
; PPC64-NEXT:  .LBB13_1:
; PPC64-NEXT:    lwarx r7, 0, r3
; PPC64-NEXT:    xor r8, r4, r7
; PPC64-NEXT:    andc r9, r7, r6
; PPC64-NEXT:    and r8, r8, r6
; PPC64-NEXT:    or r8, r8, r9
; PPC64-NEXT:    stwcx. r8, 0, r3
; PPC64-NEXT:    bne cr0, .LBB13_1
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    srw r3, r7, r5
; PPC64-NEXT:    clrlwi r3, r3, 16
; PPC64-NEXT:    lwsync
; PPC64-NEXT:    blr
  %val = atomicrmw xor ptr %mem, i16 %operand seq_cst
  ret i16 %val
}

define i32 @xchg_i32_acq_rel(ptr %mem, i32 %operand) {
; CHECK-LABEL: xchg_i32_acq_rel:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lwsync
; CHECK-NEXT:  .LBB14_1:
; CHECK-NEXT:    lwarx r5, 0, r3
; CHECK-NEXT:    stwcx. r4, 0, r3
; CHECK-NEXT:    bne cr0, .LBB14_1
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    mr r3, r5
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    blr
  %val = atomicrmw xchg ptr %mem, i32 %operand acq_rel
  ret i32 %val
}

define i64 @and_i64_release(ptr %mem, i64 %operand) {
; PPC32-LABEL: and_i64_release:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r7, 3
; PPC32-NEXT:    bl __atomic_fetch_and_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: and_i64_release:
; PPC64:       # %bb.0:
; PPC64-NEXT:    lwsync
; PPC64-NEXT:  .LBB15_1:
; PPC64-NEXT:    ldarx r5, 0, r3
; PPC64-NEXT:    and r6, r4, r5
; PPC64-NEXT:    stdcx. r6, 0, r3
; PPC64-NEXT:    bne cr0, .LBB15_1
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    mr r3, r5
; PPC64-NEXT:    blr
  %val = atomicrmw and ptr %mem, i64 %operand release
  ret i64 %val
}

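; Floating-point atomics are legalized through the integer paths: half goes
; through i16 plus the __gnu_h2f_ieee/__gnu_f2h_ieee soft-float conversions,
; while float and double use integer atomic loads/stores of the same width.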
define half @load_atomic_f16__seq_cst(ptr %ptr) {
; PPC32-LABEL: load_atomic_f16__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    sync
; PPC32-NEXT:    lhz r3, 0(r3)
; PPC32-NEXT:    cmpw cr7, r3, r3
; PPC32-NEXT:    bne- cr7, .+4
; PPC32-NEXT:    isync
; PPC32-NEXT:    bl __gnu_h2f_ieee
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_atomic_f16__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    mflr r0
; PPC64-NEXT:    stdu r1, -112(r1)
; PPC64-NEXT:    std r0, 128(r1)
; PPC64-NEXT:    .cfi_def_cfa_offset 112
; PPC64-NEXT:    .cfi_offset lr, 16
; PPC64-NEXT:    sync
; PPC64-NEXT:    lhz r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    bl __gnu_h2f_ieee
; PPC64-NEXT:    nop
; PPC64-NEXT:    addi r1, r1, 112
; PPC64-NEXT:    ld r0, 16(r1)
; PPC64-NEXT:    mtlr r0
; PPC64-NEXT:    blr
  %val = load atomic half, ptr %ptr seq_cst, align 2
  ret half %val
}

; FIXME: bf16_to_fp fails to select
; define bfloat @load_atomic_bf16__seq_cst(ptr %ptr) {
;   %val = load atomic bfloat, ptr %ptr seq_cst, align 2
;   ret bfloat %val
; }

define float @load_atomic_f32__seq_cst(ptr %ptr) {
; PPC32-LABEL: load_atomic_f32__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    sync
; PPC32-NEXT:    lwz r3, 0(r3)
; PPC32-NEXT:    cmpw cr7, r3, r3
; PPC32-NEXT:    bne- cr7, .+4
; PPC32-NEXT:    isync
; PPC32-NEXT:    stw r3, 12(r1)
; PPC32-NEXT:    lfs f1, 12(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_atomic_f32__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    sync
; PPC64-NEXT:    lwz r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    stw r3, -4(r1)
; PPC64-NEXT:    lfs f1, -4(r1)
; PPC64-NEXT:    blr
  %val = load atomic float, ptr %ptr seq_cst, align 4
  ret float %val
}

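; Note: these subtargets have no direct GPR-to-FPR move, so the integer load
; result is round-tripped through a stack slot (stw followed by lfs, or std
; followed by lfd for double) to materialize the FP return value.
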
define double @load_atomic_f64__seq_cst(ptr %ptr) {
; PPC32-LABEL: load_atomic_f64__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r4, 5
; PPC32-NEXT:    bl __atomic_load_8
; PPC32-NEXT:    stw r4, 12(r1)
; PPC32-NEXT:    stw r3, 8(r1)
; PPC32-NEXT:    lfd f1, 8(r1)
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_atomic_f64__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    sync
; PPC64-NEXT:    ld r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    std r3, -8(r1)
; PPC64-NEXT:    lfd f1, -8(r1)
; PPC64-NEXT:    blr
  %val = load atomic double, ptr %ptr seq_cst, align 8
  ret double %val
}

define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) {
; PPC32-LABEL: store_atomic_f16__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    .cfi_offset r30, -8
; PPC32-NEXT:    stw r30, 8(r1) # 4-byte Folded Spill
; PPC32-NEXT:    mr r30, r3
; PPC32-NEXT:    bl __gnu_f2h_ieee
; PPC32-NEXT:    sync
; PPC32-NEXT:    sth r3, 0(r30)
; PPC32-NEXT:    lwz r30, 8(r1) # 4-byte Folded Reload
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_atomic_f16__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    mflr r0
; PPC64-NEXT:    stdu r1, -128(r1)
; PPC64-NEXT:    std r0, 144(r1)
; PPC64-NEXT:    .cfi_def_cfa_offset 128
; PPC64-NEXT:    .cfi_offset lr, 16
; PPC64-NEXT:    .cfi_offset r30, -16
; PPC64-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
; PPC64-NEXT:    mr r30, r3
; PPC64-NEXT:    bl __gnu_f2h_ieee
; PPC64-NEXT:    nop
; PPC64-NEXT:    sync
; PPC64-NEXT:    sth r3, 0(r30)
; PPC64-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
; PPC64-NEXT:    addi r1, r1, 128
; PPC64-NEXT:    ld r0, 16(r1)
; PPC64-NEXT:    mtlr r0
; PPC64-NEXT:    blr
  store atomic half %val1, ptr %ptr seq_cst, align 2
  ret void
}

; FIXME: bf16_to_fp fails to select
; define void @store_atomic_bf16__seq_cst(ptr %ptr, bfloat %val1) {
;   store atomic bfloat %val1, ptr %ptr seq_cst, align 2
;   ret void
; }

define void @store_atomic_f32__seq_cst(ptr %ptr, float %val1) {
; PPC32-LABEL: store_atomic_f32__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    stfs f1, 12(r1)
; PPC32-NEXT:    lwz r4, 12(r1)
; PPC32-NEXT:    sync
; PPC32-NEXT:    stw r4, 0(r3)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_atomic_f32__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    stfs f1, -4(r1)
; PPC64-NEXT:    lwz r4, -4(r1)
; PPC64-NEXT:    sync
; PPC64-NEXT:    stw r4, 0(r3)
; PPC64-NEXT:    blr
  store atomic float %val1, ptr %ptr seq_cst, align 4
  ret void
}

define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
; PPC32-LABEL: store_atomic_f64__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    stfd f1, 8(r1)
; PPC32-NEXT:    li r7, 5
; PPC32-NEXT:    lwz r5, 8(r1)
; PPC32-NEXT:    lwz r6, 12(r1)
; PPC32-NEXT:    bl __atomic_store_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_atomic_f64__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    stfd f1, -8(r1)
; PPC64-NEXT:    ld r4, -8(r1)
; PPC64-NEXT:    sync
; PPC64-NEXT:    std r4, 0(r3)
; PPC64-NEXT:    blr
  store atomic double %val1, ptr %ptr seq_cst, align 8
  ret void
}