[X86] Use fence(seq_cst) in IdempotentRMWIntoFencedLoad (#126521)
This extends the optimization to scenarios where the subtarget lacks `mfence` (`!hasMFence`) or the operation uses SyncScope::SingleThread, by avoiding direct usage of the `llvm.x86.sse2.mfence` intrinsic.
This commit is contained in:
@@ -31905,21 +31905,10 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
|
||||
// otherwise, we might be able to be more aggressive on relaxed idempotent
|
||||
// rmw. In practice, they do not look useful, so we don't try to be
|
||||
// especially clever.
|
||||
if (SSID == SyncScope::SingleThread)
|
||||
// FIXME: we could just insert an ISD::MEMBARRIER here, except we are at
|
||||
// the IR level, so we must wrap it in an intrinsic.
|
||||
return nullptr;
|
||||
|
||||
if (!Subtarget.hasMFence())
|
||||
// FIXME: it might make sense to use a locked operation here but on a
|
||||
// different cache-line to prevent cache-line bouncing. In practice it
|
||||
// is probably a small win, and x86 processors without mfence are rare
|
||||
// enough that we do not bother.
|
||||
return nullptr;
|
||||
|
||||
Function *MFence =
|
||||
llvm::Intrinsic::getOrInsertDeclaration(M, Intrinsic::x86_sse2_mfence);
|
||||
Builder.CreateCall(MFence, {});
|
||||
// Use `fence seq_cst` over `llvm.x86.sse2.mfence` here to get the correct
|
||||
// lowering for SSID == SyncScope::SingleThread and !hasMFence
|
||||
Builder.CreateFence(AtomicOrdering::SequentiallyConsistent, SSID);
|
||||
|
||||
// Finally we can emit the atomic load.
|
||||
LoadInst *Loaded = Builder.CreateAlignedLoad(
|
||||
|
||||
615
llvm/test/CodeGen/X86/atomic-idempotent-syncscope.ll
Normal file
615
llvm/test/CodeGen/X86/atomic-idempotent-syncscope.ll
Normal file
@@ -0,0 +1,615 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SSE2
|
||||
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=slm -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
|
||||
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=goldmont -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
|
||||
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=knl -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
|
||||
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=atom -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-ATOM
|
||||
|
||||
; On x86, an atomic rmw operation that does not modify the value in memory
|
||||
; (such as atomic add 0) can be replaced by a fence followed by a mov; with
; syncscope("singlethread") the fence lowers to a compiler-only #MEMBARRIER
; rather than an mfence instruction.
|
||||
; This is explained (with the motivation for such an optimization) in
|
||||
; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
|
||||
|
||||
define i8 @add8(ptr %p) #0 {
|
||||
; X64-LABEL: add8:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
; X64-NEXT: movzbl (%rdi), %eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: add8:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-GENERIC-NEXT: #MEMBARRIER
|
||||
; X86-GENERIC-NEXT: movzbl (%eax), %eax
|
||||
; X86-GENERIC-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: add8:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-ATOM-NEXT: #MEMBARRIER
|
||||
; X86-ATOM-NEXT: movzbl (%eax), %eax
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
%1 = atomicrmw add ptr %p, i8 0 syncscope("singlethread") monotonic
|
||||
ret i8 %1
|
||||
}
|
||||
|
||||
define i16 @or16(ptr %p) #0 {
|
||||
; X64-LABEL: or16:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
; X64-NEXT: movzwl (%rdi), %eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: or16:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-GENERIC-NEXT: #MEMBARRIER
|
||||
; X86-GENERIC-NEXT: movzwl (%eax), %eax
|
||||
; X86-GENERIC-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: or16:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-ATOM-NEXT: #MEMBARRIER
|
||||
; X86-ATOM-NEXT: movzwl (%eax), %eax
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
%1 = atomicrmw or ptr %p, i16 0 syncscope("singlethread") acquire
|
||||
ret i16 %1
|
||||
}
|
||||
|
||||
define i32 @xor32(ptr %p) #0 {
|
||||
; X64-LABEL: xor32:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
; X64-NEXT: movl (%rdi), %eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: xor32:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-GENERIC-NEXT: #MEMBARRIER
|
||||
; X86-GENERIC-NEXT: movl (%eax), %eax
|
||||
; X86-GENERIC-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: xor32:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-ATOM-NEXT: #MEMBARRIER
|
||||
; X86-ATOM-NEXT: movl (%eax), %eax
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
%1 = atomicrmw xor ptr %p, i32 0 syncscope("singlethread") release
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i64 @sub64(ptr %p) #0 {
|
||||
; X64-LABEL: sub64:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
; X64-NEXT: movq (%rdi), %rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: sub64:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl (%esi), %eax
|
||||
; X86-NEXT: movl 4(%esi), %edx
|
||||
; X86-NEXT: .p2align 4
|
||||
; X86-NEXT: .LBB3_1: # %atomicrmw.start
|
||||
; X86-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-NEXT: movl %edx, %ecx
|
||||
; X86-NEXT: movl %eax, %ebx
|
||||
; X86-NEXT: lock cmpxchg8b (%esi)
|
||||
; X86-NEXT: jne .LBB3_1
|
||||
; X86-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: retl
|
||||
%1 = atomicrmw sub ptr %p, i64 0 syncscope("singlethread") seq_cst
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i128 @or128(ptr %p) #0 {
|
||||
; X64-LABEL: or128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: xorl %esi, %esi
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: xorl %ecx, %ecx
|
||||
; X64-NEXT: callq __atomic_fetch_or_16@PLT
|
||||
; X64-NEXT: popq %rcx
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: or128:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: pushl %ebp
|
||||
; X86-GENERIC-NEXT: movl %esp, %ebp
|
||||
; X86-GENERIC-NEXT: pushl %ebx
|
||||
; X86-GENERIC-NEXT: pushl %edi
|
||||
; X86-GENERIC-NEXT: pushl %esi
|
||||
; X86-GENERIC-NEXT: andl $-16, %esp
|
||||
; X86-GENERIC-NEXT: subl $48, %esp
|
||||
; X86-GENERIC-NEXT: movl 12(%ebp), %edi
|
||||
; X86-GENERIC-NEXT: movl 12(%edi), %ecx
|
||||
; X86-GENERIC-NEXT: movl 8(%edi), %edx
|
||||
; X86-GENERIC-NEXT: movl (%edi), %ebx
|
||||
; X86-GENERIC-NEXT: movl 4(%edi), %esi
|
||||
; X86-GENERIC-NEXT: .p2align 4
|
||||
; X86-GENERIC-NEXT: .LBB4_1: # %atomicrmw.start
|
||||
; X86-GENERIC-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-GENERIC-NEXT: movl %ebx, (%esp)
|
||||
; X86-GENERIC-NEXT: movl %esi, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: movl %esi, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: pushl $0
|
||||
; X86-GENERIC-NEXT: pushl $0
|
||||
; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||
; X86-GENERIC-NEXT: pushl %eax
|
||||
; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||
; X86-GENERIC-NEXT: pushl %eax
|
||||
; X86-GENERIC-NEXT: pushl %edi
|
||||
; X86-GENERIC-NEXT: pushl $16
|
||||
; X86-GENERIC-NEXT: calll __atomic_compare_exchange@PLT
|
||||
; X86-GENERIC-NEXT: addl $24, %esp
|
||||
; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-GENERIC-NEXT: movl (%esp), %ebx
|
||||
; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-GENERIC-NEXT: testb %al, %al
|
||||
; X86-GENERIC-NEXT: je .LBB4_1
|
||||
; X86-GENERIC-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-GENERIC-NEXT: movl 8(%ebp), %eax
|
||||
; X86-GENERIC-NEXT: movl %ebx, (%eax)
|
||||
; X86-GENERIC-NEXT: movl %esi, 4(%eax)
|
||||
; X86-GENERIC-NEXT: movl %edx, 8(%eax)
|
||||
; X86-GENERIC-NEXT: movl %ecx, 12(%eax)
|
||||
; X86-GENERIC-NEXT: leal -12(%ebp), %esp
|
||||
; X86-GENERIC-NEXT: popl %esi
|
||||
; X86-GENERIC-NEXT: popl %edi
|
||||
; X86-GENERIC-NEXT: popl %ebx
|
||||
; X86-GENERIC-NEXT: popl %ebp
|
||||
; X86-GENERIC-NEXT: retl $4
|
||||
;
|
||||
; X86-ATOM-LABEL: or128:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: pushl %ebp
|
||||
; X86-ATOM-NEXT: movl %esp, %ebp
|
||||
; X86-ATOM-NEXT: pushl %ebx
|
||||
; X86-ATOM-NEXT: pushl %edi
|
||||
; X86-ATOM-NEXT: pushl %esi
|
||||
; X86-ATOM-NEXT: andl $-16, %esp
|
||||
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
|
||||
; X86-ATOM-NEXT: movl 12(%ebp), %edi
|
||||
; X86-ATOM-NEXT: movl 12(%edi), %ecx
|
||||
; X86-ATOM-NEXT: movl 8(%edi), %edx
|
||||
; X86-ATOM-NEXT: movl (%edi), %esi
|
||||
; X86-ATOM-NEXT: movl 4(%edi), %ebx
|
||||
; X86-ATOM-NEXT: .p2align 4
|
||||
; X86-ATOM-NEXT: .LBB4_1: # %atomicrmw.start
|
||||
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-ATOM-NEXT: movl %esi, (%esp)
|
||||
; X86-ATOM-NEXT: movl %ebx, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: movl %ebx, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: movl %esi, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: pushl $0
|
||||
; X86-ATOM-NEXT: pushl $0
|
||||
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||
; X86-ATOM-NEXT: pushl %eax
|
||||
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||
; X86-ATOM-NEXT: pushl %eax
|
||||
; X86-ATOM-NEXT: pushl %edi
|
||||
; X86-ATOM-NEXT: pushl $16
|
||||
; X86-ATOM-NEXT: calll __atomic_compare_exchange@PLT
|
||||
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-ATOM-NEXT: testb %al, %al
|
||||
; X86-ATOM-NEXT: movl (%esp), %esi
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-ATOM-NEXT: je .LBB4_1
|
||||
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-ATOM-NEXT: movl 8(%ebp), %eax
|
||||
; X86-ATOM-NEXT: movl %esi, (%eax)
|
||||
; X86-ATOM-NEXT: movl %ebx, 4(%eax)
|
||||
; X86-ATOM-NEXT: movl %edx, 8(%eax)
|
||||
; X86-ATOM-NEXT: movl %ecx, 12(%eax)
|
||||
; X86-ATOM-NEXT: leal -12(%ebp), %esp
|
||||
; X86-ATOM-NEXT: popl %esi
|
||||
; X86-ATOM-NEXT: popl %edi
|
||||
; X86-ATOM-NEXT: popl %ebx
|
||||
; X86-ATOM-NEXT: popl %ebp
|
||||
; X86-ATOM-NEXT: retl $4
|
||||
%1 = atomicrmw or ptr %p, i128 0 syncscope("singlethread") monotonic
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
; For 'and', the idempotent value is (-1)
|
||||
define i32 @and32 (ptr %p) #0 {
|
||||
; X64-LABEL: and32:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
; X64-NEXT: movl (%rdi), %eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: and32:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-GENERIC-NEXT: #MEMBARRIER
|
||||
; X86-GENERIC-NEXT: movl (%eax), %eax
|
||||
; X86-GENERIC-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: and32:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-ATOM-NEXT: #MEMBARRIER
|
||||
; X86-ATOM-NEXT: movl (%eax), %eax
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
%1 = atomicrmw and ptr %p, i32 -1 syncscope("singlethread") acq_rel
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define void @or32_nouse_monotonic(ptr %p) #0 {
|
||||
; X64-LABEL: or32_nouse_monotonic:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: or32_nouse_monotonic:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: #MEMBARRIER
|
||||
; X86-GENERIC-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: or32_nouse_monotonic:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: #MEMBARRIER
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
atomicrmw or ptr %p, i32 0 syncscope("singlethread") monotonic
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
define void @or32_nouse_acquire(ptr %p) #0 {
|
||||
; X64-LABEL: or32_nouse_acquire:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: or32_nouse_acquire:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: #MEMBARRIER
|
||||
; X86-GENERIC-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: or32_nouse_acquire:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: #MEMBARRIER
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
atomicrmw or ptr %p, i32 0 acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @or32_nouse_release(ptr %p) #0 {
|
||||
; X64-LABEL: or32_nouse_release:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: or32_nouse_release:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: #MEMBARRIER
|
||||
; X86-GENERIC-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: or32_nouse_release:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: #MEMBARRIER
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
atomicrmw or ptr %p, i32 0 syncscope("singlethread") release
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @or32_nouse_acq_rel(ptr %p) #0 {
|
||||
; X64-LABEL: or32_nouse_acq_rel:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: or32_nouse_acq_rel:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: #MEMBARRIER
|
||||
; X86-GENERIC-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: or32_nouse_acq_rel:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: #MEMBARRIER
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
atomicrmw or ptr %p, i32 0 syncscope("singlethread") acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @or32_nouse_seq_cst(ptr %p) #0 {
|
||||
; X64-LABEL: or32_nouse_seq_cst:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: or32_nouse_seq_cst:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: #MEMBARRIER
|
||||
; X86-GENERIC-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: or32_nouse_seq_cst:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: #MEMBARRIER
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
atomicrmw or ptr %p, i32 0 syncscope("singlethread") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; TODO: The value isn't used on 32 bit, so the cmpxchg8b is unneeded
|
||||
define void @or64_nouse_seq_cst(ptr %p) #0 {
|
||||
; X64-LABEL: or64_nouse_seq_cst:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: or64_nouse_seq_cst:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl (%esi), %eax
|
||||
; X86-NEXT: movl 4(%esi), %edx
|
||||
; X86-NEXT: .p2align 4
|
||||
; X86-NEXT: .LBB11_1: # %atomicrmw.start
|
||||
; X86-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-NEXT: movl %edx, %ecx
|
||||
; X86-NEXT: movl %eax, %ebx
|
||||
; X86-NEXT: lock cmpxchg8b (%esi)
|
||||
; X86-NEXT: jne .LBB11_1
|
||||
; X86-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: retl
|
||||
atomicrmw or ptr %p, i64 0 syncscope("singlethread") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; TODO: Don't need to lower as sync_and_fetch call
|
||||
define void @or128_nouse_seq_cst(ptr %p) #0 {
|
||||
; X64-LABEL: or128_nouse_seq_cst:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: xorl %esi, %esi
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: movl $5, %ecx
|
||||
; X64-NEXT: callq __atomic_fetch_or_16@PLT
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: or128_nouse_seq_cst:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: pushl %ebp
|
||||
; X86-GENERIC-NEXT: movl %esp, %ebp
|
||||
; X86-GENERIC-NEXT: pushl %ebx
|
||||
; X86-GENERIC-NEXT: pushl %edi
|
||||
; X86-GENERIC-NEXT: pushl %esi
|
||||
; X86-GENERIC-NEXT: andl $-16, %esp
|
||||
; X86-GENERIC-NEXT: subl $48, %esp
|
||||
; X86-GENERIC-NEXT: movl 8(%ebp), %esi
|
||||
; X86-GENERIC-NEXT: movl 12(%esi), %ecx
|
||||
; X86-GENERIC-NEXT: movl 8(%esi), %edi
|
||||
; X86-GENERIC-NEXT: movl (%esi), %edx
|
||||
; X86-GENERIC-NEXT: movl 4(%esi), %ebx
|
||||
; X86-GENERIC-NEXT: .p2align 4
|
||||
; X86-GENERIC-NEXT: .LBB12_1: # %atomicrmw.start
|
||||
; X86-GENERIC-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-GENERIC-NEXT: movl %edx, (%esp)
|
||||
; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: movl %edi, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: movl %edi, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; X86-GENERIC-NEXT: pushl $5
|
||||
; X86-GENERIC-NEXT: pushl $5
|
||||
; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||
; X86-GENERIC-NEXT: pushl %eax
|
||||
; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||
; X86-GENERIC-NEXT: pushl %eax
|
||||
; X86-GENERIC-NEXT: pushl %esi
|
||||
; X86-GENERIC-NEXT: pushl $16
|
||||
; X86-GENERIC-NEXT: calll __atomic_compare_exchange@PLT
|
||||
; X86-GENERIC-NEXT: addl $24, %esp
|
||||
; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-GENERIC-NEXT: movl (%esp), %edx
|
||||
; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-GENERIC-NEXT: testb %al, %al
|
||||
; X86-GENERIC-NEXT: je .LBB12_1
|
||||
; X86-GENERIC-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-GENERIC-NEXT: leal -12(%ebp), %esp
|
||||
; X86-GENERIC-NEXT: popl %esi
|
||||
; X86-GENERIC-NEXT: popl %edi
|
||||
; X86-GENERIC-NEXT: popl %ebx
|
||||
; X86-GENERIC-NEXT: popl %ebp
|
||||
; X86-GENERIC-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: or128_nouse_seq_cst:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: pushl %ebp
|
||||
; X86-ATOM-NEXT: movl %esp, %ebp
|
||||
; X86-ATOM-NEXT: pushl %ebx
|
||||
; X86-ATOM-NEXT: pushl %edi
|
||||
; X86-ATOM-NEXT: pushl %esi
|
||||
; X86-ATOM-NEXT: andl $-16, %esp
|
||||
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
|
||||
; X86-ATOM-NEXT: movl 8(%ebp), %esi
|
||||
; X86-ATOM-NEXT: movl %esp, %ebx
|
||||
; X86-ATOM-NEXT: movl 12(%esi), %ecx
|
||||
; X86-ATOM-NEXT: movl 8(%esi), %edx
|
||||
; X86-ATOM-NEXT: movl (%esi), %eax
|
||||
; X86-ATOM-NEXT: movl 4(%esi), %edi
|
||||
; X86-ATOM-NEXT: .p2align 4
|
||||
; X86-ATOM-NEXT: .LBB12_1: # %atomicrmw.start
|
||||
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-ATOM-NEXT: movl %eax, (%esp)
|
||||
; X86-ATOM-NEXT: movl %edi, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: movl %edi, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X86-ATOM-NEXT: pushl $5
|
||||
; X86-ATOM-NEXT: pushl $5
|
||||
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||
; X86-ATOM-NEXT: pushl %eax
|
||||
; X86-ATOM-NEXT: pushl %ebx
|
||||
; X86-ATOM-NEXT: pushl %esi
|
||||
; X86-ATOM-NEXT: pushl $16
|
||||
; X86-ATOM-NEXT: calll __atomic_compare_exchange@PLT
|
||||
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
|
||||
; X86-ATOM-NEXT: testb %al, %al
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-ATOM-NEXT: movl (%esp), %eax
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-ATOM-NEXT: je .LBB12_1
|
||||
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-ATOM-NEXT: leal -12(%ebp), %esp
|
||||
; X86-ATOM-NEXT: popl %esi
|
||||
; X86-ATOM-NEXT: popl %edi
|
||||
; X86-ATOM-NEXT: popl %ebx
|
||||
; X86-ATOM-NEXT: popl %ebp
|
||||
; X86-ATOM-NEXT: retl
|
||||
atomicrmw or ptr %p, i128 0 syncscope("singlethread") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
define void @or16_nouse_seq_cst(ptr %p) #0 {
|
||||
; X64-LABEL: or16_nouse_seq_cst:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: or16_nouse_seq_cst:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: #MEMBARRIER
|
||||
; X86-GENERIC-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: or16_nouse_seq_cst:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: #MEMBARRIER
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
atomicrmw or ptr %p, i16 0 syncscope("singlethread") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @or8_nouse_seq_cst(ptr %p) #0 {
|
||||
; X64-LABEL: or8_nouse_seq_cst:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: or8_nouse_seq_cst:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: #MEMBARRIER
|
||||
; X86-GENERIC-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: or8_nouse_seq_cst:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: #MEMBARRIER
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
atomicrmw or ptr %p, i8 0 syncscope("singlethread") seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; X86-SLM: {{.*}}
|
||||
; X86-SSE2: {{.*}}
|
||||
@@ -11,7 +11,7 @@
|
||||
; This is explained (with the motivation for such an optimization) in
|
||||
; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
|
||||
|
||||
define i8 @add8(ptr %p) {
|
||||
define i8 @add8(ptr %p) #0 {
|
||||
; X64-LABEL: add8:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: mfence
|
||||
@@ -27,18 +27,16 @@ define i8 @add8(ptr %p) {
|
||||
;
|
||||
; X86-SLM-LABEL: add8:
|
||||
; X86-SLM: # %bb.0:
|
||||
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-SLM-NEXT: xorl %eax, %eax
|
||||
; X86-SLM-NEXT: lock xaddb %al, (%ecx)
|
||||
; X86-SLM-NEXT: # kill: def $al killed $al killed $eax
|
||||
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SLM-NEXT: lock orl $0, (%esp)
|
||||
; X86-SLM-NEXT: movzbl (%eax), %eax
|
||||
; X86-SLM-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: add8:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-ATOM-NEXT: xorl %eax, %eax
|
||||
; X86-ATOM-NEXT: lock xaddb %al, (%ecx)
|
||||
; X86-ATOM-NEXT: # kill: def $al killed $al killed $eax
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-ATOM-NEXT: lock orl $0, (%esp)
|
||||
; X86-ATOM-NEXT: movzbl (%eax), %eax
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
@@ -46,7 +44,7 @@ define i8 @add8(ptr %p) {
|
||||
ret i8 %1
|
||||
}
|
||||
|
||||
define i16 @or16(ptr %p) {
|
||||
define i16 @or16(ptr %p) #0 {
|
||||
; X64-LABEL: or16:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: mfence
|
||||
@@ -62,32 +60,24 @@ define i16 @or16(ptr %p) {
|
||||
;
|
||||
; X86-SLM-LABEL: or16:
|
||||
; X86-SLM: # %bb.0:
|
||||
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-SLM-NEXT: movzwl (%ecx), %eax
|
||||
; X86-SLM-NEXT: .p2align 4
|
||||
; X86-SLM-NEXT: .LBB1_1: # %atomicrmw.start
|
||||
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-SLM-NEXT: lock cmpxchgw %ax, (%ecx)
|
||||
; X86-SLM-NEXT: jne .LBB1_1
|
||||
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SLM-NEXT: lock orl $0, (%esp)
|
||||
; X86-SLM-NEXT: movzwl (%eax), %eax
|
||||
; X86-SLM-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: or16:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-ATOM-NEXT: movzwl (%ecx), %eax
|
||||
; X86-ATOM-NEXT: .p2align 4
|
||||
; X86-ATOM-NEXT: .LBB1_1: # %atomicrmw.start
|
||||
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-ATOM-NEXT: lock cmpxchgw %ax, (%ecx)
|
||||
; X86-ATOM-NEXT: jne .LBB1_1
|
||||
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-ATOM-NEXT: lock orl $0, (%esp)
|
||||
; X86-ATOM-NEXT: movzwl (%eax), %eax
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
%1 = atomicrmw or ptr %p, i16 0 acquire
|
||||
ret i16 %1
|
||||
}
|
||||
|
||||
define i32 @xor32(ptr %p) {
|
||||
define i32 @xor32(ptr %p) #0 {
|
||||
; X64-LABEL: xor32:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: mfence
|
||||
@@ -103,32 +93,24 @@ define i32 @xor32(ptr %p) {
|
||||
;
|
||||
; X86-SLM-LABEL: xor32:
|
||||
; X86-SLM: # %bb.0:
|
||||
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-SLM-NEXT: movl (%ecx), %eax
|
||||
; X86-SLM-NEXT: .p2align 4
|
||||
; X86-SLM-NEXT: .LBB2_1: # %atomicrmw.start
|
||||
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
|
||||
; X86-SLM-NEXT: jne .LBB2_1
|
||||
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SLM-NEXT: lock orl $0, (%esp)
|
||||
; X86-SLM-NEXT: movl (%eax), %eax
|
||||
; X86-SLM-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: xor32:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-ATOM-NEXT: movl (%ecx), %eax
|
||||
; X86-ATOM-NEXT: .p2align 4
|
||||
; X86-ATOM-NEXT: .LBB2_1: # %atomicrmw.start
|
||||
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
|
||||
; X86-ATOM-NEXT: jne .LBB2_1
|
||||
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-ATOM-NEXT: lock orl $0, (%esp)
|
||||
; X86-ATOM-NEXT: movl (%eax), %eax
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
%1 = atomicrmw xor ptr %p, i32 0 release
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i64 @sub64(ptr %p) {
|
||||
define i64 @sub64(ptr %p) #0 {
|
||||
; X64-LABEL: sub64:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: mfence
|
||||
@@ -138,11 +120,7 @@ define i64 @sub64(ptr %p) {
|
||||
; X86-LABEL: sub64:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; X86-NEXT: .cfi_offset %esi, -12
|
||||
; X86-NEXT: .cfi_offset %ebx, -8
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl (%esi), %eax
|
||||
; X86-NEXT: movl 4(%esi), %edx
|
||||
@@ -155,42 +133,32 @@ define i64 @sub64(ptr %p) {
|
||||
; X86-NEXT: jne .LBB3_1
|
||||
; X86-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
%1 = atomicrmw sub ptr %p, i64 0 seq_cst
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i128 @or128(ptr %p) {
|
||||
define i128 @or128(ptr %p) #0 {
|
||||
; X64-LABEL: or128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 16
|
||||
; X64-NEXT: xorl %esi, %esi
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: xorl %ecx, %ecx
|
||||
; X64-NEXT: callq __atomic_fetch_or_16@PLT
|
||||
; X64-NEXT: popq %rcx
|
||||
; X64-NEXT: .cfi_def_cfa_offset 8
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: or128:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: pushl %ebp
|
||||
; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-GENERIC-NEXT: .cfi_offset %ebp, -8
|
||||
; X86-GENERIC-NEXT: movl %esp, %ebp
|
||||
; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp
|
||||
; X86-GENERIC-NEXT: pushl %ebx
|
||||
; X86-GENERIC-NEXT: pushl %edi
|
||||
; X86-GENERIC-NEXT: pushl %esi
|
||||
; X86-GENERIC-NEXT: andl $-16, %esp
|
||||
; X86-GENERIC-NEXT: subl $48, %esp
|
||||
; X86-GENERIC-NEXT: .cfi_offset %esi, -20
|
||||
; X86-GENERIC-NEXT: .cfi_offset %edi, -16
|
||||
; X86-GENERIC-NEXT: .cfi_offset %ebx, -12
|
||||
; X86-GENERIC-NEXT: movl 12(%ebp), %edi
|
||||
; X86-GENERIC-NEXT: movl 12(%edi), %ecx
|
||||
; X86-GENERIC-NEXT: movl 8(%edi), %edx
|
||||
@@ -234,24 +202,17 @@ define i128 @or128(ptr %p) {
|
||||
; X86-GENERIC-NEXT: popl %edi
|
||||
; X86-GENERIC-NEXT: popl %ebx
|
||||
; X86-GENERIC-NEXT: popl %ebp
|
||||
; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4
|
||||
; X86-GENERIC-NEXT: retl $4
|
||||
;
|
||||
; X86-ATOM-LABEL: or128:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: pushl %ebp
|
||||
; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
|
||||
; X86-ATOM-NEXT: movl %esp, %ebp
|
||||
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
|
||||
; X86-ATOM-NEXT: pushl %ebx
|
||||
; X86-ATOM-NEXT: pushl %edi
|
||||
; X86-ATOM-NEXT: pushl %esi
|
||||
; X86-ATOM-NEXT: andl $-16, %esp
|
||||
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
|
||||
; X86-ATOM-NEXT: .cfi_offset %esi, -20
|
||||
; X86-ATOM-NEXT: .cfi_offset %edi, -16
|
||||
; X86-ATOM-NEXT: .cfi_offset %ebx, -12
|
||||
; X86-ATOM-NEXT: movl 12(%ebp), %edi
|
||||
; X86-ATOM-NEXT: movl 12(%edi), %ecx
|
||||
; X86-ATOM-NEXT: movl 8(%edi), %edx
|
||||
@@ -295,14 +256,13 @@ define i128 @or128(ptr %p) {
|
||||
; X86-ATOM-NEXT: popl %edi
|
||||
; X86-ATOM-NEXT: popl %ebx
|
||||
; X86-ATOM-NEXT: popl %ebp
|
||||
; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
|
||||
; X86-ATOM-NEXT: retl $4
|
||||
%1 = atomicrmw or ptr %p, i128 0 monotonic
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
; For 'and', the idempotent value is (-1)
|
||||
define i32 @and32 (ptr %p) {
|
||||
define i32 @and32 (ptr %p) #0 {
|
||||
; X64-LABEL: and32:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: mfence
|
||||
@@ -318,32 +278,24 @@ define i32 @and32 (ptr %p) {
|
||||
;
|
||||
; X86-SLM-LABEL: and32:
|
||||
; X86-SLM: # %bb.0:
|
||||
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-SLM-NEXT: movl (%ecx), %eax
|
||||
; X86-SLM-NEXT: .p2align 4
|
||||
; X86-SLM-NEXT: .LBB5_1: # %atomicrmw.start
|
||||
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
|
||||
; X86-SLM-NEXT: jne .LBB5_1
|
||||
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SLM-NEXT: lock orl $0, (%esp)
|
||||
; X86-SLM-NEXT: movl (%eax), %eax
|
||||
; X86-SLM-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: and32:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-ATOM-NEXT: movl (%ecx), %eax
|
||||
; X86-ATOM-NEXT: .p2align 4
|
||||
; X86-ATOM-NEXT: .LBB5_1: # %atomicrmw.start
|
||||
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
|
||||
; X86-ATOM-NEXT: jne .LBB5_1
|
||||
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-ATOM-NEXT: lock orl $0, (%esp)
|
||||
; X86-ATOM-NEXT: movl (%eax), %eax
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: nop
|
||||
; X86-ATOM-NEXT: retl
|
||||
%1 = atomicrmw and ptr %p, i32 -1 acq_rel
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define void @or32_nouse_monotonic(ptr %p) {
|
||||
define void @or32_nouse_monotonic(ptr %p) #0 {
|
||||
; X64-LABEL: or32_nouse_monotonic:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
@@ -371,7 +323,7 @@ define void @or32_nouse_monotonic(ptr %p) {
|
||||
}
|
||||
|
||||
|
||||
define void @or32_nouse_acquire(ptr %p) {
|
||||
define void @or32_nouse_acquire(ptr %p) #0 {
|
||||
; X64-LABEL: or32_nouse_acquire:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
@@ -398,7 +350,7 @@ define void @or32_nouse_acquire(ptr %p) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @or32_nouse_release(ptr %p) {
|
||||
define void @or32_nouse_release(ptr %p) #0 {
|
||||
; X64-LABEL: or32_nouse_release:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
@@ -425,7 +377,7 @@ define void @or32_nouse_release(ptr %p) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @or32_nouse_acq_rel(ptr %p) {
|
||||
define void @or32_nouse_acq_rel(ptr %p) #0 {
|
||||
; X64-LABEL: or32_nouse_acq_rel:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: #MEMBARRIER
|
||||
@@ -452,7 +404,7 @@ define void @or32_nouse_acq_rel(ptr %p) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @or32_nouse_seq_cst(ptr %p) {
|
||||
define void @or32_nouse_seq_cst(ptr %p) #0 {
|
||||
; X64-LABEL: or32_nouse_seq_cst:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
|
||||
@@ -478,7 +430,7 @@ define void @or32_nouse_seq_cst(ptr %p) {
|
||||
}
|
||||
|
||||
; TODO: The value isn't used on 32 bit, so the cmpxchg8b is unneeded
|
||||
define void @or64_nouse_seq_cst(ptr %p) {
|
||||
define void @or64_nouse_seq_cst(ptr %p) #0 {
|
||||
; X64-LABEL: or64_nouse_seq_cst:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
|
||||
@@ -487,11 +439,7 @@ define void @or64_nouse_seq_cst(ptr %p) {
|
||||
; X86-LABEL: or64_nouse_seq_cst:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; X86-NEXT: .cfi_offset %esi, -12
|
||||
; X86-NEXT: .cfi_offset %ebx, -8
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl (%esi), %eax
|
||||
; X86-NEXT: movl 4(%esi), %edx
|
||||
@@ -504,43 +452,33 @@ define void @or64_nouse_seq_cst(ptr %p) {
|
||||
; X86-NEXT: jne .LBB11_1
|
||||
; X86-NEXT: # %bb.2: # %atomicrmw.end
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
atomicrmw or ptr %p, i64 0 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; TODO: Don't need to lower as sync_and_fetch call
|
||||
define void @or128_nouse_seq_cst(ptr %p) {
|
||||
define void @or128_nouse_seq_cst(ptr %p) #0 {
|
||||
; X64-LABEL: or128_nouse_seq_cst:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 16
|
||||
; X64-NEXT: xorl %esi, %esi
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: movl $5, %ecx
|
||||
; X64-NEXT: callq __atomic_fetch_or_16@PLT
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: .cfi_def_cfa_offset 8
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-GENERIC-LABEL: or128_nouse_seq_cst:
|
||||
; X86-GENERIC: # %bb.0:
|
||||
; X86-GENERIC-NEXT: pushl %ebp
|
||||
; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-GENERIC-NEXT: .cfi_offset %ebp, -8
|
||||
; X86-GENERIC-NEXT: movl %esp, %ebp
|
||||
; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp
|
||||
; X86-GENERIC-NEXT: pushl %ebx
|
||||
; X86-GENERIC-NEXT: pushl %edi
|
||||
; X86-GENERIC-NEXT: pushl %esi
|
||||
; X86-GENERIC-NEXT: andl $-16, %esp
|
||||
; X86-GENERIC-NEXT: subl $48, %esp
|
||||
; X86-GENERIC-NEXT: .cfi_offset %esi, -20
|
||||
; X86-GENERIC-NEXT: .cfi_offset %edi, -16
|
||||
; X86-GENERIC-NEXT: .cfi_offset %ebx, -12
|
||||
; X86-GENERIC-NEXT: movl 8(%ebp), %esi
|
||||
; X86-GENERIC-NEXT: movl 12(%esi), %ecx
|
||||
; X86-GENERIC-NEXT: movl 8(%esi), %edi
|
||||
@@ -579,24 +517,17 @@ define void @or128_nouse_seq_cst(ptr %p) {
|
||||
; X86-GENERIC-NEXT: popl %edi
|
||||
; X86-GENERIC-NEXT: popl %ebx
|
||||
; X86-GENERIC-NEXT: popl %ebp
|
||||
; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4
|
||||
; X86-GENERIC-NEXT: retl
|
||||
;
|
||||
; X86-ATOM-LABEL: or128_nouse_seq_cst:
|
||||
; X86-ATOM: # %bb.0:
|
||||
; X86-ATOM-NEXT: pushl %ebp
|
||||
; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
|
||||
; X86-ATOM-NEXT: movl %esp, %ebp
|
||||
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
|
||||
; X86-ATOM-NEXT: pushl %ebx
|
||||
; X86-ATOM-NEXT: pushl %edi
|
||||
; X86-ATOM-NEXT: pushl %esi
|
||||
; X86-ATOM-NEXT: andl $-16, %esp
|
||||
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
|
||||
; X86-ATOM-NEXT: .cfi_offset %esi, -20
|
||||
; X86-ATOM-NEXT: .cfi_offset %edi, -16
|
||||
; X86-ATOM-NEXT: .cfi_offset %ebx, -12
|
||||
; X86-ATOM-NEXT: movl 8(%ebp), %esi
|
||||
; X86-ATOM-NEXT: movl %esp, %ebx
|
||||
; X86-ATOM-NEXT: movl 12(%esi), %ecx
|
||||
@@ -635,14 +566,13 @@ define void @or128_nouse_seq_cst(ptr %p) {
|
||||
; X86-ATOM-NEXT: popl %edi
|
||||
; X86-ATOM-NEXT: popl %ebx
|
||||
; X86-ATOM-NEXT: popl %ebp
|
||||
; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
|
||||
; X86-ATOM-NEXT: retl
|
||||
atomicrmw or ptr %p, i128 0 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
define void @or16_nouse_seq_cst(ptr %p) {
|
||||
define void @or16_nouse_seq_cst(ptr %p) #0 {
|
||||
; X64-LABEL: or16_nouse_seq_cst:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
|
||||
@@ -667,7 +597,7 @@ define void @or16_nouse_seq_cst(ptr %p) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @or8_nouse_seq_cst(ptr %p) {
|
||||
define void @or8_nouse_seq_cst(ptr %p) #0 {
|
||||
; X64-LABEL: or8_nouse_seq_cst:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
|
||||
@@ -691,3 +621,5 @@ define void @or8_nouse_seq_cst(ptr %p) {
|
||||
atomicrmw or ptr %p, i8 0 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
||||
Reference in New Issue
Block a user