Files
clang-p2996/llvm/test/CodeGen/X86/atomic_idempotent.ll
Craig Topper d7391eefdf [X86] Remove RELEASE_ and ACQUIRE_ pseudo instructions. Use isel patterns and the normal instructions instead
At one point in time acquire implied mayLoad and mayStore as did release. Thus we needed separate pseudos that also carried that property. This appears to no longer be the case. I believe it was changed in 2012 with a comment saying that atomic memory accesses are marked volatile which preserves the ordering.

So from what I can tell we shouldn't need additional pseudos since they aren't carry any flags that are different from the normal instructions. The only thing I can think of is that we may consider them for load folding candidates in the peephole pass now where we didn't before. If that's important hopefully there's something in the memory operand we can check to prevent the folding without relying on pseudo instructions.

Differential Revision: https://reviews.llvm.org/D50212

llvm-svn: 338925
2018-08-03 21:40:44 +00:00

167 lines
4.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; On x86, an atomic rmw operation that does not modify the value in memory
; (such as atomic add 0) can be replaced by an mfence followed by a mov.
; This is explained (with the motivation for such an optimization) in
; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
define i8 @add8(i8* %p) {
; X64-LABEL: add8:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movb (%rdi), %al
; X64-NEXT: retq
;
; X32-LABEL: add8:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: mfence
; X32-NEXT: movb (%eax), %al
; X32-NEXT: retl
%1 = atomicrmw add i8* %p, i8 0 monotonic
ret i8 %1
}
define i16 @or16(i16* %p) {
; X64-LABEL: or16:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: retq
;
; X32-LABEL: or16:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: mfence
; X32-NEXT: movzwl (%eax), %eax
; X32-NEXT: retl
%1 = atomicrmw or i16* %p, i16 0 acquire
ret i16 %1
}
define i32 @xor32(i32* %p) {
; X64-LABEL: xor32:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: retq
;
; X32-LABEL: xor32:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: mfence
; X32-NEXT: movl (%eax), %eax
; X32-NEXT: retl
%1 = atomicrmw xor i32* %p, i32 0 release
ret i32 %1
}
define i64 @sub64(i64* %p) {
; X64-LABEL: sub64:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: retq
;
; X32-LABEL: sub64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: pushl %esi
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: .cfi_offset %esi, -12
; X32-NEXT: .cfi_offset %ebx, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB3_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB3_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: popl %esi
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl
%1 = atomicrmw sub i64* %p, i64 0 seq_cst
ret i64 %1
}
define i128 @or128(i128* %p) {
; X64-LABEL: or128:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: callq __sync_fetch_and_or_16
; X64-NEXT: popq %rcx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; X32-LABEL: or128:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $16, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %edi, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: movl %esp, %eax
; X32-NEXT: pushl $0
; X32-NEXT: pushl $0
; X32-NEXT: pushl $0
; X32-NEXT: pushl $0
; X32-NEXT: pushl 12(%ebp)
; X32-NEXT: pushl %eax
; X32-NEXT: calll __sync_fetch_and_or_16
; X32-NEXT: addl $20, %esp
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: movl %edi, 12(%esi)
; X32-NEXT: movl %edx, 8(%esi)
; X32-NEXT: movl %ecx, 4(%esi)
; X32-NEXT: movl %eax, (%esi)
; X32-NEXT: movl %esi, %eax
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl $4
%1 = atomicrmw or i128* %p, i128 0 monotonic
ret i128 %1
}
; For 'and', the idempotent value is (-1)
define i32 @and32 (i32* %p) {
; X64-LABEL: and32:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: retq
;
; X32-LABEL: and32:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: mfence
; X32-NEXT: movl (%eax), %eax
; X32-NEXT: retl
%1 = atomicrmw and i32* %p, i32 -1 acq_rel
ret i32 %1
}