The motivation is that the update script has at least two deviations (emitting explicit `<...>@GOT`/`<...>@PLT` suffixes, and no longer hiding pointer arithmetic) from what pretty much all of the existing check lines were generated with, and most of the tests have not been regenerated since. So each time one of the out-of-date tests is regenerated to see the effect of a code change, there is a lot of unrelated noise in the diff. Instead of dealing with that each time, let's just deal with everything at once. This was done via:

```
cd llvm-project/llvm/test/CodeGen/X86
grep -rl "; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py" | xargs -L1 <...>/llvm-project/llvm/utils/update_llc_test_checks.py --llc-binary <...>/llvm-project/build/bin/llc
```

Not all tests were regenerated, however.
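For example, one of the two deviations (the explicit PLT suffix on call targets) shows up directly in the regenerated assertions of this test; a before/after sketch, where the "before" line is illustrative of the previously committed form:

```
; Before (illustrative):
; X64-NEXT: callq __sync_fetch_and_or_16
; After regeneration:
; X64-NEXT: callq __sync_fetch_and_or_16@PLT
```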
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SSE2
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=slm -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=goldmont -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=knl -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=atom -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-ATOM

; On x86, an atomic rmw operation that does not modify the value in memory
; (such as atomic add 0) can be replaced by an mfence followed by a mov.
; This is explained (with the motivation for such an optimization) in
; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
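; For example, in `add8` below, `atomicrmw add i8* %p, i8 0 monotonic` is
; compiled for x86-64 into an `mfence` followed by a plain `movb` load,
; rather than a `lock xaddb` with a zero operand (which the in-order SLM
; and Atom tunings still emit).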

define i8 @add8(i8* %p) {
; X64-LABEL: add8:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movb (%rdi), %al
; X64-NEXT: retq
;
; X86-SSE2-LABEL: add8:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: mfence
; X86-SSE2-NEXT: movb (%eax), %al
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: add8:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: xorl %eax, %eax
; X86-SLM-NEXT: lock xaddb %al, (%ecx)
; X86-SLM-NEXT: # kill: def $al killed $al killed $eax
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: add8:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: xorl %eax, %eax
; X86-ATOM-NEXT: lock xaddb %al, (%ecx)
; X86-ATOM-NEXT: # kill: def $al killed $al killed $eax
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
  %1 = atomicrmw add i8* %p, i8 0 monotonic
  ret i8 %1
}

define i16 @or16(i16* %p) {
; X64-LABEL: or16:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: retq
;
; X86-SSE2-LABEL: or16:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: mfence
; X86-SSE2-NEXT: movzwl (%eax), %eax
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: or16:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: movzwl (%ecx), %eax
; X86-SLM-NEXT: .p2align 4, 0x90
; X86-SLM-NEXT: .LBB1_1: # %atomicrmw.start
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT: lock cmpxchgw %ax, (%ecx)
; X86-SLM-NEXT: jne .LBB1_1
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: or16:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: movzwl (%ecx), %eax
; X86-ATOM-NEXT: .p2align 4, 0x90
; X86-ATOM-NEXT: .LBB1_1: # %atomicrmw.start
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT: lock cmpxchgw %ax, (%ecx)
; X86-ATOM-NEXT: jne .LBB1_1
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT: retl
  %1 = atomicrmw or i16* %p, i16 0 acquire
  ret i16 %1
}

define i32 @xor32(i32* %p) {
; X64-LABEL: xor32:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: retq
;
; X86-SSE2-LABEL: xor32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: mfence
; X86-SSE2-NEXT: movl (%eax), %eax
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: xor32:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: movl (%ecx), %eax
; X86-SLM-NEXT: .p2align 4, 0x90
; X86-SLM-NEXT: .LBB2_1: # %atomicrmw.start
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
; X86-SLM-NEXT: jne .LBB2_1
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: xor32:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: movl (%ecx), %eax
; X86-ATOM-NEXT: .p2align 4, 0x90
; X86-ATOM-NEXT: .LBB2_1: # %atomicrmw.start
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
; X86-ATOM-NEXT: jne .LBB2_1
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT: retl
  %1 = atomicrmw xor i32* %p, i32 0 release
  ret i32 %1
}

define i64 @sub64(i64* %p) {
; X64-LABEL: sub64:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: retq
;
; X86-LABEL: sub64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: .cfi_offset %ebx, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %eax
; X86-NEXT: movl 4(%esi), %edx
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB3_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: lock cmpxchg8b (%esi)
; X86-NEXT: jne .LBB3_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
  %1 = atomicrmw sub i64* %p, i64 0 seq_cst
  ret i64 %1
}

define i128 @or128(i128* %p) {
; X64-LABEL: or128:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: callq __sync_fetch_and_or_16@PLT
; X64-NEXT: popq %rcx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; X86-SSE2-LABEL: or128:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: .cfi_offset %ebp, -8
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: .cfi_offset %esi, -16
; X86-SSE2-NEXT: .cfi_offset %edi, -12
; X86-SSE2-NEXT: movl 8(%ebp), %esi
; X86-SSE2-NEXT: movl %esp, %eax
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl 12(%ebp)
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
; X86-SSE2-NEXT: addl $20, %esp
; X86-SSE2-NEXT: movl (%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT: movl %edi, 8(%esi)
; X86-SSE2-NEXT: movl %edx, 12(%esi)
; X86-SSE2-NEXT: movl %eax, (%esi)
; X86-SSE2-NEXT: movl %ecx, 4(%esi)
; X86-SSE2-NEXT: movl %esi, %eax
; X86-SSE2-NEXT: leal -8(%ebp), %esp
; X86-SSE2-NEXT: popl %esi
; X86-SSE2-NEXT: popl %edi
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT: retl $4
;
; X86-SLM-LABEL: or128:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: pushl %ebp
; X86-SLM-NEXT: .cfi_def_cfa_offset 8
; X86-SLM-NEXT: .cfi_offset %ebp, -8
; X86-SLM-NEXT: movl %esp, %ebp
; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
; X86-SLM-NEXT: pushl %edi
; X86-SLM-NEXT: pushl %esi
; X86-SLM-NEXT: andl $-8, %esp
; X86-SLM-NEXT: subl $16, %esp
; X86-SLM-NEXT: .cfi_offset %esi, -16
; X86-SLM-NEXT: .cfi_offset %edi, -12
; X86-SLM-NEXT: movl 8(%ebp), %esi
; X86-SLM-NEXT: movl 12(%ebp), %eax
; X86-SLM-NEXT: movl %esp, %ecx
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl %eax
; X86-SLM-NEXT: pushl %ecx
; X86-SLM-NEXT: calll __sync_fetch_and_or_16
; X86-SLM-NEXT: addl $20, %esp
; X86-SLM-NEXT: movl (%esp), %eax
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SLM-NEXT: movl %edi, 8(%esi)
; X86-SLM-NEXT: movl %edx, 12(%esi)
; X86-SLM-NEXT: movl %eax, (%esi)
; X86-SLM-NEXT: movl %ecx, 4(%esi)
; X86-SLM-NEXT: movl %esi, %eax
; X86-SLM-NEXT: leal -8(%ebp), %esp
; X86-SLM-NEXT: popl %esi
; X86-SLM-NEXT: popl %edi
; X86-SLM-NEXT: popl %ebp
; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
; X86-SLM-NEXT: retl $4
;
; X86-ATOM-LABEL: or128:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: pushl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
; X86-ATOM-NEXT: leal (%esp), %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
; X86-ATOM-NEXT: pushl %edi
; X86-ATOM-NEXT: pushl %esi
; X86-ATOM-NEXT: andl $-8, %esp
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: .cfi_offset %esi, -16
; X86-ATOM-NEXT: .cfi_offset %edi, -12
; X86-ATOM-NEXT: movl 8(%ebp), %esi
; X86-ATOM-NEXT: movl 12(%ebp), %eax
; X86-ATOM-NEXT: movl %esp, %ecx
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl %eax
; X86-ATOM-NEXT: pushl %ecx
; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: movl (%esp), %ecx
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-ATOM-NEXT: movl %eax, 8(%esi)
; X86-ATOM-NEXT: movl %edi, 12(%esi)
; X86-ATOM-NEXT: movl %ecx, (%esi)
; X86-ATOM-NEXT: movl %esi, %eax
; X86-ATOM-NEXT: movl %edx, 4(%esi)
; X86-ATOM-NEXT: leal -8(%ebp), %esp
; X86-ATOM-NEXT: popl %esi
; X86-ATOM-NEXT: popl %edi
; X86-ATOM-NEXT: popl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT: retl $4
  %1 = atomicrmw or i128* %p, i128 0 monotonic
  ret i128 %1
}

; For 'and', the idempotent value is -1 (all bits set), since x & -1 == x.
define i32 @and32(i32* %p) {
; X64-LABEL: and32:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: retq
;
; X86-SSE2-LABEL: and32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: mfence
; X86-SSE2-NEXT: movl (%eax), %eax
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: and32:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: movl (%ecx), %eax
; X86-SLM-NEXT: .p2align 4, 0x90
; X86-SLM-NEXT: .LBB5_1: # %atomicrmw.start
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
; X86-SLM-NEXT: jne .LBB5_1
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: and32:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: movl (%ecx), %eax
; X86-ATOM-NEXT: .p2align 4, 0x90
; X86-ATOM-NEXT: .LBB5_1: # %atomicrmw.start
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
; X86-ATOM-NEXT: jne .LBB5_1
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT: retl
  %1 = atomicrmw and i32* %p, i32 -1 acq_rel
  ret i32 %1
}

define void @or32_nouse_monotonic(i32* %p) {
; X64-LABEL: or32_nouse_monotonic:
; X64: # %bb.0:
; X64-NEXT: #MEMBARRIER
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_monotonic:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: #MEMBARRIER
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_monotonic:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: #MEMBARRIER
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
  atomicrmw or i32* %p, i32 0 monotonic
  ret void
}


define void @or32_nouse_acquire(i32* %p) {
; X64-LABEL: or32_nouse_acquire:
; X64: # %bb.0:
; X64-NEXT: #MEMBARRIER
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_acquire:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: #MEMBARRIER
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_acquire:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: #MEMBARRIER
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
  atomicrmw or i32* %p, i32 0 acquire
  ret void
}

define void @or32_nouse_release(i32* %p) {
; X64-LABEL: or32_nouse_release:
; X64: # %bb.0:
; X64-NEXT: #MEMBARRIER
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_release:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: #MEMBARRIER
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_release:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: #MEMBARRIER
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
  atomicrmw or i32* %p, i32 0 release
  ret void
}

define void @or32_nouse_acq_rel(i32* %p) {
; X64-LABEL: or32_nouse_acq_rel:
; X64: # %bb.0:
; X64-NEXT: #MEMBARRIER
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_acq_rel:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: #MEMBARRIER
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_acq_rel:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: #MEMBARRIER
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
  atomicrmw or i32* %p, i32 0 acq_rel
  ret void
}

define void @or32_nouse_seq_cst(i32* %p) {
; X64-LABEL: or32_nouse_seq_cst:
; X64: # %bb.0:
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_seq_cst:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: lock orl $0, (%esp)
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: lock orl $0, (%esp)
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
  atomicrmw or i32* %p, i32 0 seq_cst
  ret void
}

; TODO: The value isn't used on 32 bit, so the cmpxchg8b is unneeded
define void @or64_nouse_seq_cst(i64* %p) {
; X64-LABEL: or64_nouse_seq_cst:
; X64: # %bb.0:
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
; X86-LABEL: or64_nouse_seq_cst:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: .cfi_offset %ebx, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %eax
; X86-NEXT: movl 4(%esi), %edx
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB11_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: lock cmpxchg8b (%esi)
; X86-NEXT: jne .LBB11_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
  atomicrmw or i64* %p, i64 0 seq_cst
  ret void
}

; TODO: Don't need to lower as sync_and_fetch call
define void @or128_nouse_seq_cst(i128* %p) {
; X64-LABEL: or128_nouse_seq_cst:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: callq __sync_fetch_and_or_16@PLT
; X64-NEXT: popq %rax
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; X86-SSE2-LABEL: or128_nouse_seq_cst:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: .cfi_offset %ebp, -8
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: movl %esp, %eax
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl 8(%ebp)
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
; X86-SSE2-NEXT: addl $20, %esp
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: or128_nouse_seq_cst:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: pushl %ebp
; X86-SLM-NEXT: .cfi_def_cfa_offset 8
; X86-SLM-NEXT: .cfi_offset %ebp, -8
; X86-SLM-NEXT: movl %esp, %ebp
; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
; X86-SLM-NEXT: andl $-8, %esp
; X86-SLM-NEXT: subl $16, %esp
; X86-SLM-NEXT: movl 8(%ebp), %eax
; X86-SLM-NEXT: movl %esp, %ecx
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl %eax
; X86-SLM-NEXT: pushl %ecx
; X86-SLM-NEXT: calll __sync_fetch_and_or_16
; X86-SLM-NEXT: addl $20, %esp
; X86-SLM-NEXT: movl %ebp, %esp
; X86-SLM-NEXT: popl %ebp
; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: or128_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: pushl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
; X86-ATOM-NEXT: leal (%esp), %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
; X86-ATOM-NEXT: andl $-8, %esp
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: movl 8(%ebp), %eax
; X86-ATOM-NEXT: movl %esp, %ecx
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl %eax
; X86-ATOM-NEXT: pushl %ecx
; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: movl %ebp, %esp
; X86-ATOM-NEXT: popl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT: retl
  atomicrmw or i128* %p, i128 0 seq_cst
  ret void
}


define void @or16_nouse_seq_cst(i16* %p) {
; X64-LABEL: or16_nouse_seq_cst:
; X64: # %bb.0:
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or16_nouse_seq_cst:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: lock orl $0, (%esp)
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or16_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: lock orl $0, (%esp)
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
  atomicrmw or i16* %p, i16 0 seq_cst
  ret void
}

define void @or8_nouse_seq_cst(i8* %p) {
; X64-LABEL: or8_nouse_seq_cst:
; X64: # %bb.0:
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or8_nouse_seq_cst:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: lock orl $0, (%esp)
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or8_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: lock orl $0, (%esp)
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
  atomicrmw or i8* %p, i8 0 seq_cst
  ret void
}