This behavior was added in r130928 for both FastISel and SD, and then disabled
in r131156 for FastISel. This re-enables it for FastISel with the corresponding
fix. It is triggered only when FastISel can't lower the arguments and falls
back to SelectionDAG for them.

FastISel contains a map of "register fixups": at the end of the selection phase
it replaces all uses of a register with another register that FastISel
sometimes pre-assigned. Code at the end of
SelectionDAGISel::runOnMachineFunction was doing the replacement at the very
end of the function, while other pieces that run before that walk the
MachineFunction and assume everything is already done. In this case, the real
issue is that the code emitting COPY instructions for the live-ins (physreg to
vreg), EmitLiveInCopies, checks whether the vreg assigned to the physreg is
used, and skips the COPY if it's not. If a register hasn't been replaced with
its assigned fixup yet, the copy is skipped and we end up with uses of
undefined registers.

This fix moves the replacement of registers before the emission of copies for
the live-ins.

The initial motivation for this fix is to enable tail calls for swiftself
functions, which were blocked because we couldn't prove that the swiftself
argument (which is callee-save) comes from a function argument (live-in): there
was an extra copy (vreg to vreg).

A few tests are affected by this:

* llvm/test/CodeGen/AArch64/swifterror.ll: we used to spill x21 (callee-save)
  but never reload it because it's attached to the return. We now don't even
  spill it anymore.
* llvm/test/CodeGen/*/swiftself.ll: we tail-call now.
* llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll: I believe this test was
  not really testing the right thing, but it worked because the same registers
  were re-used.
* llvm/test/CodeGen/ARM/cmpxchg-O0.ll: regalloc changes
* llvm/test/CodeGen/ARM/swifterror.ll: get rid of a copy
* llvm/test/CodeGen/Mips/*: get rid of spills and copies
* llvm/test/CodeGen/SystemZ/swift-return.ll: smaller stack
* llvm/test/CodeGen/X86/atomic-unordered.ll: smaller stack
* llvm/test/CodeGen/X86/swifterror.ll: same as AArch64
* llvm/test/DebugInfo/X86/dbg-declare-arg.ll: stack size changed

Differential Revision: https://reviews.llvm.org/D62361

llvm-svn: 362963
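
For reference, a minimal sketch of the reordering described above, assuming the
names used around SelectionDAGISel.cpp at this revision (FuncInfo->RegFixups,
MachineRegisterInfo::replaceRegWith, MachineRegisterInfo::EmitLiveInCopies);
this is an illustration of the idea, not a verbatim excerpt of the patch:

  // Apply FastISel's pending register fixups first, so that every use
  // examined below already refers to its final vreg.
  for (const auto &Fixup : FuncInfo->RegFixups) {
    unsigned From = Fixup.first;
    unsigned To = Fixup.second;
    // A fixup target may itself have been fixed up; follow the chain.
    while (FuncInfo->RegFixups.count(To))
      To = FuncInfo->RegFixups.lookup(To);
    MRI.replaceRegWith(From, To);
  }

  // Only now emit COPYs for the live-in physregs: the "is the assigned
  // vreg used?" check sees the post-fixup uses, so a COPY is skipped only
  // when the vreg is genuinely dead.
  MRI.EmitLiveInCopies(EntryMBB, TRI, *TII);
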
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefix=CHECK-O0 %s
; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefix=CHECK-O3 %s

define i8 @load_i8(i8* %ptr) {
; CHECK-O0-LABEL: load_i8:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movb (%rdi), %al
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_i8:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movb (%rdi), %al
; CHECK-O3-NEXT:    retq
  %v = load atomic i8, i8* %ptr unordered, align 1
  ret i8 %v
}

define void @store_i8(i8* %ptr, i8 %v) {
; CHECK-O0-LABEL: store_i8:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    # kill: def $sil killed $sil killed $esi
; CHECK-O0-NEXT:    movb %sil, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: store_i8:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movb %sil, (%rdi)
; CHECK-O3-NEXT:    retq
  store atomic i8 %v, i8* %ptr unordered, align 1
  ret void
}

define i16 @load_i16(i16* %ptr) {
; CHECK-O0-LABEL: load_i16:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movw (%rdi), %ax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_i16:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movzwl (%rdi), %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i16, i16* %ptr unordered, align 2
  ret i16 %v
}


define void @store_i16(i16* %ptr, i16 %v) {
; CHECK-O0-LABEL: store_i16:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    # kill: def $si killed $si killed $esi
; CHECK-O0-NEXT:    movw %si, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: store_i16:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movw %si, (%rdi)
; CHECK-O3-NEXT:    retq
  store atomic i16 %v, i16* %ptr unordered, align 2
  ret void
}

define i32 @load_i32(i32* %ptr) {
; CHECK-O0-LABEL: load_i32:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movl (%rdi), %eax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_i32:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movl (%rdi), %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i32, i32* %ptr unordered, align 4
  ret i32 %v
}

define void @store_i32(i32* %ptr, i32 %v) {
; CHECK-O0-LABEL: store_i32:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movl %esi, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: store_i32:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movl %esi, (%rdi)
; CHECK-O3-NEXT:    retq
  store atomic i32 %v, i32* %ptr unordered, align 4
  ret void
}

define i64 @load_i64(i64* %ptr) {
; CHECK-O0-LABEL: load_i64:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_i64:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %ptr unordered, align 8
  ret i64 %v
}

define void @store_i64(i64* %ptr, i64 %v) {
; CHECK-O0-LABEL: store_i64:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq %rsi, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: store_i64:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq %rsi, (%rdi)
; CHECK-O3-NEXT:    retq
  store atomic i64 %v, i64* %ptr unordered, align 8
  ret void
}

;; The tests in the rest of this file are intended to show transforms which we
;; either *can't* do for legality, or don't currently implement. The latter
;; are noted carefully where relevant.

;; Start w/some clearly illegal ones.

; Must use a full width op, not a byte op
define void @narrow_writeback_or(i64* %ptr) {
; CHECK-O0-LABEL: narrow_writeback_or:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    orq $7, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: narrow_writeback_or:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    orq $7, (%rdi)
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %ptr unordered, align 8
  %v.new = or i64 %v, 7
  store atomic i64 %v.new, i64* %ptr unordered, align 8
  ret void
}

; Must use a full width op, not a byte op
define void @narrow_writeback_and(i64* %ptr) {
; CHECK-O0-LABEL: narrow_writeback_and:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT:    andl $-256, %eax
; CHECK-O0-NEXT:    movl %eax, %ecx
; CHECK-O0-NEXT:    movq %rcx, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: narrow_writeback_and:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movl $4294967040, %eax # imm = 0xFFFFFF00
; CHECK-O3-NEXT:    andq %rax, (%rdi)
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %ptr unordered, align 8
  %v.new = and i64 %v, 4294967040 ;; 0xFFFF_FF00
  store atomic i64 %v.new, i64* %ptr unordered, align 8
  ret void
}

; Must use a full width op, not a byte op
define void @narrow_writeback_xor(i64* %ptr) {
; CHECK-O0-LABEL: narrow_writeback_xor:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorq $7, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: narrow_writeback_xor:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    xorq $7, (%rdi)
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %ptr unordered, align 8
  %v.new = xor i64 %v, 7
  store atomic i64 %v.new, i64* %ptr unordered, align 8
  ret void
}

;; The next batch of tests exercises cases where store widening would
;; improve code generation. Note that widening is only legal if the
;; resulting type would be atomic. Each test has a well-aligned and an
;; unaligned variant to ensure we get correct codegen here.
;;
;; Note: It's not a legality issue, but there's a gotcha here to be aware
;; of. Once we widen a pair of atomic stores, we lose the information
;; that the original atomicity requirement was half the width. Given that,
;; we can't then split the store again. This challenges our usual iterative
;; approach to incremental improvement.

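;; For illustration only (this is the transform the TODOs below refer to,
;; not current output): on little-endian x86, @widen_store below could
;; legally become a single atomic i64 store when the address is 8-byte
;; aligned. The function name and the zext/shl/or merge are hypothetical:
;
; define void @widen_store_sketch(i32* %p0, i32 %v1, i32 %v2) {
;   %p64 = bitcast i32* %p0 to i64*
;   %z1 = zext i32 %v1 to i64
;   %z2 = zext i32 %v2 to i64
;   %hi = shl i64 %z2, 32
;   %merged = or i64 %hi, %z1
;   store atomic i64 %merged, i64* %p64 unordered, align 8
;   ret void
; }
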
; Legal if wider type is also atomic (TODO)
define void @widen_store(i32* %p0, i32 %v1, i32 %v2) {
; CHECK-O0-LABEL: widen_store:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movl %esi, (%rdi)
; CHECK-O0-NEXT:    movl %edx, 4(%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: widen_store:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movl %esi, (%rdi)
; CHECK-O3-NEXT:    movl %edx, 4(%rdi)
; CHECK-O3-NEXT:    retq
  %p1 = getelementptr i32, i32* %p0, i64 1
  store atomic i32 %v1, i32* %p0 unordered, align 8
  store atomic i32 %v2, i32* %p1 unordered, align 4
  ret void
}

; This one is *NOT* legal to widen. With weaker alignment,
; the wider type might cross a cache line and violate the
; atomicity requirement.
define void @widen_store_unaligned(i32* %p0, i32 %v1, i32 %v2) {
; CHECK-O0-LABEL: widen_store_unaligned:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movl %esi, (%rdi)
; CHECK-O0-NEXT:    movl %edx, 4(%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: widen_store_unaligned:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movl %esi, (%rdi)
; CHECK-O3-NEXT:    movl %edx, 4(%rdi)
; CHECK-O3-NEXT:    retq
  %p1 = getelementptr i32, i32* %p0, i64 1
  store atomic i32 %v1, i32* %p0 unordered, align 4
  store atomic i32 %v2, i32* %p1 unordered, align 4
  ret void
}

; Legal if wider type is also atomic (TODO)
define void @widen_broadcast(i32* %p0, i32 %v) {
; CHECK-O0-LABEL: widen_broadcast:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movl %esi, (%rdi)
; CHECK-O0-NEXT:    movl %esi, 4(%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: widen_broadcast:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movl %esi, (%rdi)
; CHECK-O3-NEXT:    movl %esi, 4(%rdi)
; CHECK-O3-NEXT:    retq
  %p1 = getelementptr i32, i32* %p0, i64 1
  store atomic i32 %v, i32* %p0 unordered, align 8
  store atomic i32 %v, i32* %p1 unordered, align 4
  ret void
}

; Not legal to widen due to alignment restriction
define void @widen_broadcast_unaligned(i32* %p0, i32 %v) {
; CHECK-O0-LABEL: widen_broadcast_unaligned:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movl %esi, (%rdi)
; CHECK-O0-NEXT:    movl %esi, 4(%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: widen_broadcast_unaligned:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movl %esi, (%rdi)
; CHECK-O3-NEXT:    movl %esi, 4(%rdi)
; CHECK-O3-NEXT:    retq
  %p1 = getelementptr i32, i32* %p0, i64 1
  store atomic i32 %v, i32* %p0 unordered, align 4
  store atomic i32 %v, i32* %p1 unordered, align 4
  ret void
}

define i128 @load_i128(i128* %ptr) {
; CHECK-O0-LABEL: load_i128:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    pushq %rbx
; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
; CHECK-O0-NEXT:    .cfi_offset %rbx, -16
; CHECK-O0-NEXT:    xorl %eax, %eax
; CHECK-O0-NEXT:    movl %eax, %ecx
; CHECK-O0-NEXT:    movq %rcx, %rax
; CHECK-O0-NEXT:    movq %rcx, %rdx
; CHECK-O0-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
; CHECK-O0-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-O0-NEXT:    popq %rbx
; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_i128:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    pushq %rbx
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
; CHECK-O3-NEXT:    .cfi_offset %rbx, -16
; CHECK-O3-NEXT:    xorl %eax, %eax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    xorl %ecx, %ecx
; CHECK-O3-NEXT:    xorl %ebx, %ebx
; CHECK-O3-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-O3-NEXT:    popq %rbx
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
; CHECK-O3-NEXT:    retq
  %v = load atomic i128, i128* %ptr unordered, align 16
  ret i128 %v
}

define void @store_i128(i128* %ptr, i128 %v) {
; CHECK-O0-LABEL: store_i128:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    pushq %rbx
; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
; CHECK-O0-NEXT:    .cfi_offset %rbx, -16
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movq 8(%rdi), %rcx
; CHECK-O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    jmp .LBB16_1
; CHECK-O0-NEXT:  .LBB16_1: # %atomicrmw.start
; CHECK-O0-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; CHECK-O0-NEXT:    lock cmpxchg16b (%rsi)
; CHECK-O0-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    jne .LBB16_1
; CHECK-O0-NEXT:    jmp .LBB16_2
; CHECK-O0-NEXT:  .LBB16_2: # %atomicrmw.end
; CHECK-O0-NEXT:    popq %rbx
; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: store_i128:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    pushq %rbx
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
; CHECK-O3-NEXT:    .cfi_offset %rbx, -16
; CHECK-O3-NEXT:    movq %rdx, %rcx
; CHECK-O3-NEXT:    movq %rsi, %rbx
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq 8(%rdi), %rdx
; CHECK-O3-NEXT:    .p2align 4, 0x90
; CHECK-O3-NEXT:  .LBB16_1: # %atomicrmw.start
; CHECK-O3-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-O3-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-O3-NEXT:    jne .LBB16_1
; CHECK-O3-NEXT:  # %bb.2: # %atomicrmw.end
; CHECK-O3-NEXT:    popq %rbx
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
; CHECK-O3-NEXT:    retq
  store atomic i128 %v, i128* %ptr unordered, align 16
  ret void
}

define i256 @load_i256(i256* %ptr) {
; CHECK-O0-LABEL: load_i256:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    subq $56, %rsp
; CHECK-O0-NEXT:    .cfi_def_cfa_offset 64
; CHECK-O0-NEXT:    movq %rdi, %rax
; CHECK-O0-NEXT:    movl $32, %ecx
; CHECK-O0-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
; CHECK-O0-NEXT:    xorl %r8d, %r8d
; CHECK-O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    movq %rcx, %rdi
; CHECK-O0-NEXT:    movl %r8d, %ecx
; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    callq __atomic_load
; CHECK-O0-NEXT:    movq {{[0-9]+}}(%rsp), %rax
; CHECK-O0-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; CHECK-O0-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
; CHECK-O0-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
; CHECK-O0-NEXT:    movq %rdi, 24(%r9)
; CHECK-O0-NEXT:    movq %rsi, 16(%r9)
; CHECK-O0-NEXT:    movq %rdx, 8(%r9)
; CHECK-O0-NEXT:    movq %rax, (%r9)
; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-O0-NEXT:    addq $56, %rsp
; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_i256:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    pushq %rbx
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
; CHECK-O3-NEXT:    subq $32, %rsp
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 48
; CHECK-O3-NEXT:    .cfi_offset %rbx, -16
; CHECK-O3-NEXT:    movq %rdi, %rbx
; CHECK-O3-NEXT:    movq %rsp, %rdx
; CHECK-O3-NEXT:    movl $32, %edi
; CHECK-O3-NEXT:    xorl %ecx, %ecx
; CHECK-O3-NEXT:    callq __atomic_load
; CHECK-O3-NEXT:    vmovups (%rsp), %ymm0
; CHECK-O3-NEXT:    vmovups %ymm0, (%rbx)
; CHECK-O3-NEXT:    movq %rbx, %rax
; CHECK-O3-NEXT:    addq $32, %rsp
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
; CHECK-O3-NEXT:    popq %rbx
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
; CHECK-O3-NEXT:    vzeroupper
; CHECK-O3-NEXT:    retq
  %v = load atomic i256, i256* %ptr unordered, align 16
  ret i256 %v
}

define void @store_i256(i256* %ptr, i256 %v) {
; CHECK-O0-LABEL: store_i256:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    subq $40, %rsp
; CHECK-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-O0-NEXT:    xorl %eax, %eax
; CHECK-O0-NEXT:    leaq {{[0-9]+}}(%rsp), %r9
; CHECK-O0-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT:    movl $32, %ecx
; CHECK-O0-NEXT:    movq %rdi, (%rsp) # 8-byte Spill
; CHECK-O0-NEXT:    movq %rcx, %rdi
; CHECK-O0-NEXT:    movq (%rsp), %rsi # 8-byte Reload
; CHECK-O0-NEXT:    movq %r9, %rdx
; CHECK-O0-NEXT:    movl %eax, %ecx
; CHECK-O0-NEXT:    callq __atomic_store
; CHECK-O0-NEXT:    addq $40, %rsp
; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: store_i256:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    subq $40, %rsp
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 48
; CHECK-O3-NEXT:    movq %rdi, %rax
; CHECK-O3-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
; CHECK-O3-NEXT:    movl $32, %edi
; CHECK-O3-NEXT:    movq %rax, %rsi
; CHECK-O3-NEXT:    xorl %ecx, %ecx
; CHECK-O3-NEXT:    callq __atomic_store
; CHECK-O3-NEXT:    addq $40, %rsp
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
; CHECK-O3-NEXT:    retq
  store atomic i256 %v, i256* %ptr unordered, align 16
  ret void
}

; Legal if wider type is also atomic (TODO)
define void @vec_store(i32* %p0, <2 x i32> %vec) {
; CHECK-O0-LABEL: vec_store:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    vmovd %xmm0, %eax
; CHECK-O0-NEXT:    vpextrd $2, %xmm0, %ecx
; CHECK-O0-NEXT:    movl %eax, (%rdi)
; CHECK-O0-NEXT:    movl %ecx, 4(%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: vec_store:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    vmovd %xmm0, %eax
; CHECK-O3-NEXT:    vpextrd $2, %xmm0, %ecx
; CHECK-O3-NEXT:    movl %eax, (%rdi)
; CHECK-O3-NEXT:    movl %ecx, 4(%rdi)
; CHECK-O3-NEXT:    retq
  %v1 = extractelement <2 x i32> %vec, i32 0
  %v2 = extractelement <2 x i32> %vec, i32 1
  %p1 = getelementptr i32, i32* %p0, i64 1
  store atomic i32 %v1, i32* %p0 unordered, align 8
  store atomic i32 %v2, i32* %p1 unordered, align 4
  ret void
}

; Not legal to widen due to alignment restriction
define void @vec_store_unaligned(i32* %p0, <2 x i32> %vec) {
; CHECK-O0-LABEL: vec_store_unaligned:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    vmovd %xmm0, %eax
; CHECK-O0-NEXT:    vpextrd $2, %xmm0, %ecx
; CHECK-O0-NEXT:    movl %eax, (%rdi)
; CHECK-O0-NEXT:    movl %ecx, 4(%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: vec_store_unaligned:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    vmovd %xmm0, %eax
; CHECK-O3-NEXT:    vpextrd $2, %xmm0, %ecx
; CHECK-O3-NEXT:    movl %eax, (%rdi)
; CHECK-O3-NEXT:    movl %ecx, 4(%rdi)
; CHECK-O3-NEXT:    retq
  %v1 = extractelement <2 x i32> %vec, i32 0
  %v2 = extractelement <2 x i32> %vec, i32 1
  %p1 = getelementptr i32, i32* %p0, i64 1
  store atomic i32 %v1, i32* %p0 unordered, align 4
  store atomic i32 %v2, i32* %p1 unordered, align 4
  ret void
}



; Legal if wider type is also atomic (TODO)
; Also, can avoid register move from xmm to eax (TODO)
define void @widen_broadcast2(i32* %p0, <2 x i32> %vec) {
; CHECK-O0-LABEL: widen_broadcast2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    vmovd %xmm0, %eax
; CHECK-O0-NEXT:    movl %eax, (%rdi)
; CHECK-O0-NEXT:    movl %eax, 4(%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: widen_broadcast2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    vmovd %xmm0, %eax
; CHECK-O3-NEXT:    movl %eax, (%rdi)
; CHECK-O3-NEXT:    movl %eax, 4(%rdi)
; CHECK-O3-NEXT:    retq
  %v1 = extractelement <2 x i32> %vec, i32 0
  %p1 = getelementptr i32, i32* %p0, i64 1
  store atomic i32 %v1, i32* %p0 unordered, align 8
  store atomic i32 %v1, i32* %p1 unordered, align 4
  ret void
}

; Not legal to widen due to alignment restriction
define void @widen_broadcast2_unaligned(i32* %p0, <2 x i32> %vec) {
; CHECK-O0-LABEL: widen_broadcast2_unaligned:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    vmovd %xmm0, %eax
; CHECK-O0-NEXT:    movl %eax, (%rdi)
; CHECK-O0-NEXT:    movl %eax, 4(%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: widen_broadcast2_unaligned:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    vmovd %xmm0, %eax
; CHECK-O3-NEXT:    movl %eax, (%rdi)
; CHECK-O3-NEXT:    movl %eax, 4(%rdi)
; CHECK-O3-NEXT:    retq
  %v1 = extractelement <2 x i32> %vec, i32 0
  %p1 = getelementptr i32, i32* %p0, i64 1
  store atomic i32 %v1, i32* %p0 unordered, align 4
  store atomic i32 %v1, i32* %p1 unordered, align 4
  ret void
}

; Legal if wider type is also atomic (TODO)
define void @widen_zero_init(i32* %p0, i32 %v1, i32 %v2) {
; CHECK-O0-LABEL: widen_zero_init:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movl $0, (%rdi)
; CHECK-O0-NEXT:    movl $0, 4(%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: widen_zero_init:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movl $0, (%rdi)
; CHECK-O3-NEXT:    movl $0, 4(%rdi)
; CHECK-O3-NEXT:    retq
  %p1 = getelementptr i32, i32* %p0, i64 1
  store atomic i32 0, i32* %p0 unordered, align 8
  store atomic i32 0, i32* %p1 unordered, align 4
  ret void
}

; Not legal to widen due to alignment restriction
define void @widen_zero_init_unaligned(i32* %p0, i32 %v1, i32 %v2) {
; CHECK-O0-LABEL: widen_zero_init_unaligned:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movl $0, (%rdi)
; CHECK-O0-NEXT:    movl $0, 4(%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: widen_zero_init_unaligned:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movl $0, (%rdi)
; CHECK-O3-NEXT:    movl $0, 4(%rdi)
; CHECK-O3-NEXT:    retq
  %p1 = getelementptr i32, i32* %p0, i64 1
  store atomic i32 0, i32* %p0 unordered, align 4
  store atomic i32 0, i32* %p1 unordered, align 4
  ret void
}

;; The next batch of tests is stressing load folding. Folding is legal
;; on x86, so these are simply checking optimization quality.

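;; For example (mirroring @load_fold_add2 below), instead of a separate
;; load followed by a register-register add:
;;   movq (%rdi), %rcx
;;   addq %rcx, %rax
;; the 8-byte atomic load can be folded straight into the arithmetic
;; instruction as a memory operand:
;;   addq (%rdi), %rax
;; since an aligned 8-byte load on x86-64 is atomic either way.
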
; Legal, as expected
define i64 @load_fold_add1(i64* %p) {
; CHECK-O0-LABEL: load_fold_add1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    addq $15, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_add1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    addq $15, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = add i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_add2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_add2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    addq (%rdi), %rsi
; CHECK-O0-NEXT:    movq %rsi, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_add2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq %rsi, %rax
; CHECK-O3-NEXT:    addq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = add i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_add3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_add3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    addq (%rsi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_add3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    addq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = add i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_sub1(i64* %p) {
; CHECK-O0-LABEL: load_fold_sub1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    subq $15, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_sub1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    addq $-15, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = sub i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_sub2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_sub2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    subq %rsi, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_sub2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    subq %rsi, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = sub i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_sub3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_sub3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    subq (%rsi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_sub3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    subq (%rsi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = sub i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_mul1(i64* %p) {
; CHECK-O0-LABEL: load_fold_mul1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    imulq $15, (%rdi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_mul1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    leaq (%rax,%rax,4), %rax
; CHECK-O3-NEXT:    leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = mul i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_mul2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_mul2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    imulq (%rdi), %rsi
; CHECK-O0-NEXT:    movq %rsi, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_mul2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq %rsi, %rax
; CHECK-O3-NEXT:    imulq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = mul i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_mul3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_mul3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    imulq (%rsi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_mul3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    imulq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = mul i64 %v, %v2
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_sdiv1(i64* %p) {
; CHECK-O0-LABEL: load_fold_sdiv1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    movl $15, %ecx
; CHECK-O0-NEXT:    idivq %rcx
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_sdiv1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rcx
; CHECK-O3-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-NEXT:    movq %rcx, %rax
; CHECK-O3-NEXT:    imulq %rdx
; CHECK-O3-NEXT:    addq %rcx, %rdx
; CHECK-O3-NEXT:    movq %rdx, %rax
; CHECK-O3-NEXT:    shrq $63, %rax
; CHECK-O3-NEXT:    sarq $3, %rdx
; CHECK-O3-NEXT:    addq %rdx, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = sdiv i64 %v, 15
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_sdiv2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_sdiv2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    idivq %rsi
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_sdiv2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    orq %rsi, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    je .LBB35_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    cqto
; CHECK-O3-NEXT:    idivq %rsi
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB35_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %esi
; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = sdiv i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_sdiv3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_sdiv3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    idivq (%rsi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_sdiv3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq (%rsi), %rcx
; CHECK-O3-NEXT:    movq %rax, %rdx
; CHECK-O3-NEXT:    orq %rcx, %rdx
; CHECK-O3-NEXT:    shrq $32, %rdx
; CHECK-O3-NEXT:    je .LBB36_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    cqto
; CHECK-O3-NEXT:    idivq %rcx
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB36_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %ecx
; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = sdiv i64 %v, %v2
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_udiv1(i64* %p) {
; CHECK-O0-LABEL: load_fold_udiv1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    movl %ecx, %edx
; CHECK-O0-NEXT:    movl $15, %esi
; CHECK-O0-NEXT:    divq %rsi
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_udiv1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O3-NEXT:    mulq %rcx
; CHECK-O3-NEXT:    movq %rdx, %rax
; CHECK-O3-NEXT:    shrq $3, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = udiv i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_udiv2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_udiv2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    movl %ecx, %edx
; CHECK-O0-NEXT:    divq %rsi
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_udiv2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    orq %rsi, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    je .LBB38_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divq %rsi
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB38_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %esi
; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = udiv i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_udiv3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_udiv3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    movl %ecx, %edx
; CHECK-O0-NEXT:    divq (%rsi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_udiv3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq (%rsi), %rcx
; CHECK-O3-NEXT:    movq %rax, %rdx
; CHECK-O3-NEXT:    orq %rcx, %rdx
; CHECK-O3-NEXT:    shrq $32, %rdx
; CHECK-O3-NEXT:    je .LBB39_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divq %rcx
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB39_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %ecx
; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = udiv i64 %v, %v2
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_srem1(i64* %p) {
; CHECK-O0-LABEL: load_fold_srem1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    movl $15, %ecx
; CHECK-O0-NEXT:    idivq %rcx
; CHECK-O0-NEXT:    movq %rdx, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_srem1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rcx
; CHECK-O3-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-NEXT:    movq %rcx, %rax
; CHECK-O3-NEXT:    imulq %rdx
; CHECK-O3-NEXT:    addq %rcx, %rdx
; CHECK-O3-NEXT:    movq %rdx, %rax
; CHECK-O3-NEXT:    shrq $63, %rax
; CHECK-O3-NEXT:    sarq $3, %rdx
; CHECK-O3-NEXT:    addq %rax, %rdx
; CHECK-O3-NEXT:    leaq (%rdx,%rdx,4), %rax
; CHECK-O3-NEXT:    leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT:    subq %rax, %rcx
; CHECK-O3-NEXT:    movq %rcx, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = srem i64 %v, 15
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_srem2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_srem2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    idivq %rsi
; CHECK-O0-NEXT:    movq %rdx, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_srem2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    orq %rsi, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    je .LBB41_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    cqto
; CHECK-O3-NEXT:    idivq %rsi
; CHECK-O3-NEXT:    movq %rdx, %rax
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB41_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %esi
; CHECK-O3-NEXT:    movl %edx, %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = srem i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_srem3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_srem3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    idivq (%rsi)
; CHECK-O0-NEXT:    movq %rdx, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_srem3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq (%rsi), %rcx
; CHECK-O3-NEXT:    movq %rax, %rdx
; CHECK-O3-NEXT:    orq %rcx, %rdx
; CHECK-O3-NEXT:    shrq $32, %rdx
; CHECK-O3-NEXT:    je .LBB42_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    cqto
; CHECK-O3-NEXT:    idivq %rcx
; CHECK-O3-NEXT:    movq %rdx, %rax
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB42_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %ecx
; CHECK-O3-NEXT:    movl %edx, %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = srem i64 %v, %v2
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_urem1(i64* %p) {
; CHECK-O0-LABEL: load_fold_urem1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    movl %ecx, %edx
; CHECK-O0-NEXT:    movl $15, %esi
; CHECK-O0-NEXT:    divq %rsi
; CHECK-O0-NEXT:    movq %rdx, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_urem1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rcx
; CHECK-O3-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-NEXT:    movq %rcx, %rax
; CHECK-O3-NEXT:    mulq %rdx
; CHECK-O3-NEXT:    shrq $3, %rdx
; CHECK-O3-NEXT:    leaq (%rdx,%rdx,4), %rax
; CHECK-O3-NEXT:    leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT:    subq %rax, %rcx
; CHECK-O3-NEXT:    movq %rcx, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = urem i64 %v, 15
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_urem2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_urem2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    movl %ecx, %edx
; CHECK-O0-NEXT:    divq %rsi
; CHECK-O0-NEXT:    movq %rdx, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_urem2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    orq %rsi, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    je .LBB44_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divq %rsi
; CHECK-O3-NEXT:    movq %rdx, %rax
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB44_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %esi
; CHECK-O3-NEXT:    movl %edx, %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = urem i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_urem3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_urem3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    movl %ecx, %edx
; CHECK-O0-NEXT:    divq (%rsi)
; CHECK-O0-NEXT:    movq %rdx, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_urem3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq (%rsi), %rcx
; CHECK-O3-NEXT:    movq %rax, %rdx
; CHECK-O3-NEXT:    orq %rcx, %rdx
; CHECK-O3-NEXT:    shrq $32, %rdx
; CHECK-O3-NEXT:    je .LBB45_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divq %rcx
; CHECK-O3-NEXT:    movq %rdx, %rax
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB45_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %ecx
; CHECK-O3-NEXT:    movl %edx, %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = urem i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_shl1(i64* %p) {
; CHECK-O0-LABEL: load_fold_shl1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    shlq $15, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_shl1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    shlq $15, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = shl i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_shl2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_shl2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movq %rsi, %rcx
; CHECK-O0-NEXT:    # kill: def $cl killed $rcx
; CHECK-O0-NEXT:    shlq %cl, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_shl2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    shlxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = shl i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_shl3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_shl3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movq (%rsi), %rcx
; CHECK-O0-NEXT:    # kill: def $cl killed $rcx
; CHECK-O0-NEXT:    shlq %cl, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_shl3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    shlxq %rax, (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = shl i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_lshr1(i64* %p) {
; CHECK-O0-LABEL: load_fold_lshr1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    shrq $15, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_lshr1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    shrq $15, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = lshr i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_lshr2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_lshr2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movq %rsi, %rcx
; CHECK-O0-NEXT:    # kill: def $cl killed $rcx
; CHECK-O0-NEXT:    shrq %cl, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_lshr2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    shrxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = lshr i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_lshr3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_lshr3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movq (%rsi), %rcx
; CHECK-O0-NEXT:    # kill: def $cl killed $rcx
; CHECK-O0-NEXT:    shrq %cl, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_lshr3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    shrxq %rax, (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = lshr i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_ashr1(i64* %p) {
; CHECK-O0-LABEL: load_fold_ashr1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    sarq $15, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_ashr1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    sarq $15, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = ashr i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_ashr2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_ashr2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movq %rsi, %rcx
; CHECK-O0-NEXT:    # kill: def $cl killed $rcx
; CHECK-O0-NEXT:    sarq %cl, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_ashr2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    sarxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = ashr i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_ashr3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_ashr3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movq (%rsi), %rcx
; CHECK-O0-NEXT:    # kill: def $cl killed $rcx
; CHECK-O0-NEXT:    sarq %cl, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_ashr3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    sarxq %rax, (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = ashr i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_and1(i64* %p) {
; CHECK-O0-LABEL: load_fold_and1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    andq $15, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_and1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    andl $15, %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = and i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_and2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_and2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    andq (%rdi), %rsi
; CHECK-O0-NEXT:    movq %rsi, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_and2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq %rsi, %rax
; CHECK-O3-NEXT:    andq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = and i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_and3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_and3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    andq (%rsi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_and3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    andq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = and i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_or1(i64* %p) {
; CHECK-O0-LABEL: load_fold_or1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    orq $15, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_or1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    orq $15, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = or i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_or2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_or2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    orq (%rdi), %rsi
; CHECK-O0-NEXT:    movq %rsi, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_or2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq %rsi, %rax
; CHECK-O3-NEXT:    orq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = or i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_or3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_or3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    orq (%rsi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_or3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    orq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = or i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_xor1(i64* %p) {
; CHECK-O0-LABEL: load_fold_xor1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorq $15, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_xor1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    xorq $15, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = xor i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_xor2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_xor2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    xorq (%rdi), %rsi
; CHECK-O0-NEXT:    movq %rsi, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_xor2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq %rsi, %rax
; CHECK-O3-NEXT:    xorq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = xor i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_xor3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_xor3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorq (%rsi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_xor3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    xorq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = xor i64 %v, %v2
  ret i64 %ret
}

define i1 @load_fold_icmp1(i64* %p) {
; CHECK-O0-LABEL: load_fold_icmp1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    subq $15, %rax
; CHECK-O0-NEXT:    sete %cl
; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    movb %cl, %al
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_icmp1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    cmpq $15, (%rdi)
; CHECK-O3-NEXT:    sete %al
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = icmp eq i64 %v, 15
  ret i1 %ret
}

define i1 @load_fold_icmp2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_icmp2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    subq %rsi, %rax
; CHECK-O0-NEXT:    sete %cl
; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    movb %cl, %al
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_icmp2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    cmpq %rsi, (%rdi)
; CHECK-O3-NEXT:    sete %al
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = icmp eq i64 %v, %v2
  ret i1 %ret
}

define i1 @load_fold_icmp3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_icmp3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movq (%rsi), %rcx
; CHECK-O0-NEXT:    subq %rcx, %rax
; CHECK-O0-NEXT:    sete %dl
; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    movb %dl, %al
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_icmp3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    cmpq %rax, (%rdi)
; CHECK-O3-NEXT:    sete %al
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p1 unordered, align 8
  %v2 = load atomic i64, i64* %p2 unordered, align 8
  %ret = icmp eq i64 %v, %v2
  ret i1 %ret
}


;; The next batch of tests checks for read-modify-write patterns.
;; Legally, it's okay to use a memory operand here as long as the operand
;; is well aligned (i.e. doesn't cross a cache line boundary). We are
;; required not to narrow the store though!

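;; For example, for `%val = and i64 %prev, -256` the full-width RMW form
;;   andq $-256, (%rdi)
;; is acceptable (an 8-byte load and an 8-byte store), but narrowing the
;; store to the one byte that actually changes
;;   movb $0, (%rdi)
;; is not: the store side must remain a single atomic 8-byte access.
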
; Legal, as expected
define void @rmw_fold_add1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_add1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    addq $15, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_add1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    addq $15, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = add i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_add2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_add2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    addq %rsi, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_add2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    addq %rsi, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = add i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_sub1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_sub1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    addq $-15, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_sub1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    addq $-15, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = sub i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_sub2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_sub2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    subq %rsi, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_sub2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    subq %rsi, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = sub i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
|
|
define void @rmw_fold_mul1(i64* %p, i64 %v) {
|
|
; CHECK-O0-LABEL: rmw_fold_mul1:
|
|
; CHECK-O0: # %bb.0:
|
|
; CHECK-O0-NEXT: movq (%rdi), %rax
|
|
; CHECK-O0-NEXT: leaq (%rax,%rax,4), %rax
|
|
; CHECK-O0-NEXT: leaq (%rax,%rax,2), %rax
|
|
; CHECK-O0-NEXT: movq %rax, (%rdi)
|
|
; CHECK-O0-NEXT: retq
|
|
;
|
|
; CHECK-O3-LABEL: rmw_fold_mul1:
|
|
; CHECK-O3: # %bb.0:
|
|
; CHECK-O3-NEXT: movq (%rdi), %rax
|
|
; CHECK-O3-NEXT: leaq (%rax,%rax,4), %rax
|
|
; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax
|
|
; CHECK-O3-NEXT: movq %rax, (%rdi)
|
|
; CHECK-O3-NEXT: retq
|
|
%prev = load atomic i64, i64* %p unordered, align 8
|
|
%val = mul i64 %prev, 15
|
|
store atomic i64 %val, i64* %p unordered, align 8
|
|
ret void
|
|
}
|
|
|
|
; Legal to fold (TODO)
|
|
define void @rmw_fold_mul2(i64* %p, i64 %v) {
|
|
; CHECK-O0-LABEL: rmw_fold_mul2:
|
|
; CHECK-O0: # %bb.0:
|
|
; CHECK-O0-NEXT: movq (%rdi), %rax
|
|
; CHECK-O0-NEXT: imulq %rsi, %rax
|
|
; CHECK-O0-NEXT: movq %rax, (%rdi)
|
|
; CHECK-O0-NEXT: retq
|
|
;
|
|
; CHECK-O3-LABEL: rmw_fold_mul2:
|
|
; CHECK-O3: # %bb.0:
|
|
; CHECK-O3-NEXT: imulq (%rdi), %rsi
|
|
; CHECK-O3-NEXT: movq %rsi, (%rdi)
|
|
; CHECK-O3-NEXT: retq
|
|
%prev = load atomic i64, i64* %p unordered, align 8
|
|
%val = mul i64 %prev, %v
|
|
store atomic i64 %val, i64* %p unordered, align 8
|
|
ret void
|
|
}
|
|
|
|
; Legal, as expected
define void @rmw_fold_sdiv1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_sdiv1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: imulq %rcx
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-O0-NEXT: addq %rax, %rdx
; CHECK-O0-NEXT: movq %rdx, %rcx
; CHECK-O0-NEXT: shrq $63, %rcx
; CHECK-O0-NEXT: sarq $3, %rdx
; CHECK-O0-NEXT: addq %rcx, %rdx
; CHECK-O0-NEXT: movq %rdx, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_sdiv1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-NEXT: movq %rcx, %rax
; CHECK-O3-NEXT: imulq %rdx
; CHECK-O3-NEXT: addq %rcx, %rdx
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: shrq $63, %rax
; CHECK-O3-NEXT: sarq $3, %rdx
; CHECK-O3-NEXT: addq %rax, %rdx
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = sdiv i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

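; Note: the sdiv by 15 above uses the standard magic-number sequence: the
; constant 0x8888888888888889 is the signed encoding of ceil(2^67/15), so
; taking the high half of the 128-bit product, adding the dividend back,
; arithmetic-shifting right by 3, and adding in the sign bit yields the
; round-toward-zero quotient.
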
; Legal, as expected
define void @rmw_fold_sdiv2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_sdiv2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: cqto
; CHECK-O0-NEXT: idivq %rsi
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_sdiv2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB74_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB74_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = sdiv i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

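; Note: at -O3 the variable divides here and below get a fast path: or-ing the
; two operands and shifting right by 32 tests whether both fit in 32 bits, and
; if so the much cheaper 32-bit divl is used instead of the full 64-bit divide.
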
; Legal, as expected
define void @rmw_fold_udiv1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_udiv1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O0-NEXT: mulq %rcx
; CHECK-O0-NEXT: shrq $3, %rdx
; CHECK-O0-NEXT: movq %rdx, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_udiv1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O3-NEXT: mulq %rcx
; CHECK-O3-NEXT: shrq $3, %rdx
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = udiv i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

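; Note: the unsigned case is simpler: with the same constant read as unsigned
; ceil(2^67/15), the quotient is just the high half of the product shifted
; right by 3, i.e. floor(x * ceil(2^67/15) / 2^67) = floor(x/15).
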
; Legal, as expected
define void @rmw_fold_udiv2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_udiv2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: movl %ecx, %edx
; CHECK-O0-NEXT: divq %rsi
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_udiv2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB76_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB76_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = udiv i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_srem1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_srem1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: imulq %rcx
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-O0-NEXT: addq %rax, %rdx
; CHECK-O0-NEXT: movq %rdx, %rcx
; CHECK-O0-NEXT: shrq $63, %rcx
; CHECK-O0-NEXT: sarq $3, %rdx
; CHECK-O0-NEXT: addq %rcx, %rdx
; CHECK-O0-NEXT: leaq (%rdx,%rdx,4), %rcx
; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx
; CHECK-O0-NEXT: subq %rcx, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_srem1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-NEXT: movq %rcx, %rax
; CHECK-O3-NEXT: imulq %rdx
; CHECK-O3-NEXT: addq %rcx, %rdx
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: shrq $63, %rax
; CHECK-O3-NEXT: sarq $3, %rdx
; CHECK-O3-NEXT: addq %rax, %rdx
; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax
; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT: subq %rax, %rcx
; CHECK-O3-NEXT: movq %rcx, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = srem i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

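; Note: the srem above reuses the magic-number quotient and then computes the
; remainder as x - 15*(x/15), with the multiply by 15 again done as two LEAs.
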
; Legal, as expected
define void @rmw_fold_srem2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_srem2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: cqto
; CHECK-O0-NEXT: idivq %rsi
; CHECK-O0-NEXT: movq %rdx, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_srem2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB78_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB78_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = srem i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_urem1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_urem1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: mulq %rcx
; CHECK-O0-NEXT: shrq $3, %rdx
; CHECK-O0-NEXT: leaq (%rdx,%rdx,4), %rax
; CHECK-O0-NEXT: leaq (%rax,%rax,2), %rax
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-O0-NEXT: subq %rax, %rcx
; CHECK-O0-NEXT: movq %rcx, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_urem1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-NEXT: movq %rcx, %rax
; CHECK-O3-NEXT: mulq %rdx
; CHECK-O3-NEXT: shrq $3, %rdx
; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax
; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT: subq %rax, %rcx
; CHECK-O3-NEXT: movq %rcx, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = urem i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_urem2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_urem2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: movl %ecx, %edx
; CHECK-O0-NEXT: divq %rsi
; CHECK-O0-NEXT: movq %rdx, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_urem2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: je .LBB80_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB80_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = urem i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_shl1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_shl1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: shlq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_shl1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: shlq $15, %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = shl i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_shl2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_shl2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $rsi
; CHECK-O0-NEXT: # implicit-def: $rcx
; CHECK-O0-NEXT: movb %sil, %cl
; CHECK-O0-NEXT: shlxq %rcx, %rax, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_shl2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = shl i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

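; Note: because this runs with -mcpu=skylake (which has BMI2), the variable
; shifts here and below use shlxq/shrxq/sarxq, which take the shift amount in
; an arbitrary register rather than forcing it into %cl.
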
; Legal to fold (TODO)
define void @rmw_fold_lshr1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_lshr1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: shrq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_lshr1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: shrq $15, %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = lshr i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_lshr2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_lshr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $rsi
; CHECK-O0-NEXT: # implicit-def: $rcx
; CHECK-O0-NEXT: movb %sil, %cl
; CHECK-O0-NEXT: shrxq %rcx, %rax, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_lshr2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = lshr i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_ashr1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_ashr1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: sarq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_ashr1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: sarq $15, %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = ashr i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_ashr2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_ashr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $rsi
; CHECK-O0-NEXT: # implicit-def: $rcx
; CHECK-O0-NEXT: movb %sil, %cl
; CHECK-O0-NEXT: sarxq %rcx, %rax, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_ashr2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = ashr i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_and1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_and1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: andl $15, %eax
; CHECK-O0-NEXT: movl %eax, %ecx
; CHECK-O0-NEXT: movq %rcx, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_and1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: andq $15, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = and i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_and2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_and2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: andq %rsi, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_and2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: andq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = and i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_or1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_or1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: orq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_or1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: orq $15, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = or i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_or2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_or2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: orq %rsi, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_or2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: orq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = or i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_xor1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_xor1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_xor1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: xorq $15, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = xor i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_xor2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_xor2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorq %rsi, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_xor2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: xorq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = xor i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

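; Note: the memory-destination forms above (e.g. andq/orq/xorq with a (%rdi)
; destination) are not lock-prefixed; their load and store halves are each
; individually atomic, which is all that 'unordered' requires.
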
;; The next batch of tests covers truncations, in combination with operations
;; which could be folded against the memory operation.

; Legal to reduce the load width (TODO)
define i32 @fold_trunc(i64* %p) {
; CHECK-O0-LABEL: fold_trunc:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = trunc i64 %v to i32
  ret i32 %ret
}

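; Note: a narrowed form of the load above would be a plain 32-bit load of the
; low half (e.g. movl (%rdi), %eax on this little-endian target), which is
; still a single atomic access.
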
; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_add(i64* %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_add:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: addl %esi, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc_add:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: addl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = add i32 %trunc, %v2
  ret i32 %ret
}

; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_and(i64* %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_and:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: andl %esi, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc_and:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: andl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = and i32 %trunc, %v2
  ret i32 %ret
}

; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_or(i64* %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_or:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: orl %esi, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc_or:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: orl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = or i32 %trunc, %v2
  ret i32 %ret
}

; It's tempting to split the wide load into two smaller byte loads
; to reduce memory traffic, but this would be illegal for an atomic load
define i32 @split_load(i64* %p) {
; CHECK-O0-LABEL: split_load:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movb %al, %cl
; CHECK-O0-NEXT: shrq $32, %rax
; CHECK-O0-NEXT: # kill: def $al killed $al killed $rax
; CHECK-O0-NEXT: orb %al, %cl
; CHECK-O0-NEXT: movzbl %cl, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: split_load:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: orl %eax, %ecx
; CHECK-O3-NEXT: movzbl %cl, %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  %b1 = trunc i64 %v to i8
  %v.shift = lshr i64 %v, 32
  %b2 = trunc i64 %v.shift to i8
  %or = or i8 %b1, %b2
  %ret = zext i8 %or to i32
  ret i32 %ret
}

;; A collection of simple memory forwarding tests. Nothing particularly
;; interesting semantics-wise; they just demonstrate obvious missed transforms.

@Zero = constant i64 0

; TODO: should return a constant
define i64 @constant_folding(i64* %p) {
; CHECK-O0-LABEL: constant_folding:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: constant_folding:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  ret i64 %v
}

; Legal to forward and fold (TODO)
define i64 @load_forwarding(i64* %p) {
; CHECK-O0-LABEL: load_forwarding:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: orq (%rdi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_forwarding:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: orq (%rdi), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  %v2 = load atomic i64, i64* %p unordered, align 8
  %ret = or i64 %v, %v2
  ret i64 %ret
}

; Legal to forward (TODO)
define i64 @store_forward(i64* %p, i64 %v) {
; CHECK-O0-LABEL: store_forward:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq %rsi, (%rdi)
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_forward:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq %rsi, (%rdi)
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: retq
  store atomic i64 %v, i64* %p unordered, align 8
  %ret = load atomic i64, i64* %p unordered, align 8
  ret i64 %ret
}

; Legal to kill (TODO)
define void @dead_writeback(i64* %p) {
; CHECK-O0-LABEL: dead_writeback:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: dead_writeback:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  store atomic i64 %v, i64* %p unordered, align 8
  ret void
}

; Legal to kill (TODO)
define void @dead_store(i64* %p, i64 %v) {
; CHECK-O0-LABEL: dead_store:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq $0, (%rdi)
; CHECK-O0-NEXT: movq %rsi, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: dead_store:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq $0, (%rdi)
; CHECK-O3-NEXT: movq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
  store atomic i64 0, i64* %p unordered, align 8
  store atomic i64 %v, i64* %p unordered, align 8
  ret void
}

;; The next batch of tests ensures that we don't try to fold a load into a
;; use where the code motion implied for the load is prevented by a fence.
;; Note: We're checking that the load doesn't get moved below the fence as
;; part of folding, but it is technically legal to lift the add above the
;; fence. If that were to happen, please rewrite the test so that it still
;; ensures the load isn't moved below the fence.

define i64 @nofold_fence(i64* %p) {
; CHECK-O0-LABEL: nofold_fence:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: mfence
; CHECK-O0-NEXT: addq $15, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: nofold_fence:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: mfence
; CHECK-O3-NEXT: addq $15, %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  fence seq_cst
  %ret = add i64 %v, 15
  ret i64 %ret
}

define i64 @nofold_fence_acquire(i64* %p) {
; CHECK-O0-LABEL: nofold_fence_acquire:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: #MEMBARRIER
; CHECK-O0-NEXT: addq $15, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: nofold_fence_acquire:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: #MEMBARRIER
; CHECK-O3-NEXT: addq $15, %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  fence acquire
  %ret = add i64 %v, 15
  ret i64 %ret
}

define i64 @nofold_stfence(i64* %p) {
; CHECK-O0-LABEL: nofold_stfence:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: #MEMBARRIER
; CHECK-O0-NEXT: addq $15, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: nofold_stfence:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: #MEMBARRIER
; CHECK-O3-NEXT: addq $15, %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  fence syncscope("singlethread") seq_cst
  %ret = add i64 %v, 15
  ret i64 %ret
}

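; Note: only the seq_cst fence in nofold_fence requires a real mfence; the
; acquire fence and the singlethread fence are compiler-only barriers on x86,
; so they lower to the #MEMBARRIER marker rather than to an instruction.
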
;; Next, test how well we can fold invariant loads.

@Constant = external constant i64

define i64 @fold_constant(i64 %arg) {
; CHECK-O0-LABEL: fold_constant:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: addq Constant, %rdi
; CHECK-O0-NEXT: movq %rdi, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_constant:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq %rdi, %rax
; CHECK-O3-NEXT: addq {{.*}}(%rip), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* @Constant unordered, align 8
  %ret = add i64 %v, %arg
  ret i64 %ret
}

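; Note: a load from a global declared 'constant' (or marked !invariant.load,
; as below) can never observe a different value, so it should be foldable into
; the consuming add even across the intervening store or fence.
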
define i64 @fold_constant_clobber(i64* %p, i64 %arg) {
; CHECK-O0-LABEL: fold_constant_clobber:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq {{.*}}(%rip), %rax
; CHECK-O0-NEXT: movq $5, (%rdi)
; CHECK-O0-NEXT: addq %rsi, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_constant_clobber:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq {{.*}}(%rip), %rax
; CHECK-O3-NEXT: movq $5, (%rdi)
; CHECK-O3-NEXT: addq %rsi, %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* @Constant unordered, align 8
  store i64 5, i64* %p
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_constant_fence(i64 %arg) {
; CHECK-O0-LABEL: fold_constant_fence:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq {{.*}}(%rip), %rax
; CHECK-O0-NEXT: mfence
; CHECK-O0-NEXT: addq %rdi, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_constant_fence:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq {{.*}}(%rip), %rax
; CHECK-O3-NEXT: mfence
; CHECK-O3-NEXT: addq %rdi, %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* @Constant unordered, align 8
  fence seq_cst
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_invariant_clobber(i64* dereferenceable(8) %p, i64 %arg) {
; CHECK-O0-LABEL: fold_invariant_clobber:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movq $5, (%rdi)
; CHECK-O0-NEXT: addq %rsi, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_invariant_clobber:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq $5, (%rdi)
; CHECK-O3-NEXT: addq %rsi, %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{}
  store i64 5, i64* %p
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_invariant_fence(i64* dereferenceable(8) %p, i64 %arg) {
; CHECK-O0-LABEL: fold_invariant_fence:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: mfence
; CHECK-O0-NEXT: addq %rsi, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_invariant_fence:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: mfence
; CHECK-O3-NEXT: addq %rsi, %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{}
  fence seq_cst
  %ret = add i64 %v, %arg
  ret i64 %ret
}