clang-p2996/llvm/test/CodeGen/X86/constant-pool-sharing.ll
Commit 11276563c8 by Simon Pilgrim: [X86] X86DAGToDAGISel - attempt to merge XMM/YMM loads with YMM/ZMM loads of the same ptr (#73126)
If we are loading the same ptr at different vector widths, then reuse the largest load and just extract the low subvector.

Unlike the equivalent VBROADCAST_LOAD/SUBV_BROADCAST_LOAD folds, which can occur in the DAG, we have to wait until DAGISel; otherwise we can hit infinite loops if constant folding recreates the original constant value.

This is mainly useful for better constant sharing.
2023-11-27 10:26:26 +00:00
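
For context, here is a minimal sketch of the pattern this fold targets (illustrative only, not part of the test file below; the function name and vector shapes are made up, and it assumes an AVX target so the 256-bit load stays whole):

define <4 x float> @load_same_ptr_two_widths(ptr %p, ptr %out) {
  ; Both loads read the same address at different widths. After the fold, the
  ; 128-bit value should come from the low subvector of the single 256-bit
  ; load rather than from a second, narrower load of %p.
  %wide = load <8 x float>, ptr %p, align 32
  %narrow = load <4 x float>, ptr %p, align 32
  store <8 x float> %wide, ptr %out, align 32
  ret <4 x float> %narrow
}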

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s --check-prefixes=SSE-LINUX
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck %s --check-prefixes=SSE-MSVC
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7-avx | FileCheck %s --check-prefixes=AVX-LINUX
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7-avx | FileCheck %s --check-prefixes=AVX-MSVC

; llc should share constant pool entries between this integer vector
; and this floating-point vector since they have the same encoding.
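; (2.0f has the IEEE-754 bit pattern 0x40000000 = 1073741824, so a single
; constant pool entry can back both splats.)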
; FIXME: AVX is duplicating broadcasts
define void @share_v4i32_v4f32(ptr %p, ptr %q, i1 %t) nounwind {
; SSE-LINUX-LABEL: share_v4i32_v4f32:
; SSE-LINUX: # %bb.0: # %entry
; SSE-LINUX-NEXT: movaps {{.*#+}} xmm0 = [1073741824,1073741824,1073741824,1073741824]
; SSE-LINUX-NEXT: .p2align 4, 0x90
; SSE-LINUX-NEXT: .LBB0_1: # %loop
; SSE-LINUX-NEXT: # =>This Inner Loop Header: Depth=1
; SSE-LINUX-NEXT: movaps %xmm0, (%rdi)
; SSE-LINUX-NEXT: movaps %xmm0, (%rsi)
; SSE-LINUX-NEXT: testb $1, %dl
; SSE-LINUX-NEXT: jne .LBB0_1
; SSE-LINUX-NEXT: # %bb.2: # %ret
; SSE-LINUX-NEXT: retq
;
; SSE-MSVC-LABEL: share_v4i32_v4f32:
; SSE-MSVC: # %bb.0: # %entry
; SSE-MSVC-NEXT: movaps {{.*#+}} xmm0 = [1073741824,1073741824,1073741824,1073741824]
; SSE-MSVC-NEXT: .p2align 4, 0x90
; SSE-MSVC-NEXT: .LBB0_1: # %loop
; SSE-MSVC-NEXT: # =>This Inner Loop Header: Depth=1
; SSE-MSVC-NEXT: movaps %xmm0, (%rcx)
; SSE-MSVC-NEXT: movaps %xmm0, (%rdx)
; SSE-MSVC-NEXT: testb $1, %r8b
; SSE-MSVC-NEXT: jne .LBB0_1
; SSE-MSVC-NEXT: # %bb.2: # %ret
; SSE-MSVC-NEXT: retq
;
; AVX-LINUX-LABEL: share_v4i32_v4f32:
; AVX-LINUX: # %bb.0: # %entry
; AVX-LINUX-NEXT: vbroadcastss {{.*#+}} xmm0 = [1073741824,1073741824,1073741824,1073741824]
; AVX-LINUX-NEXT: vbroadcastss {{.*#+}} xmm1 = [1073741824,1073741824,1073741824,1073741824]
; AVX-LINUX-NEXT: .p2align 4, 0x90
; AVX-LINUX-NEXT: .LBB0_1: # %loop
; AVX-LINUX-NEXT: # =>This Inner Loop Header: Depth=1
; AVX-LINUX-NEXT: vmovaps %xmm0, (%rdi)
; AVX-LINUX-NEXT: vmovaps %xmm1, (%rsi)
; AVX-LINUX-NEXT: testb $1, %dl
; AVX-LINUX-NEXT: jne .LBB0_1
; AVX-LINUX-NEXT: # %bb.2: # %ret
; AVX-LINUX-NEXT: retq
;
; AVX-MSVC-LABEL: share_v4i32_v4f32:
; AVX-MSVC: # %bb.0: # %entry
; AVX-MSVC-NEXT: vbroadcastss {{.*#+}} xmm0 = [1073741824,1073741824,1073741824,1073741824]
; AVX-MSVC-NEXT: vbroadcastss {{.*#+}} xmm1 = [1073741824,1073741824,1073741824,1073741824]
; AVX-MSVC-NEXT: .p2align 4, 0x90
; AVX-MSVC-NEXT: .LBB0_1: # %loop
; AVX-MSVC-NEXT: # =>This Inner Loop Header: Depth=1
; AVX-MSVC-NEXT: vmovaps %xmm0, (%rcx)
; AVX-MSVC-NEXT: vmovaps %xmm1, (%rdx)
; AVX-MSVC-NEXT: testb $1, %r8b
; AVX-MSVC-NEXT: jne .LBB0_1
; AVX-MSVC-NEXT: # %bb.2: # %ret
; AVX-MSVC-NEXT: retq
entry:
  br label %loop

loop:
  store <4 x i32><i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824>, ptr %p
  store <4 x float><float 2.0, float 2.0, float 2.0, float 2.0>, ptr %q
  br i1 %t, label %loop, label %ret

ret:
  ret void
}
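
; The repeated 128-bit chunks within and across these two stores (the
; <i64 -1, i64 0> pairs and the all-zero chunks) should be able to reuse the
; same constant pool loads and zero idioms rather than rematerializing each
; 128/256-bit piece separately.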
define void @store_repeated_constants(ptr %lo, ptr %hi) {
; SSE-LINUX-LABEL: store_repeated_constants:
; SSE-LINUX: # %bb.0:
; SSE-LINUX-NEXT: xorps %xmm0, %xmm0
; SSE-LINUX-NEXT: movaps %xmm0, 48(%rdi)
; SSE-LINUX-NEXT: movaps {{.*#+}} xmm1 = [18446744073709551615,0]
; SSE-LINUX-NEXT: movaps %xmm1, 32(%rdi)
; SSE-LINUX-NEXT: movaps %xmm1, 16(%rdi)
; SSE-LINUX-NEXT: movaps %xmm1, (%rdi)
; SSE-LINUX-NEXT: movaps %xmm0, 32(%rsi)
; SSE-LINUX-NEXT: movaps %xmm0, 48(%rsi)
; SSE-LINUX-NEXT: movaps %xmm1, (%rsi)
; SSE-LINUX-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; SSE-LINUX-NEXT: movaps %xmm0, 16(%rsi)
; SSE-LINUX-NEXT: retq
;
; SSE-MSVC-LABEL: store_repeated_constants:
; SSE-MSVC: # %bb.0:
; SSE-MSVC-NEXT: xorps %xmm0, %xmm0
; SSE-MSVC-NEXT: movaps %xmm0, 48(%rcx)
; SSE-MSVC-NEXT: movaps {{.*#+}} xmm1 = [18446744073709551615,0]
; SSE-MSVC-NEXT: movaps %xmm1, 32(%rcx)
; SSE-MSVC-NEXT: movaps %xmm1, 16(%rcx)
; SSE-MSVC-NEXT: movaps %xmm1, (%rcx)
; SSE-MSVC-NEXT: movaps %xmm0, 32(%rdx)
; SSE-MSVC-NEXT: movaps %xmm0, 48(%rdx)
; SSE-MSVC-NEXT: movaps %xmm1, (%rdx)
; SSE-MSVC-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; SSE-MSVC-NEXT: movaps %xmm0, 16(%rdx)
; SSE-MSVC-NEXT: retq
;
; AVX-LINUX-LABEL: store_repeated_constants:
; AVX-LINUX: # %bb.0:
; AVX-LINUX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = [18446744073709551615,0,18446744073709551615,0]
; AVX-LINUX-NEXT: # ymm0 = mem[0,1,0,1]
; AVX-LINUX-NEXT: vmovaps %ymm0, (%rdi)
; AVX-LINUX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,0,0,18446744073709551615]
; AVX-LINUX-NEXT: vmovaps %xmm0, %xmm1
; AVX-LINUX-NEXT: vmovaps %ymm1, 32(%rdi)
; AVX-LINUX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-LINUX-NEXT: vmovaps %ymm1, 32(%rsi)
; AVX-LINUX-NEXT: vmovaps %ymm0, (%rsi)
; AVX-LINUX-NEXT: vzeroupper
; AVX-LINUX-NEXT: retq
;
; AVX-MSVC-LABEL: store_repeated_constants:
; AVX-MSVC: # %bb.0:
; AVX-MSVC-NEXT: vbroadcastf128 {{.*#+}} ymm0 = [18446744073709551615,0,18446744073709551615,0]
; AVX-MSVC-NEXT: # ymm0 = mem[0,1,0,1]
; AVX-MSVC-NEXT: vmovaps %ymm0, (%rcx)
; AVX-MSVC-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,0,0,18446744073709551615]
; AVX-MSVC-NEXT: vmovaps %xmm0, %xmm1
; AVX-MSVC-NEXT: vmovaps %ymm1, 32(%rcx)
; AVX-MSVC-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-MSVC-NEXT: vmovaps %ymm1, 32(%rdx)
; AVX-MSVC-NEXT: vmovaps %ymm0, (%rdx)
; AVX-MSVC-NEXT: vzeroupper
; AVX-MSVC-NEXT: retq
  store volatile <8 x i64> <i64 -1, i64 0, i64 -1, i64 0, i64 -1, i64 0, i64 0, i64 0>, ptr %lo, align 64
  store volatile <8 x i64> <i64 -1, i64 0, i64 0, i64 -1, i64 0, i64 0, i64 0, i64 0>, ptr %hi, align 64
  ret void
}