Files
clang-p2996/llvm/test/CodeGen/X86/vec_shift2.ll
Sanjay Patel 57c3fe76a3 [x86] favor vector constant load to avoid GPR to XMM transfer
This build vector lowering pattern came up in D79886.
I've tried to limit the improvement to cases where it looks
clearly better to load, but we could remove the 'TODO'
predicates already if we are willing to overlook some
corner cases.

Differential Revision: https://reviews.llvm.org/D80013
2020-05-17 11:56:26 -04:00

42 lines
1.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind {
; X32-LABEL: t1:
; X32: # %bb.0:
; X32-NEXT: psrlw {{\.LCPI.*}}, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: t1:
; X64: # %bb.0:
; X64-NEXT: psrlw {{.*}}(%rip), %xmm0
; X64-NEXT: retq
%tmp1 = bitcast <2 x i64> %b1 to <8 x i16>
%tmp2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp1, <8 x i16> bitcast (<4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > to <8 x i16>) ) nounwind readnone
%tmp3 = bitcast <8 x i16> %tmp2 to <2 x i64>
ret <2 x i64> %tmp3
}
define <4 x i32> @t2(<2 x i64> %b1, <2 x i64> %c) nounwind {
; X32-LABEL: t2:
; X32: # %bb.0:
; X32-NEXT: movl $14, %eax
; X32-NEXT: movd %eax, %xmm1
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: t2:
; X64: # %bb.0:
; X64-NEXT: movl $14, %eax
; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: pslld %xmm1, %xmm0
; X64-NEXT: retq
%tmp1 = bitcast <2 x i64> %b1 to <4 x i32>
%tmp2 = tail call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> %tmp1, <4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > ) nounwind readnone
ret <4 x i32> %tmp2
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone