clang-p2996/llvm/test/CodeGen/X86/vec_insert-3.ll
Commit 21d02dc595 by Simon Pilgrim: [X86][SSE] SimplifyDemandedVectorEltsForTargetNode - add general shuffle combining support
This patch uses partial DemandedElts masks to further simplify target shuffle chains and finally starts making target shuffle combining part of SimplifyDemandedBits/SimplifyDemandedVectorElts.

We already handle this for the Depth == 0 case, where combineX86ShuffleChain early-outs if the shuffle combines to the same op. This patch generalizes that by adjusting the depth handling of combineX86ShufflesRecursively: calling it with a new Depth = 0 and reducing the maximum shuffle combine depth accordingly.

Differential Revision: https://reviews.llvm.org/D66004
2020-09-02 09:24:46 +01:00
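
For illustration only, here is a minimal, hypothetical IR case in the same spirit (it is not part of vec_insert-3.ll, the function name is made up, and no particular codegen output is claimed): the final shuffle only demands the two low lanes of %unpck, so a demanded-elements walk through the lowered target shuffle chain is free to ignore whatever feeds the high lanes. Any CHECK lines for it would need to be generated with update_llc_test_checks.py.

; Hypothetical example, not part of the original test file.
define <4 x i32> @demanded_low_lanes(<4 x i32> %a, <4 x i32> %b) nounwind {
  ; Interleave the low halves of %a and %b (an unpcklps-style shuffle).
  %unpck = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; Only lanes 0 and 1 of %unpck are demanded here, so the elements fed by
  ; %a[1] and %b[1] are dead as far as this use is concerned.
  %res = shufflevector <4 x i32> %unpck, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i32> %res
}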

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X64
define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) nounwind {
; X32-LABEL: t1:
; X32: # %bb.0:
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
; X32-NEXT: retl
;
; X64-LABEL: t1:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %xmm1
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1
ret <2 x i64> %tmp1
}
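
As the NOTE line indicates, the CHECK lines in this test are autogenerated rather than hand-written; after a codegen change such as this one they are refreshed by re-running llvm/utils/update_llc_test_checks.py on the test file against the newly built llc, and the updated assembly is then reviewed as part of the patch.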