Files
clang-p2996/llvm/test/CodeGen/X86/pr97968.ll
Simon Pilgrim 92083e855b [X86] Allow VPERMV3 -> VPERMV folds to handle extraction from a wider source vector (e.g. v16i32 -> v4i32)
We don't need to restrict this to double width vectors, as long as we correctly bitcast the types

Improves the fix for #97968
2024-07-08 13:10:45 +01:00

21 lines
908 B
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s
; Regression test for issue #97968: lanes 2 and 7 of a <16 x i32> source are
; taken from two different 128-bit subvectors and packed into a <2 x i32>.
; The assertions below expect one VPERMPS on the full zmm source, using the
; index constant [2,7,2,7], with the result read from the low xmm.
define <2 x i32> @PR97968(<16 x i32> %a0) {
; CHECK-LABEL: PR97968:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = [2,7,2,7]
; CHECK-NEXT: # xmm1 = mem[0,0]
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  ; %sub0 holds source lanes 0-3, %sub1 holds source lanes 4-7.
  %sub0 = shufflevector <16 x i32> %a0, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sub1 = shufflevector <16 x i32> %a0, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Lane 2 of %sub0 is source lane 2; lane 3 of %sub1 is source lane 7.
  %elt2 = extractelement <4 x i32> %sub0, i64 2
  %elt7 = extractelement <4 x i32> %sub1, i64 3
  ; Both result lanes are written below, so the poison placeholder is never
  ; observable; poison matches the shufflevector operands above.
  %scl0 = insertelement <2 x i32> poison, i32 %elt2, i32 0
  %scl1 = insertelement <2 x i32> %scl0, i32 %elt7, i32 1
  ret <2 x i32> %scl1
}