We don't need to restrict this to double-width vectors, as long as we correctly bitcast the types. Improves the fix for #97968.
21 lines
908 B
LLVM
21 lines
908 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s

; Regression test for #97968.
;
; Builds a <2 x i32> from lanes 2 and 7 of the <16 x i32> argument, reaching
; those lanes indirectly: the source is first split into two <4 x i32>
; subvectors and the scalars are extracted from the subvectors. The CHECK
; lines expect the whole sequence to be selected as a single vpermps on the
; full zmm register with the index pair [2,7].
define <2 x i32> @PR97968(<16 x i32> %a0) {
; CHECK-LABEL: PR97968:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = [2,7,2,7]
; CHECK-NEXT:    # xmm1 = mem[0,0]
; CHECK-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  ; %sub0 = lanes 0..3 of %a0, %sub1 = lanes 4..7 of %a0.
  %sub0 = shufflevector <16 x i32> %a0, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sub1 = shufflevector <16 x i32> %a0, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; %sub0[2] is %a0[2]; %sub1[3] is %a0[4 + 3] = %a0[7].
  %elt2 = extractelement <4 x i32> %sub0, i64 2
  %elt7 = extractelement <4 x i32> %sub1, i64 3
  ; Both result lanes are written below, so the base vector is a pure
  ; placeholder; poison is used rather than the deprecated undef.
  %scl0 = insertelement <2 x i32> poison, i32 %elt2, i32 0
  %scl1 = insertelement <2 x i32> %scl0, i32 %elt7, i32 1
  ret <2 x i32> %scl1
}
|