Files
clang-p2996/llvm/test/CodeGen/X86/trunc-vector-width.ll
Simon Pilgrim 8ac00ca486 [X86] lowerShuffleWithUndefHalf - don't split vXi8 unary shuffles if the 128-bit source lanes are already in place (#122919)
Allows us to use PSHUFB to shuffle the lanes, and then perform a sub-lane permutation down to the lower half

Fixes #116815
2025-01-15 08:19:54 +00:00

31 lines
1.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -mattr=prefer-256-bit | FileCheck %s
; Strided byte extraction followed by a bitwise NOT:
;   1. load 64 bytes from %a0 (align 1),
;   2. keep every fourth byte (indices 0, 4, ..., 60) as a <16 x i8>,
;   3. invert all bits by xor-ing with an all-ones vector,
;   4. store elements 1, 4 and 8 of the result through an undef pointer.
; Only those three elements of the shuffled vector are ever used.
define void @test(ptr %a0) #0 {
; CHECK-LABEL: test:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqu (%rdi), %ymm0
; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,4,0,0]
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = mem[0],ymm0[1,2,3,4,5,6,7]
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,20,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vpternlogq {{.*#+}} xmm0 = ~xmm0
; CHECK-NEXT: vpextrb $1, %xmm0, (%rax)
; CHECK-NEXT: vpextrb $4, %xmm0, (%rax)
; CHECK-NEXT: vpextrb $8, %xmm0, (%rax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  ; Full 64-byte source vector, loaded with byte alignment.
  %wide = load <64 x i8>, ptr %a0, align 1
  ; Stride-4 selection: result element k is source byte 4*k.
  %stride4 = shufflevector <64 x i8> %wide, <64 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
  ; xor with -1 in every lane is a bitwise NOT of the selected bytes.
  %inverted = xor <16 x i8> %stride4, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; Pull out the three elements that are actually consumed.
  %lane1 = extractelement <16 x i8> %inverted, i32 1
  %lane4 = extractelement <16 x i8> %inverted, i32 4
  %lane8 = extractelement <16 x i8> %inverted, i32 8
  ; The destination address is undef; the stores only keep the lanes live.
  store i8 %lane1, ptr undef, align 1
  store i8 %lane4, ptr undef, align 1
  store i8 %lane8, ptr undef, align 1
  ret void
}
; Attribute group #0: sets "min-legal-vector-width" to "0" and "target-cpu"
; to "skylake-avx512" (the same CPU the RUN line selects via -mcpu).
attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="skylake-avx512" }