D152276 wasn't handling the case where the inserted element is implicitly truncated into the vector, which resulted in an i1 element (implicitly truncated from i8) overwriting 8 bits instead of 1 bit. This patch is intended to be merged into 17.x, so I've simply disallowed any mismatch between the vector element type and the inserted element type. Technically we could be more elegant and permit truncated stores (as long as the store is still byte-sized), but the use cases for that are so limited that I'd prefer to play it safe for now. Candidate patch for the #64655 17.x merge. Differential Revision: https://reviews.llvm.org/D158366
60 lines
2.0 KiB
LLVM
60 lines
2.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512

; Regression test for issue #64655.
;
; An <8 x i1> vector is loaded from memory, lane 1 is set to true, and the
; result is stored back to the same address. The expected code reads and
; writes a single byte, so the insert must change only the bit belonging to
; lane 1 and leave the other seven lanes exactly as loaded.
;
; With x86-64-v4 the expected sequence stays in mask registers: bit 1 is
; cleared with the mask -3 (0xFD) and a lone 1 is shifted into bit 1 and
; OR-ed in. With x86-64-v3 each lane is extracted from the loaded byte,
; rebuilt in a vector register, and packed back into one byte.
;
; The check lines below are generated output; regenerate them with the
; script named in the first line rather than editing them by hand.
define void @f(ptr %0) {
; AVX2-LABEL: f:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movzbl (%rdi), %eax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrb $2, %cl
; AVX2-NEXT:    andb $1, %cl
; AVX2-NEXT:    movl %eax, %edx
; AVX2-NEXT:    andb $1, %dl
; AVX2-NEXT:    vmovd %edx, %xmm0
; AVX2-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrb $3, %cl
; AVX2-NEXT:    andb $1, %cl
; AVX2-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrb $4, %cl
; AVX2-NEXT:    andb $1, %cl
; AVX2-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrb $5, %cl
; AVX2-NEXT:    andb $1, %cl
; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrb $6, %cl
; AVX2-NEXT:    andb $1, %cl
; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    shrb $7, %al
; AVX2-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movl $1, %eax
; AVX2-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    movb %al, (%rdi)
; AVX2-NEXT:    retq
;
; AVX512-LABEL: f:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovb (%rdi), %k0
; AVX512-NEXT:    movb $-3, %al
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    kandb %k1, %k0, %k0
; AVX512-NEXT:    movb $1, %al
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    kshiftlb $7, %k1, %k1
; AVX512-NEXT:    kshiftrb $6, %k1, %k1
; AVX512-NEXT:    korb %k1, %k0, %k0
; AVX512-NEXT:    kmovb %k0, (%rdi)
; AVX512-NEXT:    retq
  %2 = load <8 x i1>, ptr %0
  %3 = insertelement <8 x i1> %2, i1 true, i32 1 ; overwrite lane 1 only
  store <8 x i1> %3, ptr %0
  ret void
}