clang-p2996/llvm/test/CodeGen/X86/pr64655.ll
Simon Pilgrim ba818c4019 [DAG] replaceStoreOfInsertLoad - don't fold if the inserted element is implicitly truncated
D152276 wasn't handling the case where the inserted element is implicitly truncated into the vector - resulting in an i1 element (implicitly truncated from i8) overwriting 8 bits instead of 1 bit.
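
In miniature, a hedged sketch of the pattern where the fold must not fire - it mirrors the @f test below. The inserted i1 is carried in an i8 register, so narrowing the vector store into a scalar store of that element would write a whole byte of the one-byte mask instead of a single bit:

; Illustrative only: load an 8-bit mask, set one lane, store the mask back.
; The combine must keep the full <8 x i1> store rather than replace it with a
; store of the (i8-held) inserted element.
define void @insert_into_mask(ptr %p) {
  %mask = load <8 x i1>, ptr %p
  %upd = insertelement <8 x i1> %mask, i1 true, i32 2
  store <8 x i1> %upd, ptr %p
  ret void
}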

This patch is intended to be merged into 17.x, so I've just disallowed any vector element vs inserted element type mismatch - technically we could be more elegant and permit truncated stores (as long as the store is still byte-sized), but the use cases for that are so limited that I'd prefer to play it safe for now.
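
For contrast, a hypothetical case (not part of this test) where the inserted element's type matches the vector element type, so there is no implicit truncation and the existing fold from D152276 should remain applicable:

; Hypothetical byte-element case: %b is already i8, matching the <16 x i8>
; element type, so storing just the inserted element stays byte-sized and safe.
define void @insert_byte(ptr %p, i8 %b) {
  %v = load <16 x i8>, ptr %p
  %u = insertelement <16 x i8> %v, i8 %b, i32 3
  store <16 x i8> %u, ptr %p
  ret void
}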

Candidate patch for #64655 17.x merge

Differential Revision: https://reviews.llvm.org/D158366
2023-08-21 11:22:07 +01:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512
define void @f(ptr %0) {
; AVX2-LABEL: f:
; AVX2: # %bb.0:
; AVX2-NEXT: movzbl (%rdi), %eax
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrb $2, %cl
; AVX2-NEXT: andb $1, %cl
; AVX2-NEXT: movl %eax, %edx
; AVX2-NEXT: andb $1, %dl
; AVX2-NEXT: vmovd %edx, %xmm0
; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrb $3, %cl
; AVX2-NEXT: andb $1, %cl
; AVX2-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrb $4, %cl
; AVX2-NEXT: andb $1, %cl
; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrb $5, %cl
; AVX2-NEXT: andb $1, %cl
; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrb $6, %cl
; AVX2-NEXT: andb $1, %cl
; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX2-NEXT: shrb $7, %al
; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl $1, %eax
; AVX2-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: retq
;
; AVX512-LABEL: f:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovb (%rdi), %k0
; AVX512-NEXT: movb $-3, %al
; AVX512-NEXT: kmovd %eax, %k1
; AVX512-NEXT: kandb %k1, %k0, %k0
; AVX512-NEXT: movb $1, %al
; AVX512-NEXT: kmovd %eax, %k1
; AVX512-NEXT: kshiftlb $7, %k1, %k1
; AVX512-NEXT: kshiftrb $6, %k1, %k1
; AVX512-NEXT: korb %k1, %k0, %k0
; AVX512-NEXT: kmovb %k0, (%rdi)
; AVX512-NEXT: retq
%2 = load <8 x i1>, ptr %0
%3 = insertelement <8 x i1> %2, i1 true, i32 1
store <8 x i1> %3, ptr %0
ret void
}