Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-const-undef.ll
Alexey Bataev 48bc5b0a29 [SLP][PR64099]Fix unsound undef to poison transformation when handling
insertelement instructions.

If the original vector contains undef (rather than poison) elements that
are not overwritten by later insertelement instructions, the shuffle must
be built with an undef vector and the actual element indices, not with
a poison vector and PoisonMaskElem; otherwise the transformation may
produce more poison in its output than the input contained.
2023-07-27 16:09:49 -07:00

28 lines
1.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=slp-vectorizer -slp-threshold=0 < %s | FileCheck %s
; Regression test (PR64099): insertelement build-vector whose constant base
; mixes poison and undef lanes.
;
; Lanes 0 and 1 of the returned vector are computed per lane as
;   (%c[i] != 0) ? %a[i] : %b[i]
; and inserted into the constant <poison, poison, undef, undef>. Lanes 2 and 3
; are never overwritten, so they keep the base vector's undef values.
;
; The assertions below expect the vectorized <2 x float> select to be widened
; by a shufflevector whose second operand is `undef` and whose mask uses the
; explicit indices 2 and 3 for the untouched lanes, so that those lanes stay
; undef instead of being turned into poison.
define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
; CHECK-LABEL: @simple_select(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP3]], <2 x float> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP6]]
;
; Scalarize lanes 0 and 1 of the condition source %c and of both operands.
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
%b0 = extractelement <4 x float> %b, i32 0
%b1 = extractelement <4 x float> %b, i32 1
; Per-lane condition: the corresponding lane of %c is non-zero.
%cmp0 = icmp ne i32 %c0, 0
%cmp1 = icmp ne i32 %c1, 0
; Per-lane choice: the %a lane when the condition holds, otherwise the %b lane.
%s0 = select i1 %cmp0, float %a0, float %b0
%s1 = select i1 %cmp1, float %a1, float %b1
; Build the result: the two poison lanes (0 and 1) are overwritten by the
; selects, while the two undef lanes (2 and 3) are left as they are.
%ra = insertelement <4 x float> <float poison, float poison, float undef, float undef>, float %s0, i32 0
%rb = insertelement <4 x float> %ra, float %s1, i32 1
ret <4 x float> %rb
}