Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/extractelements-vector-ops-shuffle.ll
Alexey Bataev 95703642e3 [SLP]Fix PR72202: wrong mask emission for the first found vector
operand.

The submask must be copied to its proper position within the common
extractelements vector mask, not always to the very first part of it;
otherwise wrong code is emitted.
2023-11-16 07:01:05 -08:00

55 lines
3.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
; Regression test for PR72202 (wrong shuffle-mask emission in the SLP
; vectorizer when combining extractelements from several source vectors).
;
; The input reads elements 5, 6, 8 and 9 of a [13 x double] array based at
; null and uses them in two blocks:
;   * entry: a `reassoc nsz` fadd chain over four products with 0.0, whose
;     result is multiplied by 0.0 and stored to null;
;   * bb:    a plain (no fast-math flags) fadd chain over four products with
;     0.0, whose result is returned.
;
; Expected output, as pinned by the autogenerated assertions below: the four
; scalar loads become two <2 x double> loads (elements 5-6 and 8-9); the entry
; chain becomes a <4 x double> fmul feeding llvm.vector.reduce.fadd; the bb
; chain keeps its scalar fadds, fed by extractelements of <2 x double> fmuls.
define double @test() {
; CHECK-LABEL: define double @test() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 5), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 8), align 16
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> zeroinitializer, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call reassoc nsz double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = fmul double [[TMP6]], 0.000000e+00
; CHECK-NEXT: store double [[TMP7]], ptr null, align 16
; CHECK-NEXT: br label [[BB:%.*]]
; CHECK: bb:
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = fadd double [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP12]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = fadd double [[TMP13]], [[TMP11]]
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[TMP12]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = fadd double [[TMP15]], [[TMP14]]
; CHECK-NEXT: ret double [[TMP16]]
;
entry:
; Two constant-only products start the reassociable reduction chain.
%0 = fmul double 0.000000e+00, 0.000000e+00
%1 = fmul double 0.000000e+00, 0.000000e+00
%2 = fadd reassoc nsz double %1, %0
; Elements 5 and 6 are adjacent; only element 6 (%4) is used in this block,
; element 5 (%3) is used only in %bb.
%3 = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 5), align 8
%4 = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 6), align 16
%5 = fmul double %4, 0.000000e+00
%6 = fadd reassoc nsz double %5, %2
; Elements 8 and 9 are adjacent; only element 9 (%8) is used in this block,
; element 8 (%7) is used only in %bb.
%7 = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 8), align 16
%8 = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 9), align 8
%9 = fmul double %8, 0.000000e+00
%10 = fadd reassoc nsz double %9, %6
; Scale the reduced sum and store it.
%11 = fmul double %10, 0.000000e+00
store double %11, ptr null, align 16
br label %bb
bb:
; Second sum over all four loaded elements (9, 8, 6, then 5). These fadds
; carry no fast-math flags, unlike the chain in the entry block.
%12 = fmul double %8, 0.000000e+00
%13 = fmul double %7, 0.000000e+00
%14 = fadd double %13, %12
%15 = fmul double %4, 0.000000e+00
%16 = fadd double %15, %14
%17 = fmul double %3, 0.000000e+00
%18 = fadd double %17, %16
ret double %18
}