operand. The submask must be copied to its proper position within the common extractelements vector mask, not to the very first part of it, to avoid wrong code emission.
55 lines
3.0 KiB
LLVM
55 lines
3.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s

; SLP-vectorizer regression test for x86-64.
;
; Four doubles are loaded from elements 5, 6, 8 and 9 of a [13 x double] array.
;   * In %entry, elements 6 and 9 are multiplied by 0.0 and summed, together
;     with two constant products, through a `reassoc nsz` fadd chain. The
;     expected output rewrites that chain as one <4 x double> fmul followed by
;     @llvm.vector.reduce.fadd, with the operand vector assembled by
;     shufflevector from lane 1 of each of the two <2 x double> loads.
;   * In %bb, all four loaded values are multiplied by 0.0 and summed through a
;     plain fadd chain (no fast-math flags). The expected output keeps those
;     fadds scalar and feeds them with extractelement instructions taken from
;     two <2 x double> fmuls of the same two vector loads.
;
; NOTE(review): presumably this guards the shuffle-mask construction for the
; vector operands shared between the two blocks (TMP2..TMP4 below) -- confirm
; against the commit that introduced the test.
define double @test() {
; CHECK-LABEL: define double @test() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 5), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 8), align 16
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> zeroinitializer, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call reassoc nsz double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = fmul double [[TMP6]], 0.000000e+00
; CHECK-NEXT: store double [[TMP7]], ptr null, align 16
; CHECK-NEXT: br label [[BB:%.*]]
; CHECK: bb:
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = fadd double [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP12]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = fadd double [[TMP13]], [[TMP11]]
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[TMP12]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = fadd double [[TMP15]], [[TMP14]]
; CHECK-NEXT: ret double [[TMP16]]
;
entry:
  ; Two constant products seed the reassociable sum.
  %0 = fmul double 0.000000e+00, 0.000000e+00
  %1 = fmul double 0.000000e+00, 0.000000e+00
  %2 = fadd reassoc nsz double %1, %0
  ; Elements 5 and 6: %3 is consumed only in %bb, %4 in both blocks.
  %3 = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 5), align 8
  %4 = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 6), align 16
  %5 = fmul double %4, 0.000000e+00
  %6 = fadd reassoc nsz double %5, %2
  ; Elements 8 and 9: %7 is consumed only in %bb, %8 in both blocks.
  %7 = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 8), align 16
  %8 = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 9), align 8
  %9 = fmul double %8, 0.000000e+00
  %10 = fadd reassoc nsz double %9, %6
  ; The reduced sum is scaled once more and stored.
  %11 = fmul double %10, 0.000000e+00
  store double %11, ptr null, align 16
  br label %bb

bb:
  ; Second use of the four loaded values: an fadd chain without fast-math
  ; flags, accumulated in the order element 9, 8, 6, 5.
  %12 = fmul double %8, 0.000000e+00
  %13 = fmul double %7, 0.000000e+00
  %14 = fadd double %13, %12
  %15 = fmul double %4, 0.000000e+00
  %16 = fadd double %15, %14
  %17 = fmul double %3, 0.000000e+00
  %18 = fadd double %17, %16
  ret double %18
}