The SLP vectorizer needs to use the already-vectorized operands, rather than the vector operand (vecop) of the extractelement instructions, to avoid falsely detecting an extra vector operand when shuffling extractelements.
33 lines
2.1 KiB
LLVM
33 lines
2.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt --passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s

; Regression test for extractelement shuffling in the SLP vectorizer: the
; already-vectorized operands must be used, not the vector operand of the
; extractelement instructions, so that no extra vector operand is falsely
; detected.
;
; The input is a single self-looping block: three loop-carried doubles feed
; three scalar fma calls, whose results are inserted into an <8 x double>
; and then partially extracted again to form the next iteration's values.
; The expected output replaces the scalar phis/fmas with one <4 x double>
; phi and one @llvm.fma.v4f64 call, followed by shufflevectors.

define i32 @test() {
; CHECK-LABEL: define i32 @test(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    br label [[TMP1:%.*]]
; CHECK:       1:
; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x double> [ zeroinitializer, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[TMP1]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.fma.v4f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer, <4 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x double> zeroinitializer, <8 x double> [[TMP4]], <4 x i32> <i32 0, i32 8, i32 poison, i32 8>
; CHECK-NEXT:    [[TMP6]] = shufflevector <4 x double> [[TMP5]], <4 x double> <double poison, double poison, double 0.000000e+00, double poison>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    br label [[TMP1]]
;
  br label %1

; Block %1 branches only to itself, so the function has no ret instruction.
1:
  ; Loop-carried scalars. %.i489 is 0.0 on both incoming edges; the other two
  ; take their back-edge values from the extractelements at the bottom.
  %.i489 = phi double [ 0.000000e+00, %0 ], [ 0.000000e+00, %1 ]
  %.i1102 = phi double [ 0.000000e+00, %0 ], [ %.i1110, %1 ]
  %.i4105 = phi double [ 0.000000e+00, %0 ], [ %.i4113, %1 ]
  ; One scalar fma per phi; only the third (addend) operand differs.
  %.i14525 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i1102)
  %.i24526 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i489)
  %.i44529 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i4105)
  ; Buildvector chain on a zero <8 x double>: lanes 1 and 3 both receive
  ; %.i14525, lane 2 receives %.i24526 and lane 0 receives %.i44529.
  %.upto16034 = insertelement <8 x double> zeroinitializer, double %.i14525, i64 1
  %.upto26035 = insertelement <8 x double> %.upto16034, double %.i24526, i64 2
  %.upto36036 = insertelement <8 x double> %.upto26035, double %.i14525, i64 3
  %.upto46037 = insertelement <8 x double> %.upto36036, double %.i44529, i64 0
  ; The two extractelements read lane 0 of different vectors: the freshly
  ; built vector and a plain zeroinitializer.
  %.i1110 = extractelement <8 x double> %.upto46037, i64 0
  %.i4113 = extractelement <8 x double> zeroinitializer, i64 0
  br label %1
}