This is a follow-up to 924907bc6, and is mostly motivated by consistency, but it does include one additional optimization. In general, we prefer 0.0 over -0.0 as the identity value for an fadd. We use that value in several places, but not in others. So, let's be consistent and use the same identity (when nsz allows) everywhere. This creates a bunch of test churn, but due to 924907bc6, most of that churn doesn't actually indicate a change in codegen. The exception is that this change enables the use of 0.0 for `nsz`, but *not* `reassoc`, fadd reductions. Or, said differently, it allows the neutral value of an ordered fadd reduction to be 0.0.
56 lines
3.1 KiB
LLVM
56 lines
3.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
|
|
|
|
; Test input for the slp-vectorizer pass: two blocks that each sum a chain of
; "value * 0.0" products.
;   - entry: the sum is built from `fadd reassoc nsz` instructions; the
;     expected output folds it into one llvm.vector.reduce.fadd.v4f64 call
;     whose start value is 0.0.
;   - bb:    the sum is built from plain `fadd` instructions (no fast-math
;     flags); the expected output keeps scalar fadds fed by extractelement.
define double @test() {
|
|
; CHECK-LABEL: define double @test() {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 6), align 16
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 5), align 8
|
|
; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 9), align 8
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 8), align 16
|
|
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison>, double [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP17]], double [[TMP2]], i32 3
|
|
; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> zeroinitializer, [[TMP4]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call reassoc nsz double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
|
|
; CHECK-NEXT: [[TMP7:%.*]] = fmul double [[TMP6]], 0.000000e+00
|
|
; CHECK-NEXT: store double [[TMP7]], ptr null, align 16
|
|
; CHECK-NEXT: br label [[BB:%.*]]
|
|
; CHECK: bb:
|
|
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP1]], zeroinitializer
|
|
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
|
|
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
|
|
; CHECK-NEXT: [[TMP11:%.*]] = fadd double [[TMP9]], [[TMP10]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP0]], zeroinitializer
|
|
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP12]], i32 1
|
|
; CHECK-NEXT: [[TMP14:%.*]] = fadd double [[TMP13]], [[TMP11]]
|
|
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[TMP12]], i32 0
|
|
; CHECK-NEXT: [[TMP16:%.*]] = fadd double [[TMP15]], [[TMP14]]
|
|
; CHECK-NEXT: ret double [[TMP16]]
|
|
;
|
|
; Input IR starts here. The entry block adds four products with
; `fadd reassoc nsz`: two constant products (%0, %1) and two products of
; loaded elements (%5, %9). The result is scaled by 0.0 and stored through
; a null pointer.
entry:
|
|
%0 = fmul double 0.000000e+00, 0.000000e+00
|
|
%1 = fmul double 0.000000e+00, 0.000000e+00
|
|
%2 = fadd reassoc nsz double %1, %0
|
|
; %3 (element 5) is not used in this block; its only use is %17 in bb.
%3 = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 5), align 8
|
|
%4 = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 6), align 16
|
|
%5 = fmul double %4, 0.000000e+00
|
|
%6 = fadd reassoc nsz double %5, %2
|
|
; %7 (element 8) is likewise only used in bb (by %13).
%7 = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 8), align 16
|
|
%8 = load double, ptr getelementptr inbounds ([13 x double], ptr null, i64 0, i64 9), align 8
|
|
%9 = fmul double %8, 0.000000e+00
|
|
%10 = fadd reassoc nsz double %9, %6
|
|
%11 = fmul double %10, 0.000000e+00
|
|
store double %11, ptr null, align 16
|
|
br label %bb
|
|
|
|
; Second chain: sums the products of all four loaded values (%8, %7, %4, %3)
; using fadds without fast-math flags, then returns the total.
; NOTE(review): presumably the missing `reassoc` flag is why the expected
; output keeps these adds scalar instead of forming a reduction - confirm.
bb:
|
|
%12 = fmul double %8, 0.000000e+00
|
|
%13 = fmul double %7, 0.000000e+00
|
|
%14 = fadd double %13, %12
|
|
%15 = fmul double %4, 0.000000e+00
|
|
%16 = fadd double %15, %14
|
|
%17 = fmul double %3, 0.000000e+00
|
|
%18 = fadd double %17, %16
|
|
ret double %18
|
|
}
|