Another issue unearthed by D127115. We take a long time to canonicalize an insert_vector_elt chain before being able to convert it into a build_vector - even if the insertions are already in ascending order, we fold the nodes one at a time into the build_vector 'seed', leaving plenty of time for other folds to alter it (in particular, recognising when the inserted elements come from extract_vector_elt, resulting in a shuffle_vector that is much harder to fold with). D127115 makes this particularly difficult, as we're almost guaranteed to have lost the sequence before all possible insertions have been folded. This patch proposes to begin at the last insertion and attempt to collect all the (one-use) insertions right away and create the build_vector before it's too late. Differential Revision: https://reviews.llvm.org/D127595
34 lines
1.3 KiB
LLVM
34 lines
1.3 KiB
LLVM
; REQUIRES: arm-registered-target
|
|
; REQUIRES: asserts
|
|
; RUN: llc -o /dev/null %s -debug-only=legalize-types 2>&1 | FileCheck %s
|
|
|
|
; This test checks that when v4f64 gets broken down to two v2f64 it maintains
|
|
; the "nnan" flags.
|
|
|
|
; CHECK: Legalizing node: [[VFOUR:t.*]]: v4f64 = BUILD_VECTOR
|
|
; CHECK-NEXT: Analyzing result type: v4f64
|
|
; CHECK-NEXT: Split node result: [[VFOUR]]: v4f64 = BUILD_VECTOR
|
|
|
|
; CHECK: Legalizing node: [[VTWOA:t.*]]: v2f64 = BUILD_VECTOR
|
|
; CHECK: Legally typed node: [[VTWOA]]: v2f64 = BUILD_VECTOR
|
|
; CHECK: Legalizing node: [[VTWOB:t.*]]: v2f64 = BUILD_VECTOR
|
|
; CHECK: Legally typed node: [[VTWOB]]: v2f64 = BUILD_VECTOR
|
|
; CHECK: Legalizing node: t30: v2f64 = fmaxnum nnan reassoc [[VTWOB]], [[VTWOA]]
|
|
|
|
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
target triple = "aarch64--linux-gnu"
|
|
|
|
|
|
; Function Attrs: norecurse nounwind
|
|
; Test input: assembles a <4 x double> from the four scalar arguments with an
; ascending insertelement chain (indices 0 through 3, starting from undef) and
; then reduces it with llvm.vector.reduce.fmax. The CHECK lines earlier in this
; file expect the vector to appear as a v4f64 BUILD_VECTOR that is split into
; two v2f64 BUILD_VECTOR nodes during type legalization.
define fastcc double @test(double %a0, double %a1, double %a2, double %a3) unnamed_addr #1 {
|
|
entry:
|
|
%0 = insertelement <4 x double> undef, double %a0, i32 0
|
|
%1 = insertelement <4 x double> %0, double %a1, i32 1
|
|
%2 = insertelement <4 x double> %1, double %a2, i32 2
|
|
%3 = insertelement <4 x double> %2, double %a3, i32 3
|
|
; The nnan and reassoc flags on this call are the ones the final CHECK line
; looks for on the v2f64 fmaxnum node.
%4 = call nnan reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> %3)
|
|
ret double %4
|
|
}
|
|
|
|
; Declaration of the fmax vector-reduction intrinsic called by @test.
declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
|