clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll
Alexey Bataev fa80c47c6a [SLP] Fix vectorization for tree with trunc to minimum required bit width.
Summary:
If the vectorized tree has a truncate to the minimum required bit width, and
the vector type of the cast operation after the truncation is the same as
the vector type of the cast operands, count the cost of the vector cast
operation as 0, because this cast will be removed later.
Also, if the root operations of the vectorization tree are integer casts, do not consider them as candidates for truncation: truncating them would only create extra copies of the same vector/scalar operations, which would later be removed by the instcombiner.
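
For illustration, a minimal sketch of the root-cast pattern this change preserves (it mirrors the CHECK lines in the test below; %x, %vx and the other value names here are placeholders, not taken from the test):

; Scalar root chain per lane: an fptosi feeding the sext that produces the final value.
%c = fptosi double %x to i32
%w = sext i32 %c to i64

; After SLP vectorization the root sext is not demoted to a narrower type, so the
; same cast chain is kept on the corresponding vector types:
%vc = fptosi <2 x double> %vx to <2 x i32>
%vw = sext <2 x i32> %vc to <2 x i64>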

Reviewers: RKSimon, spatel, mkuper, hfinkel, mssimpso

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D41948

llvm-svn: 322946
2018-01-19 14:40:13 +00:00


; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -verify -slp-vectorizer -o - -S -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s
@global = local_unnamed_addr global [6 x double] zeroinitializer, align 16
define { i64, i64 } @patatino(double %arg) {
; CHECK-LABEL: @patatino(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[ARG:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i64, i64 } [[TMP16]], i64 [[TMP11]], 1
; CHECK-NEXT: ret { i64, i64 } [[TMP17]]
;
bb:
%tmp = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 0), align 16
%tmp1 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2), align 16
%tmp2 = fmul double %tmp1, %arg
%tmp3 = fadd double %tmp, %tmp2
%tmp4 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4), align 16
%tmp5 = fadd double %tmp4, %tmp3
%tmp6 = fptosi double %tmp5 to i32
%tmp7 = sext i32 %tmp6 to i64
%tmp8 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 1), align 8
%tmp9 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 3), align 8
%tmp10 = fmul double %tmp9, %arg
%tmp11 = fadd double %tmp8, %tmp10
%tmp12 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 5), align 8
%tmp13 = fadd double %tmp12, %tmp11
%tmp14 = fptosi double %tmp13 to i32
%tmp15 = sext i32 %tmp14 to i64
%tmp16 = insertvalue { i64, i64 } undef, i64 %tmp7, 0
%tmp17 = insertvalue { i64, i64 } %tmp16, i64 %tmp15, 1
ret { i64, i64 } %tmp17
}