clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll
commit 087dadfd37 by Alexey Bataev (2022-10-18 11:55:59 -07:00)

[SLP]Generalize cost model.

Generalized the cost model estimation: improved the cost estimation for
repeated scalars (their cost no longer needs to be counted) and improved
the cost model for extractelement instructions.

cpu2017
   511.povray_r             0.57
   520.omnetpp_r           -0.98
   521.wrf_r               -0.01
   525.x264_r               3.59 <+
   526.blender_r           -0.12
   531.deepsjeng_r         -0.07
   538.imagick_r           -1.42
Geometric mean:  0.21

Differential Revision: https://reviews.llvm.org/D115757

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s
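
; Both loads are reused by two stores each: the stored values form a
; <t0, t1, t0, t1> pattern. SLP should emit one <2 x i64> load, a
; <0,1,0,1> broadcast shuffle, and a single <4 x i64> store, without
; counting the repeated scalars twice in the cost model.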
define void @i64_simplified(i64* noalias %st, i64* noalias %ld) {
; CHECK-LABEL: @i64_simplified(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[LD:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[ST:%.*]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8
; CHECK-NEXT:    ret void
;
  %arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
  %t0 = load i64, i64* %ld, align 8
  %t1 = load i64, i64* %arrayidx1, align 8
  %arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
  %arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
  %arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3
  store i64 %t0, i64* %st, align 8
  store i64 %t1, i64* %arrayidx3, align 8
  store i64 %t0, i64* %arrayidx4, align 8
  store i64 %t1, i64* %arrayidx5, align 8
  ret void
}
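
; Same pattern with the stored values swapped (<t1, t0, t1, t0>), so the
; broadcast uses the reversed <1,0,1,0> shuffle mask.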
define void @i64_simplifiedi_reversed(i64* noalias %st, i64* noalias %ld) {
; CHECK-LABEL: @i64_simplifiedi_reversed(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[LD:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[ST:%.*]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8
; CHECK-NEXT:    ret void
;
  %arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
  %t0 = load i64, i64* %ld, align 8
  %t1 = load i64, i64* %arrayidx1, align 8
  %arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
  %arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
  %arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3
  store i64 %t1, i64* %st, align 8
  store i64 %t0, i64* %arrayidx3, align 8
  store i64 %t1, i64* %arrayidx4, align 8
  store i64 %t0, i64* %arrayidx5, align 8
  ret void
}
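
; Here the stores follow a <t0, t0, t0, t1> pattern plus a fifth store
; back into %ld. The CHECK lines expect this function to stay scalar,
; i.e. the cost model does not consider a vector form profitable here.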
define void @i64_simplifiedi_extract(i64* noalias %st, i64* noalias %ld) {
; CHECK-LABEL: @i64_simplifiedi_extract(
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
; CHECK-NEXT:    [[T0:%.*]] = load i64, i64* [[LD]], align 8
; CHECK-NEXT:    [[T1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
; CHECK-NEXT:    store i64 [[T0]], i64* [[ST]], align 8
; CHECK-NEXT:    store i64 [[T0]], i64* [[ARRAYIDX3]], align 8
; CHECK-NEXT:    store i64 [[T0]], i64* [[ARRAYIDX4]], align 8
; CHECK-NEXT:    store i64 [[T1]], i64* [[ARRAYIDX5]], align 8
; CHECK-NEXT:    store i64 [[T1]], i64* [[LD]], align 8
; CHECK-NEXT:    ret void
;
  %arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
  %t0 = load i64, i64* %ld, align 8
  %t1 = load i64, i64* %arrayidx1, align 8
  %arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
  %arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
  %arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3
  store i64 %t0, i64* %st, align 8
  store i64 %t0, i64* %arrayidx3, align 8
  store i64 %t0, i64* %arrayidx4, align 8
  store i64 %t1, i64* %arrayidx5, align 8
  store i64 %t1, i64* %ld, align 8
  ret void
}