Make a separate routine for GEP cost calculation and make the approach uniform across load, store, and GEP tree nodes. An additional issue fixed: GEP cost savings were applied twice for ScatterVectorize nodes (a.k.a. gather loads), making them look unrealistically profitable for vectorization. Differential Revision: https://reviews.llvm.org/D140789
58 lines
2.9 KiB
LLVM
58 lines
2.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-13 | FileCheck %s

; Both predecessors of %join load four i64 values from %arg at byte offsets
; 0, 8, 24 and 32. There is no load at offset 16, so the four addresses are
; not consecutive. Each loaded value feeds one of four scalar phis in %join.
;
; The autogenerated assertions below expect every group of four loads to be
; replaced by a single llvm.masked.gather of <4 x i64>, and the four scalar
; phis to be replaced by one <4 x i64> phi.
define void @test(i1 %c, ptr %arg) {
; CHECK-LABEL: @test(
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
; CHECK: if:
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <4 x ptr> [[SHUFFLE]], <4 x i64> <i64 32, i64 24, i64 8, i64 0>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
; CHECK-NEXT: br label [[JOIN:%.*]]
; CHECK: else:
; CHECK-NEXT: [[ARG_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 8
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[ARG]], i32 0
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x ptr> [[TMP4]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <2 x ptr> [[SHUFFLE1]], <2 x i64> <i64 32, i64 24>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 undef, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x ptr> [[TMP8]], ptr [[ARG_1]], i32 2
; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP9]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
; CHECK-NEXT: br label [[JOIN]]
; CHECK: join:
; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i64> [ [[TMP3]], [[IF]] ], [ [[TMP10]], [[ELSE]] ]
; CHECK-NEXT: ret void
;
  br i1 %c, label %if, label %else

; Four i64 loads from %arg at byte offsets 0, 8, 24 and 32.
if:
  %i2.0 = load i64, ptr %arg, align 8
  %arg2.1 = getelementptr inbounds i8, ptr %arg, i64 8
  %i2.1 = load i64, ptr %arg2.1, align 8
  %arg2.2 = getelementptr inbounds i8, ptr %arg, i64 24
  %i2.2 = load i64, ptr %arg2.2, align 8
  %arg2.3 = getelementptr inbounds i8, ptr %arg, i64 32
  %i2.3 = load i64, ptr %arg2.3, align 8
  br label %join

; Same access pattern as the 'if' block, using a separate set of values.
else:
  %i.0 = load i64, ptr %arg, align 8
  %arg.1 = getelementptr inbounds i8, ptr %arg, i64 8
  %i.1 = load i64, ptr %arg.1, align 8
  %arg.2 = getelementptr inbounds i8, ptr %arg, i64 24
  %i.2 = load i64, ptr %arg.2, align 8
  %arg.3 = getelementptr inbounds i8, ptr %arg, i64 32
  %i.3 = load i64, ptr %arg.3, align 8
  br label %join

; The phis are listed from the highest offset (32) down to the lowest (0).
; The expected vector GEP in the 'if' block uses the same order:
; offsets 32, 24, 8, 0.
join:
  %phi.3 = phi i64 [ %i2.3, %if ], [ %i.3, %else ]
  %phi.2 = phi i64 [ %i2.2, %if ], [ %i.2, %else ]
  %phi.1 = phi i64 [ %i2.1, %if ], [ %i.1, %else ]
  %phi.0 = phi i64 [ %i2.0, %if ], [ %i.0, %else ]
  ret void
}
|
|
|