If a scalar does not need to be scheduled and has already been vectorized in one of the vector nodes, we can still try to vectorize it in another node. Its cost just does not need to be accounted for in the total scalar cost, as it will be handled in the main vectorized node. Differential Revision: https://reviews.llvm.org/D159205
53 lines
2.4 KiB
LLVM
53 lines
2.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-14 | FileCheck %s

; SLP vectorizer test. Both predecessors of %join perform the same four scalar
; i64 loads from %arg at byte offsets 0, 8, 24 and 32 (offset 16 is skipped, so
; the four addresses are not contiguous). The loaded values are merged
; lane-by-lane by four scalar phis in %join; the phi results are never used and
; the function returns void.
;
; The autogenerated assertions below expect each predecessor to be rewritten
; into a single <4 x i64> llvm.masked.gather and the four scalar phis to
; collapse into one <4 x i64> phi. The phis are listed in descending offset
; order (.3 first), which lines up with the <32, 24, 8, 0> offset vector in the
; expected vector getelementptr.
define void @test(i1 %c, ptr %arg) {
; CHECK-LABEL: @test(
; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
; CHECK:       if:
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG:%.*]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <4 x ptr> [[TMP2]], <4 x i64> <i64 32, i64 24, i64 8, i64 0>
; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
; CHECK-NEXT:    br label [[JOIN:%.*]]
; CHECK:       else:
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 0
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, <4 x ptr> [[TMP6]], <4 x i64> <i64 32, i64 24, i64 8, i64 0>
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP7]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
; CHECK-NEXT:    br label [[JOIN]]
; CHECK:       join:
; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i64> [ [[TMP4]], [[IF]] ], [ [[TMP8]], [[ELSE]] ]
; CHECK-NEXT:    ret void
;
  br i1 %c, label %if, label %else

; First predecessor: four scalar loads at byte offsets 0, 8, 24, 32 from %arg.
if:
  ; Offset 0 is loaded directly through %arg, without a getelementptr.
  %i2.0 = load i64, ptr %arg, align 8
  %arg2.1 = getelementptr inbounds i8, ptr %arg, i64 8
  %i2.1 = load i64, ptr %arg2.1, align 8
  %arg2.2 = getelementptr inbounds i8, ptr %arg, i64 24
  %i2.2 = load i64, ptr %arg2.2, align 8
  %arg2.3 = getelementptr inbounds i8, ptr %arg, i64 32
  %i2.3 = load i64, ptr %arg2.3, align 8
  br label %join

; Second predecessor: the same four addresses are loaded again, under
; different value names.
else:
  %i.0 = load i64, ptr %arg, align 8
  %arg.1 = getelementptr inbounds i8, ptr %arg, i64 8
  %i.1 = load i64, ptr %arg.1, align 8
  %arg.2 = getelementptr inbounds i8, ptr %arg, i64 24
  %i.2 = load i64, ptr %arg.2, align 8
  %arg.3 = getelementptr inbounds i8, ptr %arg, i64 32
  %i.3 = load i64, ptr %arg.3, align 8
  br label %join

; Merge point: one scalar phi per loaded value, highest offset first.
join:
  %phi.3 = phi i64 [ %i2.3, %if ], [ %i.3, %else ]
  %phi.2 = phi i64 [ %i2.2, %if ], [ %i.2, %else ]
  %phi.1 = phi i64 [ %i2.1, %if ], [ %i.1, %else ]
  %phi.0 = phi i64 [ %i2.0, %if ], [ %i.0, %else ]
  ret void
}
|
|
|