Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll
Valery N Dmitriev 6d677c0b3d [SLP] Unify GEP cost modeling for load, store and GEP nodes.
Make a separate routine for GEP cost calculation and make
the approach uniform across load, store and GEP tree nodes.
An additional issue fixed: GEP cost savings were applied twice
for ScatterVectorize nodes (aka gather loads), making them look
unrealistically profitable for vectorization.

Differential Revision: https://reviews.llvm.org/D140789
2023-01-05 10:11:36 -08:00

58 lines
2.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-13 | FileCheck %s
; Both arms of the branch load four i64 values from %arg at byte offsets
; 0, 8, 24 and 32 (offset 16 is skipped, so the addresses are not contiguous),
; and the join block merges the matching loads with four scalar phis.
; The autogenerated assertions below expect each arm to become a single
; <4 x i64> masked gather and the four phis to become a single vector phi.
define void @test(i1 %c, ptr %arg) {
; CHECK-LABEL: @test(
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
; CHECK: if:
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <4 x ptr> [[SHUFFLE]], <4 x i64> <i64 32, i64 24, i64 8, i64 0>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
; CHECK-NEXT: br label [[JOIN:%.*]]
; CHECK: else:
; CHECK-NEXT: [[ARG_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 8
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[ARG]], i32 0
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x ptr> [[TMP4]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <2 x ptr> [[SHUFFLE1]], <2 x i64> <i64 32, i64 24>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 undef, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x ptr> [[TMP8]], ptr [[ARG_1]], i32 2
; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP9]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
; CHECK-NEXT: br label [[JOIN]]
; CHECK: join:
; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i64> [ [[TMP3]], [[IF]] ], [ [[TMP10]], [[ELSE]] ]
; CHECK-NEXT: ret void
;
; Entry block: select one of two arms that perform the same four loads.
br i1 %c, label %if, label %else
if:
; Four i64 loads from %arg at byte offsets 0, 8, 24 and 32.
; Expected result for this arm: a splat of %arg, one vector GEP with offsets
; <32, 24, 8, 0>, and one masked gather.
%i2.0 = load i64, ptr %arg, align 8
%arg2.1 = getelementptr inbounds i8, ptr %arg, i64 8
%i2.1 = load i64, ptr %arg2.1, align 8
%arg2.2 = getelementptr inbounds i8, ptr %arg, i64 24
%i2.2 = load i64, ptr %arg2.2, align 8
%arg2.3 = getelementptr inbounds i8, ptr %arg, i64 32
%i2.3 = load i64, ptr %arg2.3, align 8
br label %join
else:
; The same four loads as the if arm, under different value names.
; The expected output for this arm is built differently: the scalar GEP for
; offset 8 is kept and inserted into lane 2 of the pointer vector, offsets 32
; and 24 come from a <2 x ptr> vector GEP, and %arg itself fills lane 3.
%i.0 = load i64, ptr %arg, align 8
%arg.1 = getelementptr inbounds i8, ptr %arg, i64 8
%i.1 = load i64, ptr %arg.1, align 8
%arg.2 = getelementptr inbounds i8, ptr %arg, i64 24
%i.2 = load i64, ptr %arg.2, align 8
%arg.3 = getelementptr inbounds i8, ptr %arg, i64 32
%i.3 = load i64, ptr %arg.3, align 8
br label %join
join:
; The phis are listed from offset 32 down to offset 0, which matches the lane
; order <32, 24, 8, 0> of the expected gathers. None of the phi values is used
; afterwards; the function just returns.
%phi.3 = phi i64 [ %i2.3, %if ], [ %i.3, %else ]
%phi.2 = phi i64 [ %i2.2, %if ], [ %i.2, %else ]
%phi.1 = phi i64 [ %i2.1, %if ], [ %i.1, %else ]
%phi.0 = phi i64 [ %i2.0, %if ], [ %i.0, %else ]
ret void
}