Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/remark-partial-loads-vectorize.ll
Alexey Bataev f3d2609af3 [SLP]Improve/fix subvectors in gather/buildvector nodes handling
SLP vectorizer has an estimation for gather/buildvector nodes, which
contain some scalar loads. SLP vectorizer performs pretty similar (but
large in SLOCs) estimation, which not always correct. Instead, this
patch implements clustering analysis and actual node allocation with the
full analysis for the vectorized clustered scalars (not only loads, but
also some other instructions) with the correct cost estimation and
vector insert instructions. Improves overall vectorization quality and
simplifies analysis/estimations.

Reviewers: RKSimon

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/104144
2024-08-23 06:45:22 -07:00

40 lines
2.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512 -pass-remarks-output=%t | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
; YAML-LABEL: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: test
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '-4'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '5'
define <4 x float> @test(ptr %x, float %v, float %a) {
; CHECK-LABEL: define <4 x float> @test(
; CHECK-SAME: ptr [[X:%.*]], float [[V:%.*]], float [[A:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[X]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[A]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[V]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP1]], i64 2)
; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP3]], [[TMP7]]
; CHECK-NEXT: ret <4 x float> [[TMP8]]
;
%gep1 = getelementptr inbounds <4 x float>, ptr %x, i64 0, i64 1
%x0 = load float, ptr %x, align 4
%x1 = load float, ptr %gep1, align 4
%add1 = fadd float %a, %v
%add2 = fadd float %a, %v
%add3 = fadd float %a, %x0
%add4 = fadd float %a, %x1
%i0 = insertelement <4 x float> undef, float %add1, i32 0
%i1 = insertelement <4 x float> %i0, float %add2, i32 1
%i2 = insertelement <4 x float> %i1, float %add3, i32 2
%i3 = insertelement <4 x float> %i2, float %add4, i32 3
ret <4 x float> %i3
}