Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
Alexey Bataev 73ce13d79b [SLP][TTI]Improve detection of the insert-subvector pattern for SLP. (#74749)
The SLP vectorizer passes the type of the subvector and the mask, whose size
determines the size of the resulting vector. TTI should support this
pattern to improve cost estimation of the insert_subvector shuffle
pattern.
2024-01-10 10:39:34 -05:00

52 lines
2.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux -pass-remarks-output=%t -mattr=+v -slp-threshold=-10 < %s | FileCheck %s
; RUN: FileCheck %s --check-prefix=YAML < %t
; YAML-LABEL: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: StoresVectorized
; YAML-NEXT: Function: test
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
; YAML-NEXT: - Cost: '3'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '7'
; Four scalar selects feed four adjacent float stores (byte offsets 0, 4, 8 and
; 12 from the null base pointer). The assertions below expect the SLP
; vectorizer to replace them with a single <4 x float> select and store. The
; condition comes from one <2 x float> fcmp whose result is repeated into four
; lanes with the shuffle mask <0, 1, 0, 1>. The remark checks at the top of the
; file expect a reported cost of 3 and a tree size of 7.
define void @test() {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr null, align 4
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr null, align 4
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr null, align 4
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP2]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <2 x float> [[TMP3]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i1> [[TMP6]], <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP9]], <4 x float> zeroinitializer
; CHECK-NEXT: store <4 x float> [[TMP10]], ptr null, align 4
; CHECK-NEXT: ret void
;
entry:
; Three scalar inputs, all loaded from the same (null) address.
%0 = load float, ptr null, align 4
%1 = load float, ptr null, align 4
%2 = load float, ptr null, align 4
; First compare (%1 > %0) is the condition for the values stored at byte
; offsets 0 (%v0.0) and 8 (%v14.0).
%cmp.i = fcmp ogt float %1, %0
%v14.0 = select i1 %cmp.i, float %1, float 0.000000e+00
%v0.0 = select i1 %cmp.i, float %0, float 0.000000e+00
; Second compare (0.0 > %2) is the condition for the values stored at byte
; offsets 4 (%v9.0) and 12 (%v19.0). Both arms of %v19.0 are 0.0, so that
; select always yields 0.0.
%cmp4.i = fcmp ogt float 0.000000e+00, %2
%v19.0 = select i1 %cmp4.i, float 0.000000e+00, float 0.000000e+00
%v9.0 = select i1 %cmp4.i, float %2, float 0.000000e+00
; Stores to consecutive 4-byte slots in the order %v0.0, %v9.0, %v14.0, %v19.0,
; so the two compare conditions alternate lane by lane.
store float %v0.0, ptr null, align 4
%v9idx = getelementptr i8, ptr null, i32 4
store float %v9.0, ptr %v9idx, align 4
%v14idx = getelementptr i8, ptr null, i32 8
store float %v14.0, ptr %v14idx, align 4
%v19idx = getelementptr i8, ptr null, i32 12
store float %v19.0, ptr %v19idx, align 4
ret void
}