Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll
Alexey Bataev 37ae4ad0ee [SLP]Support minbitwidth analysis for buildvector nodes.
Metric: size..text

Program                                                                                                                                                size..text
                                                                                                                                                       exp           ref        diff
                                                                                  test-suite :: MultiSource/Benchmarks/mediabench/gsm/toast/toast.test    42906.00    42986.00  0.2%
                                                                           test-suite :: MultiSource/Benchmarks/MiBench/telecomm-gsm/telecomm-gsm.test    42909.00    42989.00  0.2%
                                                                                   test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test   664581.00   664661.00  0.0%
                                                                                  test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test   664581.00   664661.00  0.0%

Less is better.

Replaces `buildvector <p x in> + trunc <p x in> to <p x im>` sequences with
`buildvector <p x im> of { trunc in to im }` scalars, which is free in
most cases and results in better code.

Reviewers: RKSimon

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/88504
2024-04-29 09:57:37 -04:00

67 lines
2.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
; @test: four scalar i8 -> i32 compare/select chains whose results are folded
; together by a chain of i32 `and`s and stored once.
;
; Each chain has the shape
;   sext i8 -> i32, (or i8 x, 0) -> sext, or i32 0, icmp eq, select, and ..., 0
; with two irregularities between the chains:
;   * the third chain's select yields the sign-extended value (%i14), not 0;
;   * the fourth chain sign-extends the constant 0 instead of %1.
;
; The autogenerated assertions below expect 4-wide vector code in which the
; `or`/`icmp` work is done on <4 x i8> / <4 x i1> values, the <4 x i1> trunc
; result feeds both an `or` and a `zext` to <4 x i32>, and the final combine is
; a call to @llvm.vector.reduce.and followed by one scalar `and`.
; REVIEW: judging by the file name, the narrowed node with two users is
; presumably the case under test -- confirm against the originating commit.
define void @test() {
; CHECK-LABEL: define void @test() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr null, align 2
; CHECK-NEXT: [[TMP1:%.*]] = and i8 0, 1
; CHECK-NEXT: [[TMP2:%.*]] = and i32 0, 0
; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i32 0, i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> <i8 poison, i8 0, i8 poison, i8 poison>, i8 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i8> [[TMP5]] to <4 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i8> [[TMP8]] to <4 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> zeroinitializer, [[TMP15]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i1> [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP15]] to <4 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]])
; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 0, [[TMP14]]
; CHECK-NEXT: store i32 [[OP_RDX]], ptr null, align 4
; CHECK-NEXT: ret void
;
entry:
; Preamble. Only %1 is referenced by the chains below; %0, %2 and %3 have no
; users in this function and are expected to remain as scalar instructions.
%0 = load i16, ptr null, align 2
%1 = and i8 0, 1
%2 = and i32 0, 0
%3 = select i1 false, i32 0, i32 0
; Chain 1: compares sext(%1 | 0) with (0 | sext(%1)); starts the `and` chain
; with a constant 0 operand (%s1).
%i2 = sext i8 %1 to i32
%i3 = or i8 %1, 0
%i4 = sext i8 %i3 to i32
%i5 = or i32 0, %i2
%b1 = icmp eq i32 %i4, %i5
%a1 = select i1 %b1, i32 0, i32 0
%4 = and i32 %a1, 0
%s1 = and i32 %4, 0
; Chain 2: same shape as chain 1; its result is and-ed with %s1.
%i8 = sext i8 %1 to i32
%i9 = or i8 %1, 0
%i10 = sext i8 %i9 to i32
%i11 = or i32 0, %i8
%b2 = icmp eq i32 %i10, %i11
%a2 = select i1 %b2, i32 0, i32 0
%5 = and i32 %a2, 0
%s2 = and i32 %5, %s1
; Chain 3: differs from chains 1-2 in that the select's true operand is the
; sign-extended value %i14, giving %i14 a second user besides %i17.
%i14 = sext i8 %1 to i32
%i15 = or i8 %1, 0
%i16 = sext i8 %i15 to i32
%i17 = or i32 0, %i14
%b3 = icmp eq i32 %i16, %i17
%a3 = select i1 %b3, i32 %i14, i32 0
%6 = and i32 %a3, 0
%s3 = and i32 %6, %s2
; Chain 4: differs in that the first sext takes the constant i8 0 rather
; than %1.
%i20 = sext i8 0 to i32
%i21 = or i8 %1, 0
%i22 = sext i8 %i21 to i32
%i23 = or i32 0, %i20
%b4 = icmp eq i32 %i22, %i23
%a4 = select i1 %b4, i32 0, i32 0
%7 = and i32 %a4, 0
%s4 = and i32 %7, %s3
; Single store of the accumulated `and` result keeps the chains live.
store i32 %s4, ptr null, align 4
ret void
}