Metric: size..text
Program size..text
exp ref diff
test-suite :: MultiSource/Benchmarks/mediabench/gsm/toast/toast.test 42906.00 42986.00 0.2%
test-suite :: MultiSource/Benchmarks/MiBench/telecomm-gsm/telecomm-gsm.test 42909.00 42989.00 0.2%
test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 664581.00 664661.00 0.0%
test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 664581.00 664661.00 0.0%
Less is better.
Replaces `buildvector <p x in> + trunc <p x in> to <p x im>` sequences with
`buildvector <p x im> of { trunc in to im }` scalars, which is free in
most cases and results in better code.
Reviewers: RKSimon
Reviewed By: RKSimon
Pull Request: https://github.com/llvm/llvm-project/pull/88504
67 lines
2.7 KiB
LLVM
67 lines
2.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s

; SLP-vectorizer regression test (x86_64 Linux triple, see the run line above).
;
; @test builds four near-identical scalar chains from the same i8 value %1
; (sext -> or -> icmp eq -> select -> and) and folds them together with a
; chain of `and` instructions before storing the final value.
;   * Chains 1 and 2 are identical.
;   * Chain 3 selects %i14 instead of 0 on the true arm of its select.
;   * Chain 4 sign-extends the constant i8 0 instead of %1.
;
; The autogenerated assertions below expect 4-wide vector code: a <4 x i8>
; build vector (insertelement + shufflevector), `trunc` of <4 x i8> to
; <4 x i1>, an `icmp eq` on <4 x i1>, a `zext` to <4 x i32>, a vector
; `select`, and a final `llvm.vector.reduce.and.v4i32` call.
define void @test() {
; CHECK-LABEL: define void @test() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr null, align 2
; CHECK-NEXT: [[TMP1:%.*]] = and i8 0, 1
; CHECK-NEXT: [[TMP2:%.*]] = and i32 0, 0
; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i32 0, i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> <i8 poison, i8 0, i8 poison, i8 poison>, i8 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i8> [[TMP5]] to <4 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i8> [[TMP8]] to <4 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> zeroinitializer, [[TMP15]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i1> [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP15]] to <4 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]])
; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 0, [[TMP14]]
; CHECK-NEXT: store i32 [[OP_RDX]], ptr null, align 4
; CHECK-NEXT: ret void
;
entry:
  ; %0, %2 and %3 have no users below; the assertions above still expect
  ; them to appear in the output.
  %0 = load i16, ptr null, align 2
  %1 = and i8 0, 1
  %2 = and i32 0, 0
  %3 = select i1 false, i32 0, i32 0

  ; Chain 1: compare sext(%1 | 0) with (0 | sext(%1)); both select arms are 0.
  %i2 = sext i8 %1 to i32
  %i3 = or i8 %1, 0
  %i4 = sext i8 %i3 to i32
  %i5 = or i32 0, %i2
  %b1 = icmp eq i32 %i4, %i5
  %a1 = select i1 %b1, i32 0, i32 0
  %4 = and i32 %a1, 0
  %s1 = and i32 %4, 0

  ; Chain 2: same shape as chain 1; its result is and-ed with %s1.
  %i8 = sext i8 %1 to i32
  %i9 = or i8 %1, 0
  %i10 = sext i8 %i9 to i32
  %i11 = or i32 0, %i8
  %b2 = icmp eq i32 %i10, %i11
  %a2 = select i1 %b2, i32 0, i32 0
  %5 = and i32 %a2, 0
  %s2 = and i32 %5, %s1

  ; Chain 3: the select's true arm is %i14 (the sign-extended %1), not 0.
  %i14 = sext i8 %1 to i32
  %i15 = or i8 %1, 0
  %i16 = sext i8 %i15 to i32
  %i17 = or i32 0, %i14
  %b3 = icmp eq i32 %i16, %i17
  %a3 = select i1 %b3, i32 %i14, i32 0
  %6 = and i32 %a3, 0
  %s3 = and i32 %6, %s2

  ; Chain 4: the first sext takes the constant i8 0 instead of %1.
  %i20 = sext i8 0 to i32
  %i21 = or i8 %1, 0
  %i22 = sext i8 %i21 to i32
  %i23 = or i32 0, %i20
  %b4 = icmp eq i32 %i22, %i23
  %a4 = select i1 %b4, i32 0, i32 0
  %7 = and i32 %a4, 0
  %s4 = and i32 %7, %s3

  ; Store the combined result of all four chains.
  store i32 %s4, ptr null, align 4
  ret void
}