Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll
Alexey Bataev a988821123 [SLP]Keep the original order in the reductions.
The patch tries to keep the original order of the instructions in the
reductions. Previously, the first two instructions were swapped, which
reversed their order.
This is the first step toward supporting ordered reductions.

Reviewers: RKSimon

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/98025
2024-07-09 12:26:42 -04:00

67 lines
2.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
; Regression input for the SLP vectorizer: four near-identical scalar chains
; (sext / or / icmp eq / select / and) feed a chain of i32 `and` operations
; whose final value is stored through a null pointer.
;
; The expected output below shows the four chains turned into <4 x i8> and
; <4 x i1> vector operations, the `and` chain replaced by a single
; @llvm.vector.reduce.and.v4i32 call, and one trailing scalar `and` with the
; constant 0 ahead of the store.
define void @test() {
; CHECK-LABEL: define void @test() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr null, align 2
; CHECK-NEXT: [[TMP1:%.*]] = and i8 0, 1
; CHECK-NEXT: [[TMP2:%.*]] = and i32 0, 0
; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i32 0, i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> <i8 0, i8 poison, i8 poison, i8 poison>, i8 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i8> [[TMP5]] to <4 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i8> [[TMP8]] to <4 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> zeroinitializer, [[TMP15]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i1> [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP15]] to <4 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 poison, i32 0, i32 0>, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]])
; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 0, [[TMP14]]
; CHECK-NEXT: store i32 [[OP_RDX]], ptr null, align 4
; CHECK-NEXT: ret void
;
entry:
; %0, %2 and %3 are never referenced again in this function; the expected
; output keeps them as scalars. %1 is the i8 value shared by all four chains.
%0 = load i16, ptr null, align 2
%1 = and i8 0, 1
%2 = and i32 0, 0
%3 = select i1 false, i32 0, i32 0
; Chain 1: both compare operands derive from %1. %s1 seeds the `and` chain
; with the constant 0 as its second operand.
%i2 = sext i8 %1 to i32
%i3 = or i8 %1, 0
%i4 = sext i8 %i3 to i32
%i5 = or i32 0, %i2
%b1 = icmp eq i32 %i4, %i5
%a1 = select i1 %b1, i32 0, i32 0
%4 = and i32 %a1, 0
%s1 = and i32 %4, 0
; Chain 2: same shape as chain 1; its result is and-ed into %s1.
%i8 = sext i8 %1 to i32
%i9 = or i8 %1, 0
%i10 = sext i8 %i9 to i32
%i11 = or i32 0, %i8
%b2 = icmp eq i32 %i10, %i11
%a2 = select i1 %b2, i32 0, i32 0
%5 = and i32 %a2, 0
%s2 = and i32 %5, %s1
; Chain 3: differs from the others in that the select's true operand is the
; sign-extended value %i14 (not the constant 0), so %i14 has two users here:
; the `or` and the select.
%i14 = sext i8 %1 to i32
%i15 = or i8 %1, 0
%i16 = sext i8 %i15 to i32
%i17 = or i32 0, %i14
%b3 = icmp eq i32 %i16, %i17
%a3 = select i1 %b3, i32 %i14, i32 0
%6 = and i32 %a3, 0
%s3 = and i32 %6, %s2
; Chain 4: differs from the others in that the first sext takes the constant
; i8 0 instead of %1.
%i20 = sext i8 0 to i32
%i21 = or i8 %1, 0
%i22 = sext i8 %i21 to i32
%i23 = or i32 0, %i20
%b4 = icmp eq i32 %i22, %i23
%a4 = select i1 %b4, i32 0, i32 0
%7 = and i32 %a4, 0
%s4 = and i32 %7, %s3
; %s4 is the end of the `and` chain over all four results; it is the only
; value stored.
store i32 %s4, ptr null, align 4
ret void
}