This patch tries to keep the original order of the instructions in the reductions. Previously, the first two instructions were switched, giving reverse order. This is the first step toward supporting ordered reductions. Reviewers: RKSimon Reviewed By: RKSimon Pull Request: https://github.com/llvm/llvm-project/pull/98025
53 lines · 2.4 KiB · LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=icelake-server -S < %s | FileCheck %s
; Regression test: an or-reduction over eight i1 compares must be vectorized
; with the operands kept in their original (source) order. The scalar body
; below builds a chain %or753..%or765 of i1 "or"s; the CHECK lines (generated
; by update_test_checks.py) pin the exact vector form, including lane order.
define i1 @test(i64 %v) {
; CHECK-LABEL: define i1 @test
; CHECK-SAME: (i64 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[V]], 1
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[V]], 3
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[V]], 7
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i64> <i64 poison, i64 poison, i64 poison, i64 poison, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i64> <i64 undef, i64 undef, i64 0, i64 0>, i64 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[TMP0]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 0, i32 3, i32 1>
; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = sub <8 x i64> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP8]], <8 x i64> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <8 x i64> [[TMP10]], zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP11]])
; CHECK-NEXT: ret i1 [[TMP12]]
;
entry:
  %0 = shl i64 %v, 1
  %1 = add i64 %v, 3
  %2 = add i64 %v, 7
  ; First compare of the reduction chain: (%2 | %1) <u 0.
  %3 = or i64 %2, %1
  %cmp750 = icmp ult i64 %3, 0
  %4 = or i64 %0, %1
  %cmp752 = icmp ult i64 %4, 0
  %or753 = or i1 %cmp750, %cmp752
  %5 = or i64 0, %1
  %cmp754 = icmp ult i64 %5, 0
  %or755 = or i1 %or753, %cmp754
  ; %6 is a constant-zero lane extract; kept as-is so the SLP cost model
  ; sees the same input the original reproducer had.
  %6 = extractelement <16 x i64> zeroinitializer, i32 0
  %7 = sub i64 %1, %6
  %cmp756 = icmp ult i64 %7, 0
  %or757 = or i1 %or755, %cmp756
  %8 = sub i64 0, %2
  %cmp758 = icmp ult i64 %8, 0
  %or759 = or i1 %or757, %cmp758
  %9 = or i64 0, %2
  %cmp760 = icmp ult i64 %9, 0
  %or761 = or i1 %or759, %cmp760
  %10 = or i64 0, %6
  %cmp762 = icmp ult i64 %10, 0
  %or763 = or i1 %or761, %cmp762
  %11 = or i64 0, %0
  %cmp764 = icmp ult i64 %11, 0
  ; Final accumulated i1 of the ordered or-reduction.
  %or765 = or i1 %or763, %cmp764
  ret i1 %or765
}