; Previously, only the very first gather/buildvector node could be probed for
; reshuffling of other nodes. The compiler can do the same for the other
; gather/buildvector nodes too; it just needs to check the dependencies and
; postpone emission of the dependent nodes if the nodes they originate from
; have not been emitted yet.
;
; Part of D110978.
; Differential Revision: https://reviews.llvm.org/D144958
46 lines
1.6 KiB
LLVM
46 lines
1.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=slp-vectorizer < %s | FileCheck %s

; @test: a scalar i32 multiply chain used as input for the SLP vectorizer.
;
; %bb1 is the entry block. %bb2 has no predecessors in this function, so it is
; unreachable, but it still supplies an incoming value to both phis in %bb3.
; The chain in %bb3 computes %tmp * %tmp4^11 (eleven successive multiplies by
; %tmp4); the product is sign-extended to i64 and returned.
;
; The assertions below expect %tmp4 to be splatted into an <8 x i32> vector and
; reduced with llvm.vector.reduce.mul, with the three remaining %tmp4 factors
; and %tmp folded in by scalar multiplies afterwards.
define i64 @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: bb1:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ 0, [[BB2:%.*]] ], [ 0, [[BB1:%.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[BB2]] ], [ 0, [[BB1]] ]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]])
; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[TMP4]], [[TMP4]]
; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[OP_RDX]], [[OP_RDX1]]
; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[OP_RDX2]], [[TMP]]
; CHECK-NEXT: [[TMP65:%.*]] = sext i32 [[OP_RDX3]] to i64
; CHECK-NEXT: ret i64 [[TMP65]]
;
bb1:
  br label %bb3

; No block in this function branches here.
bb2:
  br label %bb3

bb3:
  %tmp = phi i32 [ 0, %bb2 ], [ 0, %bb1 ]
  %tmp4 = phi i32 [ 0, %bb2 ], [ 0, %bb1 ]
  ; Linear multiply chain: every step multiplies the running product by %tmp4.
  %tmp5 = mul i32 %tmp, %tmp4
  %tmp6 = mul i32 %tmp5, %tmp4
  %tmp7 = mul i32 %tmp6, %tmp4
  %tmp8 = mul i32 %tmp7, %tmp4
  %tmp9 = mul i32 %tmp8, %tmp4
  %tmp10 = mul i32 %tmp9, %tmp4
  %tmp11 = mul i32 %tmp10, %tmp4
  %tmp12 = mul i32 %tmp11, %tmp4
  %tmp13 = mul i32 %tmp12, %tmp4
  %tmp14 = mul i32 %tmp13, %tmp4
  %tmp15 = mul i32 %tmp14, %tmp4
  %tmp65 = sext i32 %tmp15 to i64
  ret i64 %tmp65
}