If the graph consists only of the buildvector node, without a main operation, the insertion point must be inherited from the reduction instruction. Otherwise, the compiler crashes trying to insert an instruction at the entry block.
33 lines
1.2 KiB
LLVM
33 lines
1.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=arm64-apple-macosx | FileCheck %s
|
|
|
|
; Reproducer for an add-reduction whose eight operands are all the same scalar
; (%call278). The CHECK lines below expect the add chain to be replaced by a
; splat of the call result (insertelement + shufflevector with a
; zeroinitializer mask) feeding llvm.vector.reduce.add.v8i32, with all of the
; new instructions emitted in for.body directly after the call.
; The function is declared to return i8 but has no ret: for.body branches back
; to itself unconditionally.
define i8 @test() {
|
|
; CHECK-LABEL: @test(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[CALL278:%.*]] = call i32 @fn(i32 [[SUM]])
|
|
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[CALL278]], i32 0
|
|
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[TMP1]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[SHUFFLE]])
|
|
; CHECK-NEXT: br label [[FOR_BODY]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
; Self-looping block: %sum starts at 0 on entry from %entry and takes the final
; value of the add chain (%add285.19) on every back edge.
for.body:
|
|
%sum = phi i32 [ %add285.19, %for.body ], [ 0, %entry ]
|
|
%call278 = call i32 @fn(i32 %sum)
|
|
; Linear chain of seven adds; every operand that is not the running total is
; %call278, so %add285.19 is %call278 added to itself eight times in total.
%add285.13 = add i32 %call278, %call278
|
|
%add285.14 = add i32 %add285.13, %call278
|
|
%add285.15 = add i32 %add285.14, %call278
|
|
%add285.16 = add i32 %add285.15, %call278
|
|
%add285.17 = add i32 %add285.16, %call278
|
|
%add285.18 = add i32 %add285.17, %call278
|
|
%add285.19 = add i32 %add285.18, %call278
|
|
; Unconditional back edge: the loop has no exit.
br label %for.body
|
|
}
|
|
|
|
; External function called from @test's for.body with the current %sum; only
; its signature is declared here, so its body is not visible in this file.
declare i32 @fn(i32)
|