The insertion point for a (non-schedulable) vector node needs to be set after
the last instruction in the node to avoid def-use breakage. However, doing so
also causes a miscompilation with gather/buildvector operands of phi nodes
that are used only in that same phi in the block.
These nodes are supposed to be inserted at the end of the block, and after
changing the insertion point for the non-schedulable vector block, this may
also break def-use dependencies. Such nodes need to be pre-vectorized, i.e.
emitted as early as possible, so that the vectorized nodes are inserted before
these nodes.
Fixes #139728
Recommit after revert 60fb921792
Reviewers: hiraditya, HanKuanChen, RKSimon
Reviewed By: RKSimon
Pull Request: https://github.com/llvm/llvm-project/pull/139917
60 lines
2.3 KiB
LLVM
60 lines
2.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell < %s | FileCheck %s

; Regression test for the SLP vectorizer (llvm/llvm-project issue #139728).
;
; Input shape:
;   * The entry block computes three scalar fdivs; %2 and %3 are the same
;     expression (0.0 / 0.0) and %4 divides the function argument by 0.0.
;   * %4 is used both in block %5 and in block %14, while %2 is used only in
;     block %14 and %3 only in block %5.
;   * Block %11 merges the fsub results of block %5 through two phis whose
;     other incoming edge comes from block %10.
;
; Expected output (see the assertions inside the function):
;   * Both <2 x float> fdivs are emitted in the entry block, before the branch,
;     so each one is defined ahead of the vector fmul that uses it in a later
;     block.
;   * The two scalar phis become one <2 x float> phi whose incoming value from
;     the predecessor-less block is poison.
;   * The final fadd is fed by two extractelements of the vector fsub.
define void @test(float %0) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> <float 0.000000e+00, float poison>, <2 x i32> <i32 2, i32 0>
; CHECK-NEXT:    [[TMP5:%.*]] = fdiv <2 x float> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = fdiv <2 x float> [[TMP6]], zeroinitializer
; CHECK-NEXT:    br label %[[BB6:.*]]
; CHECK:       [[BB6]]:
; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = fsub <2 x float> zeroinitializer, [[TMP7]]
; CHECK-NEXT:    br label %[[BB10:.*]]
; CHECK:       [[BB9:.*]]:
; CHECK-NEXT:    br label %[[BB10]]
; CHECK:       [[BB10]]:
; CHECK-NEXT:    [[TMP11:%.*]] = phi <2 x float> [ [[TMP8]], %[[BB6]] ], [ poison, %[[BB9]] ]
; CHECK-NEXT:    br label %[[BB12:.*]]
; CHECK:       [[BB12]]:
; CHECK-NEXT:    [[TMP13:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[TMP14:%.*]] = fsub <2 x float> [[TMP11]], [[TMP13]]
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x float> [[TMP14]], i32 0
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[TMP14]], i32 1
; CHECK-NEXT:    [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = call float @llvm.fabs.f32(float [[TMP17]])
; CHECK-NEXT:    ret void
;
  %2 = fdiv float 0.000000e+00, 0.000000e+00
  %3 = fdiv float 0.000000e+00, 0.000000e+00
  %4 = fdiv float %0, 0.000000e+00
  br label %5

5:
  %6 = fmul float %4, 0.000000e+00
  %7 = fsub float 0.000000e+00, %6
  %8 = fmul float %3, 0.000000e+00
  %9 = fsub float 0.000000e+00, %8
  br label %11

; No branch in this function targets the block below, so it has no
; predecessors; it only supplies the second incoming edge of the phis in %11.
10:
  br label %11

11:
  %12 = phi float [ %7, %5 ], [ 0.000000e+00, %10 ]
  %13 = phi float [ %9, %5 ], [ 0.000000e+00, %10 ]
  br label %14

14:
  %15 = fmul float %2, 0.000000e+00
  %16 = fsub float %12, %15
  %17 = fmul float %4, 0.000000e+00
  %18 = fsub float %13, %17
  %19 = fadd float %16, %18
  %20 = call float @llvm.fabs.f32(float %19)
  ret void
}