This is a re-commit of #69392 that also fixes issue #69670, which was uncovered by the prior commit. For delayed gather emission it may be incorrect to use the stub instruction as the insertion point if it is a PHI operand. In that case the insertion point is adjusted to be at the end of the block, ensuring that the vector code it depends on is emitted earlier.
125 lines
5.8 KiB
LLVM
125 lines
5.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
|
|
; RUN: opt -passes=slp-vectorizer -mtriple=x86_64 -S -mcpu=core-avx2 -pass-remarks-output=%t < %s | FileCheck %s
|
|
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
|
|
|
|
; YAML: --- !Passed
|
|
; YAML: Pass: slp-vectorizer
|
|
; YAML: Name: StoresVectorized
|
|
; YAML: Function: test
|
|
; YAML: Args:
|
|
; YAML: - String: 'Stores SLP vectorized with cost '
|
|
; YAML: - Cost: '-6'
|
|
; YAML: - String: ' and with tree size '
|
|
; YAML: - TreeSize: '14'
|
|
; YAML: ...
|
|
|
|
; Test that SLP cost modeling is able to match gathering tree
|
|
; entries: two instances of { call0 .. call3 }. Both used by
|
|
; vectorized PHI entries: { phi0 .. phi3 } and { phi4 .. phi7 }.
|
|
; Check that cost is not added twice for gathered calls.
|
|
|
|
define void @test(ptr %dst, float %a, float %b, float %c, float %d) {
; Regression input for the SLP vectorizer. Four scalar stores in Exit are fed
; through chains of scalar float phis and fadds, and the four @foo calls in
; blkx are incoming values for two separate groups of phis (in UserBlock0 and
; in UserBlock1).
; The autogenerated assertions below expect one insertelement chain in blkx
; whose final value is the incoming operand of both vector phis.
; Every conditional branch in this function tests a poison condition.
; NOTE(review): the bare `|` lines throughout look like page-extraction
; residue rather than LLVM IR -- confirm against the upstream test and strip.
|
|
; CHECK-LABEL: define void @test(
|
|
; CHECK-SAME: ptr [[DST:%.*]], float [[A:%.*]], float [[B:%.*]], float [[C:%.*]], float [[D:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: Entry:
|
|
; CHECK-NEXT: br i1 poison, label [[LOOP0:%.*]], label [[EXIT:%.*]]
|
|
; CHECK: loop0:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, [[ENTRY:%.*]] ], [ [[TMP7:%.*]], [[USERBLOCK1:%.*]] ]
|
|
; CHECK-NEXT: br i1 poison, label [[USERBLOCK0:%.*]], label [[BLKX:%.*]]
|
|
; CHECK: UserBlock0:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = phi <4 x float> [ zeroinitializer, [[LOOP0]] ], [ [[TMP5:%.*]], [[BLKX]] ]
|
|
; CHECK-NEXT: br label [[LOOP_INNER:%.*]]
|
|
; CHECK: blkx:
|
|
; CHECK-NEXT: [[CALL0:%.*]] = call fast float @foo(float [[A]])
|
|
; CHECK-NEXT: [[CALL1:%.*]] = call fast float @foo(float [[B]])
|
|
; CHECK-NEXT: [[CALL2:%.*]] = call fast float @foo(float [[C]])
|
|
; CHECK-NEXT: [[CALL3:%.*]] = call fast float @foo(float [[D]])
|
|
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[CALL0]], i32 0
|
|
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[CALL1]], i32 1
|
|
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[CALL2]], i32 2
|
|
; CHECK-NEXT: [[TMP5]] = insertelement <4 x float> [[TMP4]], float [[CALL3]], i32 3
|
|
; CHECK-NEXT: br i1 poison, label [[USERBLOCK0]], label [[USERBLOCK1]]
|
|
; CHECK: UserBlock1:
|
|
; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP5]], [[BLKX]] ], [ [[TMP9:%.*]], [[LOOP_INNER]] ]
|
|
; CHECK-NEXT: [[TMP7]] = fadd fast <4 x float> [[TMP0]], [[TMP6]]
|
|
; CHECK-NEXT: br i1 poison, label [[EXIT]], label [[LOOP0]]
|
|
; CHECK: loop.inner:
|
|
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ [[TMP1]], [[USERBLOCK0]] ], [ [[TMP9]], [[LOOP_INNER]] ]
|
|
; CHECK-NEXT: [[TMP9]] = fadd fast <4 x float> [[TMP8]], poison
|
|
; CHECK-NEXT: br i1 poison, label [[USERBLOCK1]], label [[LOOP_INNER]]
|
|
; CHECK: Exit:
|
|
; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x float> [ <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, [[ENTRY]] ], [ [[TMP7]], [[USERBLOCK1]] ]
|
|
; CHECK-NEXT: [[IDX0:%.*]] = add i64 0, poison
|
|
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IDX0]]
|
|
; CHECK-NEXT: store <4 x float> [[TMP10]], ptr [[GEP0]], align 4
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; Function entry: goes either into the loop nest (loop0) or straight to Exit.
Entry:
|
|
br i1 poison, label %loop0, label %Exit
|
|
|
|
; Header of the loop closed by the back edge from UserBlock1. %i0..%i3 start
; at 0.5 when coming from Entry and are updated by %add3..%add0 from
; UserBlock1 (note the reversed pairing: %i0 <- %add3 ... %i3 <- %add0).
loop0:
|
|
%i0 = phi float [ 5.000000e-01, %Entry ], [ %add3, %UserBlock1 ]
|
|
%i1 = phi float [ 5.000000e-01, %Entry ], [ %add2, %UserBlock1 ]
|
|
%i2 = phi float [ 5.000000e-01, %Entry ], [ %add1, %UserBlock1 ]
|
|
%i3 = phi float [ 5.000000e-01, %Entry ], [ %add0, %UserBlock1 ]
|
|
br i1 poison, label %UserBlock0, label %blkx
|
|
|
|
; First user of the call results. Reached from loop0 (phis take 0.0) or from
; blkx (phis take the calls in reversed order: %phi0 <- %call3 ... %phi3 <- %call0).
UserBlock0:
|
|
%phi0 = phi float [ 0.000000e+00, %loop0 ], [ %call3, %blkx ]
|
|
%phi1 = phi float [ 0.000000e+00, %loop0 ], [ %call2, %blkx ]
|
|
%phi2 = phi float [ 0.000000e+00, %loop0 ], [ %call1, %blkx ]
|
|
%phi3 = phi float [ 0.000000e+00, %loop0 ], [ %call0, %blkx ]
|
|
br label %loop.inner
|
|
|
|
; The four scalar calls to @foo on %a..%d. Their results are incoming values
; of the phis in both UserBlock0 and UserBlock1.
blkx:
|
|
%call0 = call fast float @foo(float %a)
|
|
%call1 = call fast float @foo(float %b)
|
|
%call2 = call fast float @foo(float %c)
|
|
%call3 = call fast float @foo(float %d)
|
|
br i1 poison, label %UserBlock0, label %UserBlock1
|
|
|
|
; Second user of the call results. %phi4..%phi7 take %call0..%call3 from blkx
; or %add4..%add7 from loop.inner; they are added to %i3..%i0 to form
; %add0..%add3, which are carried back to loop0 and on to Exit.
UserBlock1:
|
|
%phi4 = phi float [ %call0, %blkx ], [ %add4, %loop.inner ]
|
|
%phi5 = phi float [ %call1, %blkx ], [ %add5, %loop.inner ]
|
|
%phi6 = phi float [ %call2, %blkx ], [ %add6, %loop.inner ]
|
|
%phi7 = phi float [ %call3, %blkx ], [ %add7, %loop.inner ]
|
|
%add0 = fadd fast float %i3, %phi4
|
|
%add1 = fadd fast float %i2, %phi5
|
|
%add2 = fadd fast float %i1, %phi6
|
|
%add3 = fadd fast float %i0, %phi7
|
|
br i1 poison, label %Exit, label %loop0
|
|
|
|
; Self-looping block. %t20..%t17 start from %phi3..%phi0 (UserBlock0) and are
; then fed by their own fadd results %add4..%add7; the second fadd operand is
; poison in every lane.
loop.inner:
|
|
%t20 = phi float [ %phi3, %UserBlock0 ], [ %add4, %loop.inner ]
|
|
%t19 = phi float [ %phi2, %UserBlock0 ], [ %add5, %loop.inner ]
|
|
%t18 = phi float [ %phi1, %UserBlock0 ], [ %add6, %loop.inner ]
|
|
%t17 = phi float [ %phi0, %UserBlock0 ], [ %add7, %loop.inner ]
|
|
%add4 = fadd fast float %t20, poison
|
|
%add5 = fadd fast float %t19, poison
|
|
%add6 = fadd fast float %t18, poison
|
|
%add7 = fadd fast float %t17, poison
|
|
|
|
|
br i1 poison, label %UserBlock1, label %loop.inner
|
|
|
|
; Four scalar stores of %t45..%t48 to %dst at indices (0..3) + poison. The
; assertions above expect them to be replaced by a single <4 x float> store
; through %gep0.
Exit:
|
|
%t48 = phi float [ 5.000000e-01, %Entry ], [ %add3, %UserBlock1 ]
|
|
%t47 = phi float [ 5.000000e-01, %Entry ], [ %add2, %UserBlock1 ]
|
|
%t46 = phi float [ 5.000000e-01, %Entry ], [ %add1, %UserBlock1 ]
|
|
%t45 = phi float [ 5.000000e-01, %Entry ], [ %add0, %UserBlock1 ]
|
|
%idx0 = add i64 0, poison
|
|
%gep0 = getelementptr inbounds float, ptr %dst, i64 %idx0
|
|
store float %t45, ptr %gep0, align 4
|
|
%idx1 = add i64 1, poison
|
|
%gep1 = getelementptr inbounds float, ptr %dst, i64 %idx1
|
|
store float %t46, ptr %gep1, align 4
|
|
%idx2 = add i64 2, poison
|
|
%gep2 = getelementptr inbounds float, ptr %dst, i64 %idx2
|
|
store float %t47, ptr %gep2, align 4
|
|
%idx3 = add i64 3, poison
|
|
%gep3 = getelementptr inbounds float, ptr %dst, i64 %idx3
|
|
store float %t48, ptr %gep3, align 4
|
|
ret void
|
|
}
|
|
|
|
declare float @foo(float)
|