Files
clang-p2996/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
Florian Hahn fd267082ee [VPlan] Refactor VPlan creation, add transform introducing region (NFC). (#128419)
Create an empty VPlan first, then let the HCFG builder create a plain
CFG for the top-level loop (w/o a top-level region). The top-level
region is introduced by a separate VPlan-transform. This is instead of
creating the vector loop region before building the VPlan CFG for the
input loop.

This simplifies the HCFG builder (which should probably be renamed) and
moves along the roadmap ('buildLoop') outlined in [1].

As follow-up, I plan to also preserve the exit branches in the initial
VPlan out of the CFG builder, including connections to the exit blocks.

The conversion from plain CFG with potentially multiple exits to a
single entry/exit region will be done as VPlan transform in a follow-up.

This is needed to enable VPlan-based predication. Currently early exit
support relies on building the block-in masks on the original CFG,
because exiting branches and conditions aren't preserved in the VPlan.
So in order to switch to VPlan-based predication, we will have to
preserve them in the initial plain CFG, so the exit conditions are
available explicitly when we convert to single entry/exit regions.

Another follow-up is updating the outer loop handling to also introduce
VPRegionBlocks for nested loops as transform. Currently the existing
logic in the builder will take care of creating VPRegionBlocks for
nested loops, but not the top-level loop.

[1]
https://llvm.org/devmtg/2023-10/slides/techtalks/Hahn-VPlan-StatusUpdateAndRoadmap.pdf

PR: https://github.com/llvm/llvm-project/pull/128419
2025-03-09 15:05:35 +00:00

71 lines
2.6 KiB
LLVM

; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize -pass-remarks=loop-vectorize -enable-vplan-native-path -debug -disable-output %s 2>&1 | FileCheck %s
@arr2 = external global [8 x i64], align 16
@arr = external global [8 x [8 x i64]], align 16
define void @foo(i64 %n) {
; CHECK: VPlan 'HCFGBuilder: Plain CFG
; CHECK-NEXT: {
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): vector.body
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next>
; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv>
; CHECK-NEXT: EMIT store ir<%outer.iv>, ir<%gep.1>
; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
; CHECK-NEXT: Successor(s): inner
; CHECK-EMPTY:
; CHECK-NEXT: <x1> inner: {
; CHECK-NEXT: inner:
; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): outer.latch
; CHECK-EMPTY:
; CHECK-NEXT: outer.latch:
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8>
; CHECK-NEXT: Successor(s): vector.body
; CHECK-NEXT: }
entry:
br label %outer.header
outer.header:
%outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ]
%gep.1 = getelementptr inbounds [8 x i64], ptr @arr2, i64 0, i64 %outer.iv
store i64 %outer.iv, ptr %gep.1, align 4
%add = add nsw i64 %outer.iv, %n
br label %inner
inner:
%inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner ]
%gep.2 = getelementptr inbounds [8 x [8 x i64]], ptr @arr, i64 0, i64 %inner.iv, i64 %outer.iv
store i64 %add, ptr %gep.2, align 4
%inner.iv.next = add nuw nsw i64 %inner.iv, 1
%inner.ec = icmp eq i64 %inner.iv.next, 8
br i1 %inner.ec, label %outer.latch, label %inner
outer.latch:
%outer.iv.next = add nuw nsw i64 %outer.iv, 1
%outer.ec = icmp eq i64 %outer.iv.next, 8
br i1 %outer.ec, label %exit, label %outer.header, !llvm.loop !1
exit:
ret void
}
; CHECK: remark: <unknown>:0:0: vectorized outer loop (vectorization width: 4, interleaved count: 1)
!1 = distinct !{!1, !2, !3}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
!3 = !{!"llvm.loop.vectorize.enable", i1 true}