Files
clang-p2996/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
Florian Hahn 5fbd0658a0 [VPlan] Add initial CFG simplification, removing BranchOnCond true. (#106748)
Add an initial CFG simplification transform, which removes the dead
edges for blocks terminated with BranchOnCond true.

At the moment, this removes the edge between middle block and scalar
preheader when folding the tail.

PR: https://github.com/llvm/llvm-project/pull/106748
2025-04-04 15:44:26 +01:00

107 lines
5.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; The command below runs only the loop-vectorize pass, forcing a vector width
; of 4 and an interleave count of 1. The predicate-dont-vectorize setting
; requests a predicated (tail-folded) vector loop rather than a scalar
; epilogue; the expected output further down shows the resulting lane masks.
; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S %s | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
; Tail-folding test for a loop whose header block ends in a switch.
;
; Input loop: %iv starts at 0 and the latch compares the pre-increment %iv
; against 4, so the body executes for %iv = 0..4 (5 iterations). Only switch
; case 1 reaches %if.then, which stores 0 to %dst[%iv]; case 0 and the default
; destination both go straight to %loop.latch.
;
; Expected output, as pinned by the autogenerated assertions below:
;  * The vector loop advances the scalar index by 4 and exits once it equals 8.
;    An i8 vector induction compared `ule 4` selects the lanes that belong to
;    the original 5 iterations.
;  * The switch condition is turned into a loop-invariant
;    `icmp eq ..., splat (i32 1)` in the vector preheader. It is combined with
;    the lane mask through a select, and each lane of the result guards one
;    scalarized, predicated store.
;  * The middle block branches unconditionally to the exit block, and the
;    scalar preheader is only referenced by the `br i1 false` in the entry
;    block (its phi has a single incoming value from entry).
define void @tail_fold_switch(ptr %dst, i32 %0) {
; CHECK-LABEL: define void @tail_fold_switch(
; CHECK-SAME: ptr [[DST:%.*]], i32 [[TMP0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 4)
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP5]]
; CHECK-NEXT: store i32 0, ptr [[TMP6]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; CHECK: [[PRED_STORE_IF1]]:
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP8]]
; CHECK-NEXT: store i32 0, ptr [[TMP9]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; CHECK: [[PRED_STORE_IF3]]:
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP11]]
; CHECK-NEXT: store i32 0, ptr [[TMP12]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; CHECK: [[PRED_STORE_CONTINUE4]]:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
; CHECK: [[PRED_STORE_IF5]]:
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP14]]
; CHECK-NEXT: store i32 0, ptr [[TMP15]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; CHECK: [[PRED_STORE_CONTINUE6]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: switch i32 [[TMP0]], label %[[LOOP_LATCH]] [
; CHECK-NEXT: i32 0, label %[[LOOP_LATCH]]
; CHECK-NEXT: i32 1, label %[[IF_THEN:.*]]
; CHECK-NEXT: ]
; CHECK: [[IF_THEN]]:
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 4
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
; The switch operand %0 is a function argument, so it is loop-invariant. Only
; the value 1 takes the store path; 0 and every other value skip to the latch.
switch i32 %0, label %loop.latch [
i32 0, label %loop.latch
i32 1, label %if.then
]
if.then:
; Conditional store of 0 to the i32 element %dst[%iv].
%gep = getelementptr inbounds i32, ptr %dst, i64 %iv
store i32 0, ptr %gep, align 4
br label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
; The exit test uses %iv (not %iv.next), so the iteration with %iv == 4 still
; runs before the loop is left.
%ec = icmp eq i64 %iv, 4
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.