Files
clang-p2996/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll
Florian Hahn 68ed1728bf [VPlan] Unify mayWriteToMemory and mayHaveSideEffects logic for VPInst.
Unify logic for mayWriteToMemory and mayHaveSideEffects for
VPInstruction, with the later relying on the former. Also extend to
handle binary operators.

Split off from https://github.com/llvm/llvm-project/pull/106441
2024-09-26 19:16:43 +01:00

96 lines
4.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-vectorize -mcpu=skylake-avx512 -S %s | FileCheck %s
target triple = "x86_64-unknown-linux-gnu"
define i64 @test_pr98660(ptr %dst, i64 %N) {
; CHECK-LABEL: define i64 @test_pr98660(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i32 8
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP9]], i32 16
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 24
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP13]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP14]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP15]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i32>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD1]], zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD2]], zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD3]], zeroinitializer
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP13]], i32 4, <8 x i1> [[TMP17]])
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP14]], i32 4, <8 x i1> [[TMP18]])
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP15]], i32 4, <8 x i1> [[TMP19]])
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP16]], i32 4, <8 x i1> [[TMP20]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[N_VEC]], 1
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[OR]]
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
; CHECK: [[THEN]]:
; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[RET:%.*]] = phi i64 [ [[IV]], %[[LOOP_LATCH]] ], [ [[IND_ESCAPE]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RET]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%or = or disjoint i64 %iv, 1
%gep = getelementptr i32, ptr %dst, i64 %or
%l = load i32, ptr %gep
%c = icmp eq i32 %l, 0
br i1 %c, label %then, label %loop.latch
then:
store i32 0, ptr %gep, align 4
br label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%ec = icmp ult i64 %iv, %N
br i1 %ec, label %loop.header, label %exit
exit:
%ret = phi i64 [ %iv, %loop.latch ]
ret i64 %ret
}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.