Files
clang-p2996/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll
Florian Hahn dfca6c0d3b [VPlan] Remove no-op SCALAR-STEPS after unrolling. (#123655)
After unrolling, there may be additional simplifications that can be
applied. One example is removing SCALAR-STEPS for the first part where
only the first lane is demanded.

This removes redundant adds of 0 from a large number of tests (~200),
many which I am still working on updating.

In preparation for removing redundant WideIV steps added in
https://github.com/llvm/llvm-project/pull/119284.

PR: https://github.com/llvm/llvm-project/pull/123655
2025-03-25 12:57:24 +00:00

123 lines
8.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize -enable-masked-interleaved-mem-accesses -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -force-vector-width=8 -S %s | FileCheck %s
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx13.0.0"
define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 {
; CHECK-LABEL: @test_pr59090(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 10000)
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[INDEX]], 6
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[B:%.*]], align 1, !llvm.access.group [[ACC_GRP0:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; CHECK: pred.store.if1:
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
; CHECK: pred.store.if9:
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
; CHECK: pred.store.continue10:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
; CHECK: pred.store.if11:
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
; CHECK: pred.store.continue12:
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14]]
; CHECK: pred.store.if13:
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
; CHECK: pred.store.continue14:
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[L_OUT:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
; CHECK-NEXT: [[TMP15:%.*]] = and <48 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false>
; CHECK-NEXT: call void @llvm.masked.store.v48i8.p0(<48 x i8> <i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison>, ptr [[TMP13]], i32 1, <48 x i1> [[TMP15]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10008
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10008, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_MUL:%.*]] = mul nuw i64 [[IV]], 6
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
; CHECK-NEXT: store i8 [[L]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
; CHECK-NEXT: [[ARRAYIDX77:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[IV_MUL]]
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX77]], align 1, !llvm.access.group [[ACC_GRP0]]
; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[IV_MUL]], 2
; CHECK-NEXT: [[ARRAYIDX97:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[ADD_2]]
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX97]], align 1, !llvm.access.group [[ACC_GRP0]]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 10000
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%iv.mul = mul nuw i64 %iv, 6
%l = load i8, ptr %b, align 1, !llvm.access.group !0
store i8 %l, ptr %b, align 1, !llvm.access.group !0
%arrayidx77 = getelementptr i8, ptr %l_out, i64 %iv.mul
store i8 0, ptr %arrayidx77, align 1, !llvm.access.group !0
%add.2 = add i64 %iv.mul, 2
%arrayidx97 = getelementptr i8, ptr %l_out, i64 %add.2
store i8 0, ptr %arrayidx97, align 1, !llvm.access.group !0
%iv.next = add nsw nuw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv, 10000
br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !1
exit:
ret void
}
attributes #0 = { "target-cpu"="skx" }
!0 = distinct !{}
!1 = distinct !{!1, !3}
!3 = !{!"llvm.loop.parallel_accesses", !0}