After unrolling, there may be additional simplifications that can be applied. One example is removing SCALAR-STEPS for the first part where only the first lane is demanded. This removes redundant adds of 0 from a large number of tests (~200), many which I am still working on updating. In preparation for removing redundant WideIV steps added in https://github.com/llvm/llvm-project/pull/119284. PR: https://github.com/llvm/llvm-project/pull/123655
123 lines
8.2 KiB
LLVM
123 lines
8.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -passes=loop-vectorize -enable-masked-interleaved-mem-accesses -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -force-vector-width=8 -S %s | FileCheck %s
|
|
|
|
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-apple-macosx13.0.0"
|
|
|
|
define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 {
|
|
; CHECK-LABEL: @test_pr59090(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ]
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
|
|
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 10000)
|
|
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[INDEX]], 6
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[B:%.*]], align 1, !llvm.access.group [[ACC_GRP0:![0-9]+]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
|
|
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
|
; CHECK: pred.store.if:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
|
|
; CHECK: pred.store.continue:
|
|
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
|
|
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
|
|
; CHECK: pred.store.if1:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
|
|
; CHECK: pred.store.continue2:
|
|
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
|
|
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
|
|
; CHECK: pred.store.if3:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
|
|
; CHECK: pred.store.continue4:
|
|
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
|
|
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
|
|
; CHECK: pred.store.if5:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
|
|
; CHECK: pred.store.continue6:
|
|
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
|
|
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
|
|
; CHECK: pred.store.if7:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
|
|
; CHECK: pred.store.continue8:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
|
|
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
|
|
; CHECK: pred.store.if9:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
|
|
; CHECK: pred.store.continue10:
|
|
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
|
|
; CHECK: pred.store.if11:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
|
|
; CHECK: pred.store.continue12:
|
|
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
|
|
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14]]
|
|
; CHECK: pred.store.if13:
|
|
; CHECK-NEXT: store i8 [[TMP3]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
|
|
; CHECK: pred.store.continue14:
|
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[L_OUT:%.*]], i64 [[TMP2]]
|
|
; CHECK-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
|
; CHECK-NEXT: [[TMP15:%.*]] = and <48 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false>
|
|
; CHECK-NEXT: call void @llvm.masked.store.v48i8.p0(<48 x i8> <i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison>, ptr [[TMP13]], i32 1, <48 x i1> [[TMP15]])
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
|
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10008
|
|
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10008, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[IV_MUL:%.*]] = mul nuw i64 [[IV]], 6
|
|
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: store i8 [[L]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: [[ARRAYIDX77:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[IV_MUL]]
|
|
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX77]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[IV_MUL]], 2
|
|
; CHECK-NEXT: [[ARRAYIDX97:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[ADD_2]]
|
|
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX97]], align 1, !llvm.access.group [[ACC_GRP0]]
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 10000
|
|
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%iv.mul = mul nuw i64 %iv, 6
|
|
%l = load i8, ptr %b, align 1, !llvm.access.group !0
|
|
store i8 %l, ptr %b, align 1, !llvm.access.group !0
|
|
%arrayidx77 = getelementptr i8, ptr %l_out, i64 %iv.mul
|
|
store i8 0, ptr %arrayidx77, align 1, !llvm.access.group !0
|
|
%add.2 = add i64 %iv.mul, 2
|
|
%arrayidx97 = getelementptr i8, ptr %l_out, i64 %add.2
|
|
store i8 0, ptr %arrayidx97, align 1, !llvm.access.group !0
|
|
%iv.next = add nsw nuw i64 %iv, 1
|
|
%exitcond.not = icmp eq i64 %iv, 10000
|
|
br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !1
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { "target-cpu"="skx" }
|
|
|
|
!0 = distinct !{}
|
|
!1 = distinct !{!1, !3}
|
|
!3 = !{!"llvm.loop.parallel_accesses", !0}
|