This patch adds a test showing that incorrect branch weights are set by EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck.
155 lines
9.6 KiB
LLVM
155 lines
9.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "br " --filter "^.*:" --filter "icmp" --version 5
|
|
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-epilogue-vectorization \
|
|
; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC1_EPI4
|
|
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-epilogue-vectorization \
|
|
; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC2_EPI4
|
|
|
|
; FIXME: For MAINVF4IC2_EPI4 the branch weights in the terminator of
|
|
; the VEC_EPILOG_ITER_CHECK block should be [4,4] since each main vector
|
|
; loop iteration covers 8 scalar iterations (VF=4 x IC=2), leaving a
|
|
; remaining count in the range [0,7]. That gives a 4:4 chance of skipping the
|
|
; vector epilogue. I believe the problem lies in
|
|
; EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck
|
|
; where the main loop VF is set to the same value as the epilogue VF.
|
|
; @f0 is the scalar input loop for this test. When %len > 0 it runs a single
; loop that stores the i32 counter %i32 through a pointer computed from %p and
; the i8 counter %i8. The %n parameter is not referenced in the body.
; Profile metadata is attached to the function (!0), to the entry guard branch
; (!1) and to the loop latch branch (!2); the CHECK lines below record the
; !prof attachments seen on each branch after loop-vectorize for each RUN
; configuration.
define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
|
|
; MAINVF4IC1_EPI4-LABEL: define void @f0(
|
|
; MAINVF4IC1_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
|
|
; MAINVF4IC1_EPI4: [[ENTRY:.*:]]
|
|
; MAINVF4IC1_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
|
|
; MAINVF4IC1_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[ITER_CHECK]]:
|
|
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
|
|
; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[VECTOR_SCEVCHECK]]:
|
|
; MAINVF4IC1_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
|
|
; MAINVF4IC1_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
|
|
; MAINVF4IC1_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
|
|
; MAINVF4IC1_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
|
|
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 4
|
|
; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
|
|
; MAINVF4IC1_EPI4: [[VECTOR_PH]]:
|
|
; MAINVF4IC1_EPI4: br label %[[VECTOR_BODY:.*]]
|
|
; MAINVF4IC1_EPI4: [[VECTOR_BODY]]:
|
|
; MAINVF4IC1_EPI4: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
|
|
; MAINVF4IC1_EPI4: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[MIDDLE_BLOCK]]:
|
|
; MAINVF4IC1_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
|
|
; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
|
|
; MAINVF4IC1_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
|
|
; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_PH]]:
|
|
; MAINVF4IC1_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
|
|
; MAINVF4IC1_EPI4: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
|
|
; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
|
|
; MAINVF4IC1_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
|
|
; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
|
|
; MAINVF4IC1_EPI4: br label %[[LOOP:.*]]
|
|
; MAINVF4IC1_EPI4: [[LOOP]]:
|
|
; MAINVF4IC1_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
|
|
; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[EXIT_LOOPEXIT]]:
|
|
; MAINVF4IC1_EPI4: br label %[[EXIT]]
|
|
; MAINVF4IC1_EPI4: [[EXIT]]:
|
|
;
|
|
; MAINVF4IC2_EPI4-LABEL: define void @f0(
|
|
; MAINVF4IC2_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
|
|
; MAINVF4IC2_EPI4: [[ENTRY:.*:]]
|
|
; MAINVF4IC2_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
|
|
; MAINVF4IC2_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[ITER_CHECK]]:
|
|
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
|
|
; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VECTOR_SCEVCHECK]]:
|
|
; MAINVF4IC2_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
|
|
; MAINVF4IC2_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
|
|
; MAINVF4IC2_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
|
|
; MAINVF4IC2_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
|
|
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 8
|
|
; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
|
|
; MAINVF4IC2_EPI4: [[VECTOR_PH]]:
|
|
; MAINVF4IC2_EPI4: br label %[[VECTOR_BODY:.*]]
|
|
; MAINVF4IC2_EPI4: [[VECTOR_BODY]]:
|
|
; MAINVF4IC2_EPI4: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
|
|
; MAINVF4IC2_EPI4: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[MIDDLE_BLOCK]]:
|
|
; MAINVF4IC2_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
|
|
; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
|
|
; MAINVF4IC2_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
|
|
; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_PH]]:
|
|
; MAINVF4IC2_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
|
|
; MAINVF4IC2_EPI4: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
|
|
; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
|
|
; MAINVF4IC2_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
|
|
; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
|
|
; MAINVF4IC2_EPI4: br label %[[LOOP:.*]]
|
|
; MAINVF4IC2_EPI4: [[LOOP]]:
|
|
; MAINVF4IC2_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
|
|
; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[EXIT_LOOPEXIT]]:
|
|
; MAINVF4IC2_EPI4: br label %[[EXIT]]
|
|
; MAINVF4IC2_EPI4: [[EXIT]]:
|
|
;
|
|
entry:
|
|
; Guard: the loop is entered only when %len is strictly positive (signed
; compare); otherwise control goes straight to %exit. Weighted by !1.
%cmp.entry = icmp sgt i32 %len, 0
|
|
br i1 %cmp.entry, label %loop, label %exit, !prof !1
|
|
|
|
loop:
|
|
; Two counters advance in lock-step from 0: the 8-bit %i8 is used only to
; index %p, while the 32-bit %i32 is both the stored value and the operand of
; the exit test.
%i8 = phi i8 [0, %entry], [%i8.inc, %loop]
|
|
%i32 = phi i32 [0, %entry], [%i32.inc, %loop]
|
|
|
|
; The address is formed from the i8 index, not from the i32 counter.
; NOTE(review): this narrow index is presumably what makes the vectorizer emit
; the VECTOR_SCEVCHECK block (see the `icmp ugt i32 [[LEN]], 255` CHECK lines)
; -- confirm.
%ptr = getelementptr inbounds i32, ptr %p, i8 %i8
|
|
store i32 %i32, ptr %ptr
|
|
|
|
%i8.inc = add i8 %i8, 1
|
|
%i32.inc = add i32 %i32, 1
|
|
|
|
; The exit test is an unsigned compare of the pre-increment value %i32 (not
; %i32.inc) against %len. The latch branch carries the !2 weights.
%cmp.loop = icmp ult i32 %i32, %len
|
|
br i1 %cmp.loop, label %loop, label %exit, !prof !2
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Profile metadata for the input IR of @f0.
; !0: function entry count, attached to the definition of @f0.
!0 = !{!"function_entry_count", i64 13}
|
|
; !1: weights for the entry guard branch; operands follow successor order
; (%loop first, then %exit).
!1 = !{!"branch_weights", i32 12, i32 1}
|
|
; !2: weights for the loop latch branch; operands follow successor order
; (back-edge to %loop first, then %exit).
!2 = !{!"branch_weights", i32 1234, i32 1}
|
|
;.
|
|
; MAINVF4IC1_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
|
|
; MAINVF4IC1_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
|
|
; MAINVF4IC1_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
|
|
; MAINVF4IC1_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 307}
|
|
; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
|
|
; MAINVF4IC1_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; MAINVF4IC1_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; MAINVF4IC1_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3}
|
|
; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0}
|
|
; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
|
|
; MAINVF4IC1_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]}
|
|
; MAINVF4IC1_EPI4: [[PROF11]] = !{!"branch_weights", i32 2, i32 1}
|
|
; MAINVF4IC1_EPI4: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]}
|
|
;.
|
|
; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
|
|
; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
|
|
; MAINVF4IC2_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
|
|
; MAINVF4IC2_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153}
|
|
; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
|
|
; MAINVF4IC2_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; MAINVF4IC2_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; MAINVF4IC2_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 7}
|
|
; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0}
|
|
; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
|
|
; MAINVF4IC2_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]}
|
|
; MAINVF4IC2_EPI4: [[PROF11]] = !{!"branch_weights", i32 1, i32 3}
|
|
; MAINVF4IC2_EPI4: [[PROF12]] = !{!"branch_weights", i32 2, i32 1}
|
|
; MAINVF4IC2_EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]}
|
|
;.
|