Files
clang-p2996/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
Florian Hahn 5fbd0658a0 [VPlan] Add initial CFG simplification, removing BranchOnCond true. (#106748)
Add an initial CFG simplification transform, which removes the dead
edges for blocks terminated with BranchOnCond true.

At the moment, this removes the edge between middle block and scalar
preheader when folding the tail.

PR: https://github.com/llvm/llvm-project/pull/106748
2025-04-04 15:44:26 +01:00

3751 lines
250 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -force-widen-divrem-via-safe-divisor=0 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
; RUN: opt < %s -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -force-widen-divrem-via-safe-divisor=0 -S | FileCheck %s --check-prefix=UNROLL-NO-VF
; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -force-widen-divrem-via-safe-divisor=0 -S | FileCheck %s --check-prefix=SINK-AFTER
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; void recurrence_1(int *a, int *b, int n) {
; for(int i = 0; i < n; i++)
; b[i] = a[i] + a[i - 1]
; }
;
;
;
define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) {
; UNROLL-NO-IC-LABEL: @recurrence_1(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br label [[FOR_PREHEADER:%.*]]
; UNROLL-NO-IC: for.preheader:
; UNROLL-NO-IC-NEXT: [[PRE_LOAD:%.*]] = load i32, ptr [[A:%.*]], align 4
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i32>, ptr [[TMP7]], align 4
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP8]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add <4 x i32> [[WIDE_LOAD1]], [[TMP9]]
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP13]], align 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD1]], i32 3
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-IC: scalar.body:
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NO-IC-NEXT: [[TMP17]] = load i32, ptr [[ARRAYIDX32]], align 4
; UNROLL-NO-IC-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: [[ADD35:%.*]] = add i32 [[TMP17]], [[TMP16]]
; UNROLL-NO-IC-NEXT: store i32 [[ADD35]], ptr [[ARRAYIDX34]], align 4
; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; UNROLL-NO-IC: for.exit:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @recurrence_1(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: br label [[FOR_PREHEADER:%.*]]
; UNROLL-NO-VF: for.preheader:
; UNROLL-NO-VF-NEXT: [[PRE_LOAD:%.*]] = load i32, ptr [[A:%.*]], align 4
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 1
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]]
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
; UNROLL-NO-VF-NEXT: [[TMP10]] = load i32, ptr [[TMP8]], align 4
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP4]]
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = add i32 [[TMP9]], [[VECTOR_RECUR]]
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = add i32 [[TMP10]], [[TMP9]]
; UNROLL-NO-VF-NEXT: store i32 [[TMP13]], ptr [[TMP11]], align 4
; UNROLL-NO-VF-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-VF: scalar.body:
; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NO-VF-NEXT: [[TMP17]] = load i32, ptr [[ARRAYIDX32]], align 4
; UNROLL-NO-VF-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: [[ADD35:%.*]] = add i32 [[TMP17]], [[TMP16]]
; UNROLL-NO-VF-NEXT: store i32 [[ADD35]], ptr [[ARRAYIDX34]], align 4
; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; UNROLL-NO-VF: for.exit:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @recurrence_1(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: br label [[FOR_PREHEADER:%.*]]
; SINK-AFTER: for.preheader:
; SINK-AFTER-NEXT: [[PRE_LOAD:%.*]] = load i32, ptr [[A:%.*]], align 4
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; SINK-AFTER-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[INDEX]], 1
; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i32>, ptr [[TMP6]], align 4
; SINK-AFTER-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; SINK-AFTER-NEXT: [[TMP9:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP7]]
; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
; SINK-AFTER-NEXT: store <4 x i32> [[TMP9]], ptr [[TMP10]], align 4
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; SINK-AFTER: scalar.body:
; SINK-AFTER-NEXT: [[TMP12:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; SINK-AFTER-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; SINK-AFTER-NEXT: [[TMP13]] = load i32, ptr [[ARRAYIDX32]], align 4
; SINK-AFTER-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; SINK-AFTER-NEXT: [[ADD35:%.*]] = add i32 [[TMP13]], [[TMP12]]
; SINK-AFTER-NEXT: store i32 [[ADD35]], ptr [[ARRAYIDX34]], align 4
; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; SINK-AFTER: for.exit:
; SINK-AFTER-NEXT: ret void
;
entry:
br label %for.preheader
for.preheader:
%pre_load = load i32, ptr %a
br label %scalar.body
scalar.body:
%0 = phi i32 [ %pre_load, %for.preheader ], [ %1, %scalar.body ]
%indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%arrayidx32 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.next
%1 = load i32, ptr %arrayidx32
%arrayidx34 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
%add35 = add i32 %1, %0
store i32 %add35, ptr %arrayidx34
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.exit, label %scalar.body
for.exit:
ret void
}
; int recurrence_2(int *a, int n) {
; int minmax;
; for (int i = 0; i < n; ++i)
; minmax = min(minmax, max(a[i] - a[i-1], 0));
; return minmax;
; }
;
;
;
define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
; UNROLL-NO-IC-LABEL: @recurrence_2(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
; UNROLL-NO-IC-NEXT: br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; UNROLL-NO-IC: for.preheader:
; UNROLL-NO-IC-NEXT: [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 -1
; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD2:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2]] = load <4 x i32>, ptr [[TMP4]], align 4
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD2]], [[TMP6]]
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP7]], zeroinitializer
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i32> [[TMP8]], zeroinitializer
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP7]], <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP8]], <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP11]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = icmp slt <4 x i32> [[VEC_PHI1]], [[TMP12]]
; UNROLL-NO-IC-NEXT: [[TMP15]] = select <4 x i1> [[TMP13]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP11]]
; UNROLL-NO-IC-NEXT: [[TMP16]] = select <4 x i1> [[TMP14]], <4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP12]]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP15]], <4 x i32> [[TMP16]])
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX]])
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 3
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ poison, [[FOR_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-IC: for.cond.cleanup.loopexit:
; UNROLL-NO-IC-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_COND_CLEANUP]]
; UNROLL-NO-IC: for.cond.cleanup:
; UNROLL-NO-IC-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[MINMAX_0_LCSSA]]
; UNROLL-NO-IC: scalar.body:
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: [[TMP20]] = load i32, ptr [[ARRAYIDX]], align 4
; UNROLL-NO-IC-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP20]], [[TMP19]]
; UNROLL-NO-IC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
; UNROLL-NO-IC-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
; UNROLL-NO-IC-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
; UNROLL-NO-IC-NEXT: [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
;
; UNROLL-NO-VF-LABEL: @recurrence_2(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
; UNROLL-NO-VF-NEXT: br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; UNROLL-NO-VF: for.preheader:
; UNROLL-NO-VF-NEXT: [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 -1
; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]]
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
; UNROLL-NO-VF-NEXT: [[TMP6]] = load i32, ptr [[TMP4]], align 4
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sub nsw i32 [[TMP5]], [[VECTOR_RECUR]]
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sub nsw i32 [[TMP6]], [[TMP5]]
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 0
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], 0
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i32 [[TMP7]], i32 0
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 0
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = icmp slt i32 [[VEC_PHI]], [[TMP11]]
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp slt i32 [[VEC_PHI1]], [[TMP12]]
; UNROLL-NO-VF-NEXT: [[TMP15]] = select i1 [[TMP13]], i32 [[VEC_PHI]], i32 [[TMP11]]
; UNROLL-NO-VF-NEXT: [[TMP16]] = select i1 [[TMP14]], i32 [[VEC_PHI1]], i32 [[TMP12]]
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP15]], i32 [[TMP16]])
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_PREHEADER]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ poison, [[FOR_PREHEADER]] ]
; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-VF: for.cond.cleanup.loopexit:
; UNROLL-NO-VF-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_COND_CLEANUP]]
; UNROLL-NO-VF: for.cond.cleanup:
; UNROLL-NO-VF-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[MINMAX_0_LCSSA]]
; UNROLL-NO-VF: scalar.body:
; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: [[TMP19]] = load i32, ptr [[ARRAYIDX]], align 4
; UNROLL-NO-VF-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP19]], [[TMP18]]
; UNROLL-NO-VF-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
; UNROLL-NO-VF-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
; UNROLL-NO-VF-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
; UNROLL-NO-VF-NEXT: [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
;
; SINK-AFTER-LABEL: @recurrence_2(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
; SINK-AFTER-NEXT: br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; SINK-AFTER: for.preheader:
; SINK-AFTER-NEXT: [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 -1
; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
; SINK-AFTER-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; SINK-AFTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i32>, ptr [[TMP3]], align 4
; SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP5:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP4]]
; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp sgt <4 x i32> [[TMP5]], zeroinitializer
; SINK-AFTER-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP5]], <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP7]]
; SINK-AFTER-NEXT: [[TMP9]] = select <4 x i1> [[TMP8]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP7]]
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP9]])
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_PREHEADER]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ poison, [[FOR_PREHEADER]] ]
; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; SINK-AFTER: for.cond.cleanup.loopexit:
; SINK-AFTER-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: br label [[FOR_COND_CLEANUP]]
; SINK-AFTER: for.cond.cleanup:
; SINK-AFTER-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
; SINK-AFTER-NEXT: ret i32 [[MINMAX_0_LCSSA]]
; SINK-AFTER: scalar.body:
; SINK-AFTER-NEXT: [[TMP12:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; SINK-AFTER-NEXT: [[TMP13]] = load i32, ptr [[ARRAYIDX]], align 4
; SINK-AFTER-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP13]], [[TMP12]]
; SINK-AFTER-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
; SINK-AFTER-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
; SINK-AFTER-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
; SINK-AFTER-NEXT: [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
;
entry:
%cmp27 = icmp sgt i32 %n, 0
br i1 %cmp27, label %for.preheader, label %for.cond.cleanup
for.preheader:
%arrayidx2.phi.trans.insert = getelementptr inbounds i32, ptr %a, i64 -1
%.pre = load i32, ptr %arrayidx2.phi.trans.insert, align 4
br label %scalar.body
for.cond.cleanup.loopexit:
%minmax.0.cond.lcssa = phi i32 [ %minmax.0.cond, %scalar.body ]
br label %for.cond.cleanup
for.cond.cleanup:
%minmax.0.lcssa = phi i32 [ poison, %entry ], [ %minmax.0.cond.lcssa, %for.cond.cleanup.loopexit ]
ret i32 %minmax.0.lcssa
scalar.body:
%0 = phi i32 [ %.pre, %for.preheader ], [ %1, %scalar.body ]
%indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
%minmax.028 = phi i32 [ poison, %for.preheader ], [ %minmax.0.cond, %scalar.body ]
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
%1 = load i32, ptr %arrayidx, align 4
%sub3 = sub nsw i32 %1, %0
%cmp4 = icmp sgt i32 %sub3, 0
%cond = select i1 %cmp4, i32 %sub3, i32 0
%cmp5 = icmp slt i32 %minmax.028, %cond
%minmax.0.cond = select i1 %cmp5, i32 %minmax.028, i32 %cond
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %scalar.body
}
; void recurrence_3(short *a, ptr b, int n, float f, short p) {
; b[0] = (double)a[0] - f * (double)p;
; for (int i = 1; i < n; i++)
; b[i] = (double)a[i] - f * (double)a[i - 1];
; }
;
; Check also that the casts were not moved needlessly.
;
;
define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float %f, i16 %p) {
; UNROLL-NO-IC-LABEL: @recurrence_3(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i16, ptr [[A:%.*]], align 2
; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
; UNROLL-NO-IC-NEXT: [[CONV1:%.*]] = fpext float [[F:%.*]] to double
; UNROLL-NO-IC-NEXT: [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
; UNROLL-NO-IC-NEXT: [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
; UNROLL-NO-IC-NEXT: store double [[SUB]], ptr [[B:%.*]], align 8
; UNROLL-NO-IC-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
; UNROLL-NO-IC-NEXT: br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
; UNROLL-NO-IC: for.preheader:
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i64 1, [[N_VEC]]
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[OFFSET_IDX]]
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP6]], align 2
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = sitofp <4 x i16> [[WIDE_LOAD1]] to <4 x double>
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = sitofp <4 x i16> [[TMP7]] to <4 x double>
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sitofp <4 x i16> [[TMP8]] to <4 x double>
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = fmul fast <4 x double> [[TMP11]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = fmul fast <4 x double> [[TMP12]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = fsub fast <4 x double> [[TMP9]], [[TMP13]]
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = fsub fast <4 x double> [[TMP10]], [[TMP14]]
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[OFFSET_IDX]]
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP15]], ptr [[TMP18]], align 8
; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP16]], ptr [[TMP19]], align 8
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-IC: scalar.body:
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP22:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[ADVARS_IV]]
; UNROLL-NO-IC-NEXT: [[TMP22]] = load i16, ptr [[ARRAYIDX5]], align 2
; UNROLL-NO-IC-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP22]] to double
; UNROLL-NO-IC-NEXT: [[CONV11:%.*]] = sitofp i16 [[TMP21]] to double
; UNROLL-NO-IC-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
; UNROLL-NO-IC-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[ADVARS_IV]]
; UNROLL-NO-IC-NEXT: store double [[SUB13]], ptr [[ARRAYIDX15]], align 8
; UNROLL-NO-IC-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; UNROLL-NO-IC: for.end.loopexit:
; UNROLL-NO-IC-NEXT: br label [[FOR_END]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @recurrence_3(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = load i16, ptr [[A:%.*]], align 2
; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
; UNROLL-NO-VF-NEXT: [[CONV1:%.*]] = fpext float [[F:%.*]] to double
; UNROLL-NO-VF-NEXT: [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
; UNROLL-NO-VF-NEXT: [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
; UNROLL-NO-VF-NEXT: store double [[SUB]], ptr [[B:%.*]], align 8
; UNROLL-NO-VF-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
; UNROLL-NO-VF-NEXT: br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
; UNROLL-NO-VF: for.preheader:
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = add i64 1, [[N_VEC]]
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[OFFSET_IDX]]
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP4]]
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 2
; UNROLL-NO-VF-NEXT: [[TMP8]] = load i16, ptr [[TMP6]], align 2
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sitofp i16 [[TMP7]] to double
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = sitofp i16 [[TMP8]] to double
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sitofp i16 [[VECTOR_RECUR]] to double
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sitofp i16 [[TMP7]] to double
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = fmul fast double [[TMP11]], [[CONV1]]
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = fmul fast double [[TMP12]], [[CONV1]]
; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = fsub fast double [[TMP9]], [[TMP13]]
; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = fsub fast double [[TMP10]], [[TMP14]]
; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[OFFSET_IDX]]
; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP4]]
; UNROLL-NO-VF-NEXT: store double [[TMP15]], ptr [[TMP17]], align 8
; UNROLL-NO-VF-NEXT: store double [[TMP16]], ptr [[TMP18]], align 8
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ]
; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-VF: scalar.body:
; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[ADVARS_IV]]
; UNROLL-NO-VF-NEXT: [[TMP21]] = load i16, ptr [[ARRAYIDX5]], align 2
; UNROLL-NO-VF-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP21]] to double
; UNROLL-NO-VF-NEXT: [[CONV11:%.*]] = sitofp i16 [[TMP20]] to double
; UNROLL-NO-VF-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
; UNROLL-NO-VF-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[ADVARS_IV]]
; UNROLL-NO-VF-NEXT: store double [[SUB13]], ptr [[ARRAYIDX15]], align 8
; UNROLL-NO-VF-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; UNROLL-NO-VF: for.end.loopexit:
; UNROLL-NO-VF-NEXT: br label [[FOR_END]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @recurrence_3(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[TMP0:%.*]] = load i16, ptr [[A:%.*]], align 2
; SINK-AFTER-NEXT: [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
; SINK-AFTER-NEXT: [[CONV1:%.*]] = fpext float [[F:%.*]] to double
; SINK-AFTER-NEXT: [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
; SINK-AFTER-NEXT: [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
; SINK-AFTER-NEXT: [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
; SINK-AFTER-NEXT: store double [[SUB]], ptr [[B:%.*]], align 8
; SINK-AFTER-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
; SINK-AFTER-NEXT: br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
; SINK-AFTER: for.preheader:
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1
; SINK-AFTER-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[IND_END:%.*]] = add i64 1, [[N_VEC]]
; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[OFFSET_IDX]]
; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP5]], align 2
; SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP7:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
; SINK-AFTER-NEXT: [[TMP8:%.*]] = sitofp <4 x i16> [[TMP6]] to <4 x double>
; SINK-AFTER-NEXT: [[TMP9:%.*]] = fmul fast <4 x double> [[TMP8]], [[BROADCAST_SPLAT]]
; SINK-AFTER-NEXT: [[TMP10:%.*]] = fsub fast <4 x double> [[TMP7]], [[TMP9]]
; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[OFFSET_IDX]]
; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP11]], i32 0
; SINK-AFTER-NEXT: store <4 x double> [[TMP10]], ptr [[TMP12]], align 8
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ]
; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; SINK-AFTER: scalar.body:
; SINK-AFTER-NEXT: [[TMP14:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[ADVARS_IV]]
; SINK-AFTER-NEXT: [[TMP15]] = load i16, ptr [[ARRAYIDX5]], align 2
; SINK-AFTER-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP15]] to double
; SINK-AFTER-NEXT: [[CONV11:%.*]] = sitofp i16 [[TMP14]] to double
; SINK-AFTER-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
; SINK-AFTER-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
; SINK-AFTER-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[ADVARS_IV]]
; SINK-AFTER-NEXT: store double [[SUB13]], ptr [[ARRAYIDX15]], align 8
; SINK-AFTER-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; SINK-AFTER: for.end.loopexit:
; SINK-AFTER-NEXT: br label [[FOR_END]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
entry:
%0 = load i16, ptr %a, align 2
%conv = sitofp i16 %0 to double
%conv1 = fpext float %f to double
%conv2 = sitofp i16 %p to double
%mul = fmul fast double %conv2, %conv1
%sub = fsub fast double %conv, %mul
store double %sub, ptr %b, align 8
%cmp25 = icmp sgt i32 %n, 1
br i1 %cmp25, label %for.preheader, label %for.end
for.preheader:
br label %scalar.body
scalar.body:
%1 = phi i16 [ %0, %for.preheader ], [ %2, %scalar.body ]
%advars.iv = phi i64 [ %advars.iv.next, %scalar.body ], [ 1, %for.preheader ]
%arrayidx5 = getelementptr inbounds i16, ptr %a, i64 %advars.iv
%2 = load i16, ptr %arrayidx5, align 2
%conv6 = sitofp i16 %2 to double
%conv11 = sitofp i16 %1 to double
%mul12 = fmul fast double %conv11, %conv1
%sub13 = fsub fast double %conv6, %mul12
%arrayidx15 = getelementptr inbounds double, ptr %b, i64 %advars.iv
store double %sub13, ptr %arrayidx15, align 8
%advars.iv.next = add nuw nsw i64 %advars.iv, 1
%lftr.wideiv = trunc i64 %advars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end.loopexit, label %scalar.body
for.end.loopexit:
br label %for.end
for.end:
ret void
}
; void PR26734(short *a, int *b, int *c, int d, short *e) {
; for (; d != 21; d++) {
; *b &= *c;
; *e = *a - 6;
; *c = *e;
; }
; }
;
;
define void @PR26734(ptr %a, ptr %b, ptr %c, i32 %d, ptr %e) {
; UNROLL-NO-IC-LABEL: @PR26734(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
; UNROLL-NO-IC-NEXT: br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
; UNROLL-NO-IC: entry.for.end_crit_edge:
; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[B:%.*]], align 4
; UNROLL-NO-IC-NEXT: br label [[FOR_END:%.*]]
; UNROLL-NO-IC: for.body.lr.ph:
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i16, ptr [[A:%.*]], align 2
; UNROLL-NO-IC-NEXT: [[SUB:%.*]] = add i16 [[TMP0]], -6
; UNROLL-NO-IC-NEXT: [[CONV2:%.*]] = sext i16 [[SUB]] to i32
; UNROLL-NO-IC-NEXT: [[C_PROMOTED:%.*]] = load i32, ptr [[C:%.*]], align 4
; UNROLL-NO-IC-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr [[B]], align 4
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC: for.body:
; UNROLL-NO-IC-NEXT: [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[AND]] = and i32 [[AND6]], [[CONV25]]
; UNROLL-NO-IC-NEXT: [[INC]] = add nsw i32 [[INC7]], 1
; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 21
; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; UNROLL-NO-IC: for.cond.for.end_crit_edge:
; UNROLL-NO-IC-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: store i32 [[CONV2]], ptr [[C]], align 4
; UNROLL-NO-IC-NEXT: store i32 [[AND_LCSSA]], ptr [[B]], align 4
; UNROLL-NO-IC-NEXT: store i16 [[SUB]], ptr [[E:%.*]], align 2
; UNROLL-NO-IC-NEXT: br label [[FOR_END]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @PR26734(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
; UNROLL-NO-VF-NEXT: br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
; UNROLL-NO-VF: entry.for.end_crit_edge:
; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[B:%.*]], align 4
; UNROLL-NO-VF-NEXT: br label [[FOR_END:%.*]]
; UNROLL-NO-VF: for.body.lr.ph:
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = load i16, ptr [[A:%.*]], align 2
; UNROLL-NO-VF-NEXT: [[SUB:%.*]] = add i16 [[TMP0]], -6
; UNROLL-NO-VF-NEXT: [[CONV2:%.*]] = sext i16 [[SUB]] to i32
; UNROLL-NO-VF-NEXT: [[C_PROMOTED:%.*]] = load i32, ptr [[C:%.*]], align 4
; UNROLL-NO-VF-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr [[B]], align 4
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-VF: for.body:
; UNROLL-NO-VF-NEXT: [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[AND]] = and i32 [[AND6]], [[CONV25]]
; UNROLL-NO-VF-NEXT: [[INC]] = add nsw i32 [[INC7]], 1
; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 21
; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; UNROLL-NO-VF: for.cond.for.end_crit_edge:
; UNROLL-NO-VF-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: store i32 [[CONV2]], ptr [[C]], align 4
; UNROLL-NO-VF-NEXT: store i32 [[AND_LCSSA]], ptr [[B]], align 4
; UNROLL-NO-VF-NEXT: store i16 [[SUB]], ptr [[E:%.*]], align 2
; UNROLL-NO-VF-NEXT: br label [[FOR_END]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @PR26734(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
; SINK-AFTER-NEXT: br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
; SINK-AFTER: entry.for.end_crit_edge:
; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[B:%.*]], align 4
; SINK-AFTER-NEXT: br label [[FOR_END:%.*]]
; SINK-AFTER: for.body.lr.ph:
; SINK-AFTER-NEXT: [[TMP0:%.*]] = load i16, ptr [[A:%.*]], align 2
; SINK-AFTER-NEXT: [[SUB:%.*]] = add i16 [[TMP0]], -6
; SINK-AFTER-NEXT: [[CONV2:%.*]] = sext i16 [[SUB]] to i32
; SINK-AFTER-NEXT: [[C_PROMOTED:%.*]] = load i32, ptr [[C:%.*]], align 4
; SINK-AFTER-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr [[B]], align 4
; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; SINK-AFTER: for.body:
; SINK-AFTER-NEXT: [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[AND]] = and i32 [[AND6]], [[CONV25]]
; SINK-AFTER-NEXT: [[INC]] = add nsw i32 [[INC7]], 1
; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 21
; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; SINK-AFTER: for.cond.for.end_crit_edge:
; SINK-AFTER-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: store i32 [[CONV2]], ptr [[C]], align 4
; SINK-AFTER-NEXT: store i32 [[AND_LCSSA]], ptr [[B]], align 4
; SINK-AFTER-NEXT: store i16 [[SUB]], ptr [[E:%.*]], align 2
; SINK-AFTER-NEXT: br label [[FOR_END]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
entry:
%cmp4 = icmp eq i32 %d, 21
br i1 %cmp4, label %entry.for.end_crit_edge, label %for.body.lr.ph
entry.for.end_crit_edge:
%.pre = load i32, ptr %b, align 4
br label %for.end
for.body.lr.ph:
%0 = load i16, ptr %a, align 2
%sub = add i16 %0, -6
%conv2 = sext i16 %sub to i32
%c.promoted = load i32, ptr %c, align 4
%b.promoted = load i32, ptr %b, align 4
br label %for.body
for.body:
%inc7 = phi i32 [ %d, %for.body.lr.ph ], [ %inc, %for.body ]
%and6 = phi i32 [ %b.promoted, %for.body.lr.ph ], [ %and, %for.body ]
%conv25 = phi i32 [ %c.promoted, %for.body.lr.ph ], [ %conv2, %for.body ]
%and = and i32 %and6, %conv25
%inc = add nsw i32 %inc7, 1
%cmp = icmp eq i32 %inc, 21
br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body
for.cond.for.end_crit_edge:
%and.lcssa = phi i32 [ %and, %for.body ]
store i32 %conv2, ptr %c, align 4
store i32 %and.lcssa, ptr %b, align 4
store i16 %sub, ptr %e, align 2
br label %for.end
for.end:
ret void
}
; int PR27246() {
; unsigned int e, n;
; for (int i = 1; i < 49; ++i) {
; for (int k = i; k > 1; --k)
; e = k;
; n = e;
; }
; return n;
; }
;
;
define i32 @PR27246() {
; UNROLL-NO-IC-LABEL: @PR27246(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; UNROLL-NO-IC: for.cond1.preheader:
; UNROLL-NO-IC-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
; UNROLL-NO-IC-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I_016]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3
; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0
; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 -4)
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 -4)
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 2
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_COND1:%.*]]
; UNROLL-NO-IC: for.cond.cleanup:
; UNROLL-NO-IC-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[E_1_LCSSA_LCSSA]]
; UNROLL-NO-IC: for.cond1:
; UNROLL-NO-IC-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
; UNROLL-NO-IC-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1
; UNROLL-NO-IC-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]]
; UNROLL-NO-IC: for.cond.cleanup3:
; UNROLL-NO-IC-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
;
; UNROLL-NO-VF-LABEL: @PR27246(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; UNROLL-NO-VF: for.cond1.preheader:
; UNROLL-NO-VF-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
; UNROLL-NO-VF-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I_016]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[E_015]], [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[I_016]], [[INDEX]]
; UNROLL-NO-VF-NEXT: [[TMP1]] = add i32 [[OFFSET_IDX]], -1
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_COND1:%.*]]
; UNROLL-NO-VF: for.cond.cleanup:
; UNROLL-NO-VF-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[E_1_LCSSA_LCSSA]]
; UNROLL-NO-VF: for.cond1:
; UNROLL-NO-VF-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
; UNROLL-NO-VF-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1
; UNROLL-NO-VF-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]]
; UNROLL-NO-VF: for.cond.cleanup3:
; UNROLL-NO-VF-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ], [ [[OFFSET_IDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
;
; SINK-AFTER-LABEL: @PR27246(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; SINK-AFTER: for.cond1.preheader:
; SINK-AFTER-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
; SINK-AFTER-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I_016]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3
; SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0
; SINK-AFTER-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VEC_IND]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4)
; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 2
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
; SINK-AFTER-NEXT: br label [[FOR_COND1:%.*]]
; SINK-AFTER: for.cond.cleanup:
; SINK-AFTER-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
; SINK-AFTER-NEXT: ret i32 [[E_1_LCSSA_LCSSA]]
; SINK-AFTER: for.cond1:
; SINK-AFTER-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
; SINK-AFTER-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1
; SINK-AFTER-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]]
; SINK-AFTER: for.cond.cleanup3:
; SINK-AFTER-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
;
entry:
br label %for.cond1.preheader
for.cond1.preheader:
%i.016 = phi i32 [ 1, %entry ], [ %inc, %for.cond.cleanup3 ]
%e.015 = phi i32 [ poison, %entry ], [ %e.1.lcssa, %for.cond.cleanup3 ]
br label %for.cond1
for.cond.cleanup:
%e.1.lcssa.lcssa = phi i32 [ %e.1.lcssa, %for.cond.cleanup3 ]
ret i32 %e.1.lcssa.lcssa
for.cond1:
%e.1 = phi i32 [ %k.0, %for.cond1 ], [ %e.015, %for.cond1.preheader ]
%k.0 = phi i32 [ %dec, %for.cond1 ], [ %i.016, %for.cond1.preheader ]
%cmp2 = icmp sgt i32 %k.0, 1
%dec = add nsw i32 %k.0, -1
br i1 %cmp2, label %for.cond1, label %for.cond.cleanup3
for.cond.cleanup3:
%e.1.lcssa = phi i32 [ %e.1, %for.cond1 ]
%inc = add nuw nsw i32 %i.016, 1
%exitcond = icmp eq i32 %inc, 49
br i1 %exitcond, label %for.cond.cleanup, label %for.cond1.preheader
}
;
define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) {
; UNROLL-NO-IC-LABEL: @PR30183(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -2
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD:%.*]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 8
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 10
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 12
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 14
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP3]], 2
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 2
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 2
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]]
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP12]], align 4
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP13]], align 4
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP14]], align 4
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP16]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP17]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP22]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP18]], i32 3
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-IC: scalar.body:
; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VAR0:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; UNROLL-NO-IC-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_NEXT]]
; UNROLL-NO-IC-NEXT: [[VAR2]] = load i32, ptr [[VAR1]], align 4
; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: [[VAR0_LCSSA:%.*]] = phi i32 [ [[VAR0]], [[SCALAR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[VAR0_LCSSA]]
;
; UNROLL-NO-VF-LABEL: @PR30183(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -2
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 2
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP5]]
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
; UNROLL-NO-VF-NEXT: [[TMP10]] = load i32, ptr [[TMP8]], align 4
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-VF: scalar.body:
; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VAR0:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; UNROLL-NO-VF-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_NEXT]]
; UNROLL-NO-VF-NEXT: [[VAR2]] = load i32, ptr [[VAR1]], align 4
; UNROLL-NO-VF-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: [[VAR0_LCSSA:%.*]] = phi i32 [ [[VAR0]], [[SCALAR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[VAR0_LCSSA]]
;
; SINK-AFTER-LABEL: @PR30183(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -2
; SINK-AFTER-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD:%.*]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2
; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6
; SINK-AFTER-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP3]], 2
; SINK-AFTER-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 2
; SINK-AFTER-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2
; SINK-AFTER-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 2
; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP7]]
; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]]
; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
; SINK-AFTER-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; SINK-AFTER-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4
; SINK-AFTER-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP12]], align 4
; SINK-AFTER-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP13]], align 4
; SINK-AFTER-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP14]], align 4
; SINK-AFTER-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0
; SINK-AFTER-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP16]], i32 1
; SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP17]], i32 2
; SINK-AFTER-NEXT: [[TMP22]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP18]], i32 3
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; SINK-AFTER: scalar.body:
; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[VAR0:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; SINK-AFTER-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_NEXT]]
; SINK-AFTER-NEXT: [[VAR2]] = load i32, ptr [[VAR1]], align 4
; SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: [[VAR0_LCSSA:%.*]] = phi i32 [ [[VAR0]], [[SCALAR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: ret i32 [[VAR0_LCSSA]]
;
entry:
br label %scalar.body
scalar.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %scalar.body ]
%var0 = phi i32 [ %pre_load, %entry ], [ %var2, %scalar.body ]
%i.next = add nuw nsw i64 %i, 2
%var1 = getelementptr inbounds i32, ptr %a, i64 %i.next
%var2 = load i32, ptr %var1
%cond = icmp eq i64 %i.next,%n
br i1 %cond, label %for.end, label %scalar.body
for.end:
ret i32 %var0
}
;
define i64 @constant_folded_previous_value() {
; UNROLL-NO-IC-LABEL: @constant_folded_previous_value(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ splat (i64 1), [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-IC: scalar.body:
; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VAR3]] = add i64 0, 1
; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000
; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[VAR2]], [[SCALAR_BODY]] ], [ 1, [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: ret i64 [[VAR2_LCSSA]]
;
; UNROLL-NO-VF-LABEL: @constant_folded_previous_value(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 0, 1
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP0]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; UNROLL-NO-VF-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-VF: scalar.body:
; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VAR3]] = add i64 0, 1
; UNROLL-NO-VF-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; UNROLL-NO-VF-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000
; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[VAR2]], [[SCALAR_BODY]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: ret i64 [[VAR2_LCSSA]]
;
; SINK-AFTER-LABEL: @constant_folded_previous_value(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ splat (i64 1), [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; SINK-AFTER: scalar.body:
; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[VAR3]] = add i64 0, 1
; SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000
; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[VAR2]], [[SCALAR_BODY]] ], [ 1, [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: ret i64 [[VAR2_LCSSA]]
;
entry:
br label %scalar.body
scalar.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %scalar.body ]
%var2 = phi i64 [ 0, %entry ], [ %var3, %scalar.body ]
%var3 = add i64 0, 1
%i.next = add nuw nsw i64 %i, 1
%cond = icmp eq i64 %i.next, 1000
br i1 %cond, label %for.end, label %scalar.body
for.end:
ret i64 %var2
}
; We vectorize this first order recurrence, by generating two
; extracts for the phi `val.phi` - one at the last index and
; another at the second last index. We need these 2 extracts because
; the first order recurrence phi is used outside the loop, so we require the phi
; itself and not its update (addx).
; Check the case when unrolled but not vectorized.
define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
; UNROLL-NO-IC-LABEL: @extract_second_last_iteration(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; UNROLL-NO-IC-NEXT: [[TMP0]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; UNROLL-NO-IC-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC: for.body:
; UNROLL-NO-IC-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VAL_PHI:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INC]] = add i32 [[INC_PHI]], 1
; UNROLL-NO-IC-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
; UNROLL-NO-IC-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]]
; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[VAL_PHI]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[VAL_PHI_LCSSA]]
;
; UNROLL-NO-VF-LABEL: @extract_second_last_iteration(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], [[X:%.*]]
; UNROLL-NO-VF-NEXT: [[TMP3]] = add i32 [[TMP1]], [[X]]
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; UNROLL-NO-VF-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-VF: for.body:
; UNROLL-NO-VF-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VAL_PHI:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INC]] = add i32 [[INC_PHI]], 1
; UNROLL-NO-VF-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
; UNROLL-NO-VF-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]]
; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[VAL_PHI]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[VAL_PHI_LCSSA]]
;
; SINK-AFTER-LABEL: @extract_second_last_iteration(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP0]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; SINK-AFTER: for.body:
; SINK-AFTER-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[VAL_PHI:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[INC]] = add i32 [[INC_PHI]], 1
; SINK-AFTER-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
; SINK-AFTER-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]]
; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[VAL_PHI]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: ret i32 [[VAL_PHI_LCSSA]]
;
entry:
br label %for.body
for.body:
%inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%val.phi = phi i32 [ 0, %entry ], [ %addx, %for.body ]
%inc = add i32 %inc.phi, 1
%bc = zext i32 %inc.phi to i64
%addx = add i32 %inc.phi, %x
%cmp = icmp eq i32 %inc.phi, 95
br i1 %cmp, label %for.end, label %for.body
for.end:
ret i32 %val.phi
}
; We vectorize this first order recurrence, with a set of insertelements for
; each unrolled part. Make sure these insertelements are generated in-order,
; because the shuffle of the first order recurrence will be added after the
; insertelement of the last part UF - 1, assuming the latter appears after the
; insertelements of all other parts.
;
; int PR33613(ptr b, double j, int d) {
; int a = 0;
; for(int i = 0; i < 10240; i++, b+=25) {
; double f = b[d]; // Scalarize to form insertelements
; if (j * f)
; a++;
; j = f;
; }
; return a;
; }
;
;
define i32 @PR33613(ptr %b, double %j, i32 %d) {
; UNROLL-NO-IC-LABEL: @PR33613(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 2048000
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP40:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 400
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 600
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 800
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1000
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 1200
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 1400
; UNROLL-NO-IC-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
; UNROLL-NO-IC-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
; UNROLL-NO-IC-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
; UNROLL-NO-IC-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
; UNROLL-NO-IC-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
; UNROLL-NO-IC-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
; UNROLL-NO-IC-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP3]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP4]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP5]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP6]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP7]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP8]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP9]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = load double, ptr [[TMP8]], align 8
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = load double, ptr [[TMP9]], align 8
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = load double, ptr [[TMP10]], align 8
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = load double, ptr [[TMP11]], align 8
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = insertelement <4 x double> poison, double [[TMP16]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = insertelement <4 x double> [[TMP20]], double [[TMP17]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = insertelement <4 x double> [[TMP21]], double [[TMP18]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = insertelement <4 x double> [[TMP22]], double [[TMP19]], i32 3
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP12]], align 8
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP13]], align 8
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP14]], align 8
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = load double, ptr [[TMP15]], align 8
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = insertelement <4 x double> poison, double [[TMP24]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = insertelement <4 x double> [[TMP28]], double [[TMP25]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = insertelement <4 x double> [[TMP29]], double [[TMP26]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP31]] = insertelement <4 x double> [[TMP30]], double [[TMP27]], i32 3
; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP23]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> [[TMP31]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = fmul <4 x double> [[TMP32]], [[TMP23]]
; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = fmul <4 x double> [[TMP33]], [[TMP31]]
; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = fcmp une <4 x double> [[TMP34]], zeroinitializer
; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = fcmp une <4 x double> [[TMP35]], zeroinitializer
; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = zext <4 x i1> [[TMP36]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = zext <4 x i1> [[TMP37]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP40]] = add <4 x i32> [[VEC_PHI]], [[TMP38]]
; UNROLL-NO-IC-NEXT: [[TMP41]] = add <4 x i32> [[VEC_PHI2]], [[TMP39]]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
; UNROLL-NO-IC-NEXT: br i1 [[TMP42]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP41]], [[TMP40]]
; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP43]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC: for.cond.cleanup:
; UNROLL-NO-IC-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[A_1_LCSSA]]
; UNROLL-NO-IC: for.body:
; UNROLL-NO-IC-NEXT: [[B_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[J_ADDR_09:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP44:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 [[IDXPROM]]
; UNROLL-NO-IC-NEXT: [[TMP44]] = load double, ptr [[ARRAYIDX]], align 8
; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = fmul double [[J_ADDR_09]], [[TMP44]]
; UNROLL-NO-IC-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
; UNROLL-NO-IC-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
; UNROLL-NO-IC-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]]
; UNROLL-NO-IC-NEXT: [[INC1]] = add nuw nsw i32 [[I_011]], 1
; UNROLL-NO-IC-NEXT: [[ADD_PTR]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 25
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
;
; UNROLL-NO-VF-LABEL: @PR33613(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 2048000
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi double [ [[J:%.*]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200
; UNROLL-NO-VF-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]]
; UNROLL-NO-VF-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP]], i64 [[IDXPROM]]
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP3]], i64 [[IDXPROM]]
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP2]], align 8
; UNROLL-NO-VF-NEXT: [[TMP5]] = load double, ptr [[TMP3]], align 8
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = fmul double [[VECTOR_RECUR]], [[TMP4]]
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = fmul double [[TMP4]], [[TMP5]]
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = fcmp une double [[TMP6]], 0.000000e+00
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = fcmp une double [[TMP7]], 0.000000e+00
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = zext i1 [[TMP9]] to i32
; UNROLL-NO-VF-NEXT: [[TMP12]] = add i32 [[VEC_PHI]], [[TMP10]]
; UNROLL-NO-VF-NEXT: [[TMP13]] = add i32 [[VEC_PHI2]], [[TMP11]]
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]]
; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY:%.*]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-VF: for.cond.cleanup:
; UNROLL-NO-VF-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[A_1_LCSSA]]
; UNROLL-NO-VF: for.body:
; UNROLL-NO-VF-NEXT: [[B_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[J_ADDR_09:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 [[IDXPROM]]
; UNROLL-NO-VF-NEXT: [[TMP15]] = load double, ptr [[ARRAYIDX]], align 8
; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = fmul double [[J_ADDR_09]], [[TMP15]]
; UNROLL-NO-VF-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
; UNROLL-NO-VF-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
; UNROLL-NO-VF-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]]
; UNROLL-NO-VF-NEXT: [[INC1]] = add nuw nsw i32 [[I_011]], 1
; UNROLL-NO-VF-NEXT: [[ADD_PTR]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 25
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
;
; SINK-AFTER-LABEL: @PR33613(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 2048000
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 400
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 600
; SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
; SINK-AFTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
; SINK-AFTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
; SINK-AFTER-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP]], i64 [[IDXPROM]]
; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP2]], i64 [[IDXPROM]]
; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP3]], i64 [[IDXPROM]]
; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP4]], i64 [[IDXPROM]]
; SINK-AFTER-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8
; SINK-AFTER-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8
; SINK-AFTER-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8
; SINK-AFTER-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8
; SINK-AFTER-NEXT: [[TMP12:%.*]] = insertelement <4 x double> poison, double [[TMP8]], i32 0
; SINK-AFTER-NEXT: [[TMP13:%.*]] = insertelement <4 x double> [[TMP12]], double [[TMP9]], i32 1
; SINK-AFTER-NEXT: [[TMP14:%.*]] = insertelement <4 x double> [[TMP13]], double [[TMP10]], i32 2
; SINK-AFTER-NEXT: [[TMP15]] = insertelement <4 x double> [[TMP14]], double [[TMP11]], i32 3
; SINK-AFTER-NEXT: [[TMP16:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP15]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP17:%.*]] = fmul <4 x double> [[TMP16]], [[TMP15]]
; SINK-AFTER-NEXT: [[TMP18:%.*]] = fcmp une <4 x double> [[TMP17]], zeroinitializer
; SINK-AFTER-NEXT: [[TMP19:%.*]] = zext <4 x i1> [[TMP18]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP20]] = add <4 x i32> [[VEC_PHI]], [[TMP19]]
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
; SINK-AFTER-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP20]])
; SINK-AFTER-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY:%.*]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; SINK-AFTER: for.cond.cleanup:
; SINK-AFTER-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: ret i32 [[A_1_LCSSA]]
; SINK-AFTER: for.body:
; SINK-AFTER-NEXT: [[B_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[J_ADDR_09:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 [[IDXPROM]]
; SINK-AFTER-NEXT: [[TMP23]] = load double, ptr [[ARRAYIDX]], align 8
; SINK-AFTER-NEXT: [[MUL:%.*]] = fmul double [[J_ADDR_09]], [[TMP23]]
; SINK-AFTER-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
; SINK-AFTER-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
; SINK-AFTER-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]]
; SINK-AFTER-NEXT: [[INC1]] = add nuw nsw i32 [[I_011]], 1
; SINK-AFTER-NEXT: [[ADD_PTR]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 25
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
;
entry:
%idxprom = sext i32 %d to i64
br label %for.body
for.cond.cleanup:
%a.1.lcssa = phi i32 [ %a.1, %for.body ]
ret i32 %a.1.lcssa
for.body:
%b.addr.012 = phi ptr [ %b, %entry ], [ %add.ptr, %for.body ]
%i.011 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]
%a.010 = phi i32 [ 0, %entry ], [ %a.1, %for.body ]
%j.addr.09 = phi double [ %j, %entry ], [ %0, %for.body ]
%arrayidx = getelementptr inbounds double, ptr %b.addr.012, i64 %idxprom
%0 = load double, ptr %arrayidx, align 8
%mul = fmul double %j.addr.09, %0
%tobool = fcmp une double %mul, 0.000000e+00
%inc = zext i1 %tobool to i32
%a.1 = add nsw i32 %a.010, %inc
%inc1 = add nuw nsw i32 %i.011, 1
%add.ptr = getelementptr inbounds double, ptr %b.addr.012, i64 25
%exitcond = icmp eq i32 %inc1, 10240
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
; void sink_after(short *a, int n, int *b) {
; for(int i = 0; i < n; i++)
; b[i] = (aptr a[i + 1]);
; }
;
; Check that the sext sank after the load in the vector loop.
;
define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) {
; UNROLL-NO-IC-LABEL: @sink_after(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP4]], align 2
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = mul nsw <4 x i32> [[TMP10]], [[TMP8]]
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP14]], align 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP15]], align 4
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC: for.body:
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[TMP17]] to i32
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NO-IC-NEXT: [[TMP18]] = load i16, ptr [[ARRAYIDX2]], align 2
; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP18]] to i32
; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @sink_after(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP2]]
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP3]]
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 2
; UNROLL-NO-VF-NEXT: [[TMP7]] = load i16, ptr [[TMP5]], align 2
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sext i16 [[TMP6]] to i32
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = sext i16 [[TMP6]] to i32
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sext i16 [[TMP7]] to i32
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = mul nsw i32 [[TMP10]], [[TMP8]]
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = mul nsw i32 [[TMP11]], [[TMP9]]
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
; UNROLL-NO-VF-NEXT: store i32 [[TMP12]], ptr [[TMP14]], align 4
; UNROLL-NO-VF-NEXT: store i32 [[TMP13]], ptr [[TMP15]], align 4
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-VF: for.body:
; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[TMP17]] to i32
; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NO-VF-NEXT: [[TMP18]] = load i16, ptr [[ARRAYIDX2]], align 2
; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP18]] to i32
; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @sink_after(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDEX]], 1
; SINK-AFTER-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP1]]
; SINK-AFTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP3]], align 2
; SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP6:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP7:%.*]] = mul nsw <4 x i32> [[TMP6]], [[TMP5]]
; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
; SINK-AFTER-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; SINK-AFTER: for.body:
; SINK-AFTER-NEXT: [[TMP11:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32
; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; SINK-AFTER-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; SINK-AFTER-NEXT: [[TMP12]] = load i16, ptr [[ARRAYIDX2]], align 2
; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP12]] to i32
; SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; SINK-AFTER-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
entry:
%.pre = load i16, ptr %a
br label %for.body
for.body:
%0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%conv = sext i16 %0 to i32
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%arrayidx2 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv.next
%1 = load i16, ptr %arrayidx2
%conv3 = sext i16 %1 to i32
%mul = mul nsw i32 %conv3, %conv
%arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
store i32 %mul, ptr %arrayidx5
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
; PR34711: given three consecutive instructions such that the first will be
; widened, the second is a cast that will be widened and needs to sink after the
; third, and the third is a first-order-recurring load that will be replicated
; instead of widened. Although the cast and the first instruction will both be
; widened, and are originally adjacent to each other, make sure the replicated
; load ends up appearing between them.
;
; void PR34711(short[2] *a, int *b, int *c, int n) {
; for(int i = 0; i < n; i++) {
; c[i] = 7;
; b[i] = (a[i][0] * a[i][1]);
; }
; }
;
; Check that the sext sank after the load in the vector loop.
;
define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
; UNROLL-NO-IC-LABEL: @PR34711(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP0]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP4]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP5]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP6]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP7]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP17]], align 4
; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP18]], align 4
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP9]], align 2
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = load i16, ptr [[TMP10]], align 2
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP11]], align 2
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP12]], align 2
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = insertelement <4 x i16> poison, i16 [[TMP19]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = insertelement <4 x i16> [[TMP23]], i16 [[TMP20]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = insertelement <4 x i16> [[TMP24]], i16 [[TMP21]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> [[TMP25]], i16 [[TMP22]], i32 3
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = load i16, ptr [[TMP13]], align 2
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP14]], align 2
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = load i16, ptr [[TMP15]], align 2
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = load i16, ptr [[TMP16]], align 2
; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = insertelement <4 x i16> poison, i16 [[TMP27]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> [[TMP31]], i16 [[TMP28]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = insertelement <4 x i16> [[TMP32]], i16 [[TMP29]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP34]] = insertelement <4 x i16> [[TMP33]], i16 [[TMP30]], i32 3
; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP26]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = shufflevector <4 x i16> [[TMP26]], <4 x i16> [[TMP34]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = sext <4 x i16> [[TMP35]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = sext <4 x i16> [[TMP36]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = sext <4 x i16> [[TMP26]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = sext <4 x i16> [[TMP34]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP37]]
; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = mul nsw <4 x i32> [[TMP40]], [[TMP38]]
; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP41]], ptr [[TMP44]], align 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP45]], align 4
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP30]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC: for.body:
; UNROLL-NO-IC-NEXT: [[TMP47:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP48:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDVARS_IV]], i64 1
; UNROLL-NO-IC-NEXT: store i32 7, ptr [[ARRAYCIDX]], align 4
; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[TMP47]] to i32
; UNROLL-NO-IC-NEXT: [[TMP48]] = load i16, ptr [[CUR_INDEX]], align 2
; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP48]] to i32
; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @PR34711(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]]
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP1]]
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDEX]], i64 1
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1
; UNROLL-NO-VF-NEXT: store i32 7, ptr [[TMP2]], align 4
; UNROLL-NO-VF-NEXT: store i32 7, ptr [[TMP3]], align 4
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 2
; UNROLL-NO-VF-NEXT: [[TMP7]] = load i16, ptr [[TMP5]], align 2
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sext i16 [[TMP6]] to i32
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = sext i16 [[TMP6]] to i32
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sext i16 [[TMP7]] to i32
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = mul nsw i32 [[TMP10]], [[TMP8]]
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = mul nsw i32 [[TMP11]], [[TMP9]]
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
; UNROLL-NO-VF-NEXT: store i32 [[TMP12]], ptr [[TMP14]], align 4
; UNROLL-NO-VF-NEXT: store i32 [[TMP13]], ptr [[TMP15]], align 4
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-VF: for.body:
; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDVARS_IV]], i64 1
; UNROLL-NO-VF-NEXT: store i32 7, ptr [[ARRAYCIDX]], align 4
; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[TMP17]] to i32
; UNROLL-NO-VF-NEXT: [[TMP18]] = load i16, ptr [[CUR_INDEX]], align 2
; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP18]] to i32
; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @PR34711(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]]
; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP0]], i64 1
; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1
; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1
; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1
; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
; SINK-AFTER-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP9]], align 4
; SINK-AFTER-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP5]], align 2
; SINK-AFTER-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP6]], align 2
; SINK-AFTER-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP7]], align 2
; SINK-AFTER-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP8]], align 2
; SINK-AFTER-NEXT: [[TMP14:%.*]] = insertelement <4 x i16> poison, i16 [[TMP10]], i32 0
; SINK-AFTER-NEXT: [[TMP15:%.*]] = insertelement <4 x i16> [[TMP14]], i16 [[TMP11]], i32 1
; SINK-AFTER-NEXT: [[TMP16:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP12]], i32 2
; SINK-AFTER-NEXT: [[TMP17]] = insertelement <4 x i16> [[TMP16]], i16 [[TMP13]], i32 3
; SINK-AFTER-NEXT: [[TMP18:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP17]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP19:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP20:%.*]] = sext <4 x i16> [[TMP17]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP21:%.*]] = mul nsw <4 x i32> [[TMP20]], [[TMP19]]
; SINK-AFTER-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
; SINK-AFTER-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 0
; SINK-AFTER-NEXT: store <4 x i32> [[TMP21]], ptr [[TMP23]], align 4
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; SINK-AFTER: for.body:
; SINK-AFTER-NEXT: [[TMP25:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP26:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]]
; SINK-AFTER-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDVARS_IV]], i64 1
; SINK-AFTER-NEXT: store i32 7, ptr [[ARRAYCIDX]], align 4
; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[TMP25]] to i32
; SINK-AFTER-NEXT: [[TMP26]] = load i16, ptr [[CUR_INDEX]], align 2
; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP26]] to i32
; SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; SINK-AFTER-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
entry:
%.pre = load i16, ptr %a
br label %for.body
for.body:
%0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arraycidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
%cur.index = getelementptr inbounds [2 x i16], ptr %a, i64 %indvars.iv, i64 1
store i32 7, ptr %arraycidx ; 1st instruction, to be widened.
%conv = sext i16 %0 to i32 ; 2nd, cast to sink after third.
%1 = load i16, ptr %cur.index ; 3rd, first-order-recurring load not widened.
%conv3 = sext i16 %1 to i32
%mul = mul nsw i32 %conv3, %conv
%arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
store i32 %mul, ptr %arrayidx5
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
; void no_sink_after(short *a, int n, int *b) {
; for(int i = 0; i < n; i++)
; b[i] = ((a[i] + 2) * a[i + 1]);
; }
;
;
define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 %n) {
; UNROLL-NO-IC-LABEL: @sink_after_with_multiple_users(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP4]], align 2
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP7]], splat (i32 2)
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], splat (i32 2)
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP11]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = mul nsw <4 x i32> [[TMP10]], [[TMP12]]
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP16]], align 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP17]], align 4
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC: for.body:
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32
; UNROLL-NO-IC-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NO-IC-NEXT: [[TMP20]] = load i16, ptr [[ARRAYIDX2]], align 2
; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32
; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @sink_after_with_multiple_users(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP2]]
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP3]]
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 2
; UNROLL-NO-VF-NEXT: [[TMP7]] = load i16, ptr [[TMP5]], align 2
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sext i16 [[TMP6]] to i32
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP8]], 2
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP9]], 2
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sext i16 [[TMP6]] to i32
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = sext i16 [[TMP7]] to i32
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = mul nsw i32 [[TMP10]], [[TMP12]]
; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = mul nsw i32 [[TMP11]], [[TMP13]]
; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
; UNROLL-NO-VF-NEXT: store i32 [[TMP14]], ptr [[TMP16]], align 4
; UNROLL-NO-VF-NEXT: store i32 [[TMP15]], ptr [[TMP17]], align 4
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-VF: for.body:
; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32
; UNROLL-NO-VF-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2
; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; UNROLL-NO-VF-NEXT: [[TMP20]] = load i16, ptr [[ARRAYIDX2]], align 2
; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32
; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @sink_after_with_multiple_users(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDEX]], 1
; SINK-AFTER-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP1]]
; SINK-AFTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP3]], align 2
; SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], splat (i32 2)
; SINK-AFTER-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP8:%.*]] = mul nsw <4 x i32> [[TMP6]], [[TMP7]]
; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
; SINK-AFTER-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP10]], align 4
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]]
; SINK-AFTER: for.body:
; SINK-AFTER-NEXT: [[TMP12:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[TMP12]] to i32
; SINK-AFTER-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2
; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; SINK-AFTER-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; SINK-AFTER-NEXT: [[TMP13]] = load i16, ptr [[ARRAYIDX2]], align 2
; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP13]] to i32
; SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; SINK-AFTER-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
entry:
%.pre = load i16, ptr %a
br label %for.body
for.body:
%0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%conv = sext i16 %0 to i32
%add = add nsw i32 %conv, 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%arrayidx2 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv.next
%1 = load i16, ptr %arrayidx2
%conv3 = sext i16 %1 to i32
%mul = mul nsw i32 %add, %conv3
%arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
store i32 %mul, ptr %arrayidx5
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
; Do not sink branches: While branches are if-converted and do not require
; sinking, instructions with side effects (e.g. loads) conditioned by those
; branches will become users of the condition bit after vectorization and would
; need to be sunk if the loop is vectorized.
define void @do_not_sink_branch(i32 %x, ptr %in, ptr %out, i32 %tc) local_unnamed_addr #0 {
; UNROLL-NO-IC-LABEL: @do_not_sink_branch(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY4:%.*]]
; UNROLL-NO-IC: for.body4:
; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
; UNROLL-NO-IC-NEXT: [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
; UNROLL-NO-IC-NEXT: br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
; UNROLL-NO-IC: cond.true:
; UNROLL-NO-IC-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i32 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: [[IN_VAL:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
; UNROLL-NO-IC-NEXT: br label [[COND_END]]
; UNROLL-NO-IC: cond.end:
; UNROLL-NO-IC-NEXT: [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
; UNROLL-NO-IC-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT: store i32 [[COND]], ptr [[ARRAYIDX8]], align 4
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
; UNROLL-NO-IC: for.end12.loopexit:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @do_not_sink_branch(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
; UNROLL-NO-VF-NEXT: br label [[FOR_BODY4:%.*]]
; UNROLL-NO-VF: for.body4:
; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
; UNROLL-NO-VF-NEXT: [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
; UNROLL-NO-VF-NEXT: br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
; UNROLL-NO-VF: cond.true:
; UNROLL-NO-VF-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i32 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: [[IN_VAL:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
; UNROLL-NO-VF-NEXT: br label [[COND_END]]
; UNROLL-NO-VF: cond.end:
; UNROLL-NO-VF-NEXT: [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
; UNROLL-NO-VF-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 [[INDVARS_IV]]
; UNROLL-NO-VF-NEXT: store i32 [[COND]], ptr [[ARRAYIDX8]], align 4
; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; UNROLL-NO-VF-NEXT: [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
; UNROLL-NO-VF: for.end12.loopexit:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @do_not_sink_branch(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
; SINK-AFTER-NEXT: br label [[FOR_BODY4:%.*]]
; SINK-AFTER: for.body4:
; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
; SINK-AFTER-NEXT: [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
; SINK-AFTER-NEXT: br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
; SINK-AFTER: cond.true:
; SINK-AFTER-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i32 [[INDVARS_IV]]
; SINK-AFTER-NEXT: [[IN_VAL:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
; SINK-AFTER-NEXT: br label [[COND_END]]
; SINK-AFTER: cond.end:
; SINK-AFTER-NEXT: [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
; SINK-AFTER-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 [[INDVARS_IV]]
; SINK-AFTER-NEXT: store i32 [[COND]], ptr [[ARRAYIDX8]], align 4
; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; SINK-AFTER-NEXT: [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
; SINK-AFTER: for.end12.loopexit:
; SINK-AFTER-NEXT: ret void
;
entry:
%cmp530 = icmp slt i32 0, %tc
br label %for.body4
for.body4: ; preds = %cond.end, %entry
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %cond.end ]
%cmp534 = phi i1 [ %cmp530, %entry ], [ %cmp5, %cond.end ]
br i1 %cmp534, label %cond.true, label %cond.end
cond.true: ; preds = %for.body4
%arrayidx7 = getelementptr inbounds i32, ptr %in, i32 %indvars.iv
%in.val = load i32, ptr %arrayidx7, align 4
br label %cond.end
cond.end: ; preds = %for.body4, %cond.true
%cond = phi i32 [ %in.val, %cond.true ], [ 0, %for.body4 ]
%arrayidx8 = getelementptr inbounds i32, ptr %out, i32 %indvars.iv
store i32 %cond, ptr %arrayidx8, align 4
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
%cmp5 = icmp slt i32 %indvars.iv.next, %tc
%exitcond = icmp eq i32 %indvars.iv.next, %x
br i1 %exitcond, label %for.end12.loopexit, label %for.body4
for.end12.loopexit: ; preds = %cond.end
ret void
}
; Dead instructions, like the exit condition are not part of the actual VPlan
; and do not need to be sunk. PR44634.
define void @sink_dead_inst(ptr %a) {
; UNROLL-NO-IC-LABEL: @sink_dead_inst(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]]
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1)
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1)
; UNROLL-NO-IC-NEXT: [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP1]], splat (i16 5)
; UNROLL-NO-IC-NEXT: [[TMP5]] = add <4 x i16> [[TMP2]], splat (i16 5)
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], splat (i16 10)
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = sub <4 x i16> [[TMP7]], splat (i16 10)
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[OFFSET_IDX]]
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP10]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP10]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP11]], align 2
; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP9]], ptr [[TMP12]], align 2
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
; UNROLL-NO-IC-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; UNROLL-NO-IC-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_COND:%.*]]
; UNROLL-NO-IC: for.cond:
; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
; UNROLL-NO-IC-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
; UNROLL-NO-IC-NEXT: [[REC_2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
; UNROLL-NO-IC-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10
; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[REC_2]], 15
; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; UNROLL-NO-IC-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
; UNROLL-NO-IC-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
; UNROLL-NO-IC-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[A]], i16 [[IV]]
; UNROLL-NO-IC-NEXT: store i16 [[USE_REC_1]], ptr [[GEP]], align 2
; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @sink_dead_inst(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR1:%.*]] = phi i32 [ -27, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]]
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i16 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[TMP4]] = zext i16 [[TMP3]] to i32
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = add i16 [[TMP2]], 5
; UNROLL-NO-VF-NEXT: [[TMP6]] = add i16 [[TMP3]], 5
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sub i16 [[VECTOR_RECUR]], 10
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sub i16 [[TMP5]], 10
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[OFFSET_IDX]]
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[A]], i16 [[TMP1]]
; UNROLL-NO-VF-NEXT: store i16 [[TMP7]], ptr [[TMP9]], align 2
; UNROLL-NO-VF-NEXT: store i16 [[TMP8]], ptr [[TMP10]], align 2
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 42
; UNROLL-NO-VF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 15, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT2:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_COND:%.*]]
; UNROLL-NO-VF: for.cond:
; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
; UNROLL-NO-VF-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
; UNROLL-NO-VF-NEXT: [[REC_2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT2]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
; UNROLL-NO-VF-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10
; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[REC_2]], 15
; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; UNROLL-NO-VF-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
; UNROLL-NO-VF-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
; UNROLL-NO-VF-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[A]], i16 [[IV]]
; UNROLL-NO-VF-NEXT: store i16 [[USE_REC_1]], ptr [[GEP]], align 2
; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @sink_dead_inst(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]]
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1)
; SINK-AFTER-NEXT: [[TMP2]] = zext <4 x i16> [[TMP1]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP3]] = add <4 x i16> [[TMP1]], splat (i16 5)
; SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[TMP4]], splat (i16 10)
; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[OFFSET_IDX]]
; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP6]], i32 0
; SINK-AFTER-NEXT: store <4 x i16> [[TMP5]], ptr [[TMP7]], align 2
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; SINK-AFTER-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
; SINK-AFTER-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
; SINK-AFTER-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[FOR_COND:%.*]]
; SINK-AFTER: for.cond:
; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
; SINK-AFTER-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
; SINK-AFTER-NEXT: [[REC_2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
; SINK-AFTER-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10
; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[REC_2]], 15
; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; SINK-AFTER-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
; SINK-AFTER-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
; SINK-AFTER-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[A]], i16 [[IV]]
; SINK-AFTER-NEXT: store i16 [[USE_REC_1]], ptr [[GEP]], align 2
; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
entry:
br label %for.cond
for.cond:
%iv = phi i16 [ -27, %entry ], [ %iv.next, %for.cond ]
%rec.1 = phi i16 [ 0, %entry ], [ %rec.1.prev, %for.cond ]
%rec.2 = phi i32 [ -27, %entry ], [ %rec.2.prev, %for.cond ]
%use.rec.1 = sub i16 %rec.1, 10
%cmp = icmp eq i32 %rec.2, 15
%iv.next = add i16 %iv, 1
%rec.2.prev = zext i16 %iv.next to i32
%rec.1.prev = add i16 %iv.next, 5
%gep = getelementptr i16, ptr %a, i16 %iv
store i16 %use.rec.1, ptr %gep
br i1 %cmp, label %for.end, label %for.cond
for.end:
ret void
}
define i32 @sink_into_replication_region(i32 %y) {
; UNROLL-NO-IC-LABEL: @sink_into_replication_region(
; UNROLL-NO-IC-NEXT: bb:
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NO-IC-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26:![0-9]+]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE20:%.*]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_UDIV_CONTINUE20]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_UDIV_CONTINUE20]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_UDIV_CONTINUE20]] ]
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; UNROLL-NO-IC-NEXT: [[VEC_IV4:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 4, i32 5, i32 6, i32 7>
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT6]]
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV4]], [[BROADCAST_SPLAT6]]
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; UNROLL-NO-IC: pred.udiv.if:
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 0
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = udiv i32 219220132, [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE]]
; UNROLL-NO-IC: pred.udiv.continue:
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UDIV_IF]] ]
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
; UNROLL-NO-IC-NEXT: br i1 [[TMP9]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
; UNROLL-NO-IC: pred.udiv.if7:
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i32 [[OFFSET_IDX]], -1
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = udiv i32 219220132, [[TMP10]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE8]]
; UNROLL-NO-IC: pred.udiv.continue8:
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF7]] ]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[TMP14]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]]
; UNROLL-NO-IC: pred.udiv.if9:
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], -2
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = udiv i32 219220132, [[TMP15]]
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE10]]
; UNROLL-NO-IC: pred.udiv.continue10:
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP17]], [[PRED_UDIV_IF9]] ]
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[PRED_UDIV_IF11:%.*]], label [[PRED_UDIV_CONTINUE12:%.*]]
; UNROLL-NO-IC: pred.udiv.if11:
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], -3
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = udiv i32 219220132, [[TMP20]]
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE12]]
; UNROLL-NO-IC: pred.udiv.continue12:
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE10]] ], [ [[TMP22]], [[PRED_UDIV_IF11]] ]
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP24]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14:%.*]]
; UNROLL-NO-IC: pred.udiv.if13:
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], -4
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = udiv i32 219220132, [[TMP25]]
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE14]]
; UNROLL-NO-IC: pred.udiv.continue14:
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE12]] ], [ [[TMP27]], [[PRED_UDIV_IF13]] ]
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; UNROLL-NO-IC-NEXT: br i1 [[TMP29]], label [[PRED_UDIV_IF15:%.*]], label [[PRED_UDIV_CONTINUE16:%.*]]
; UNROLL-NO-IC: pred.udiv.if15:
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], -5
; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = udiv i32 219220132, [[TMP30]]
; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE16]]
; UNROLL-NO-IC: pred.udiv.continue16:
; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP32]], [[PRED_UDIV_IF15]] ]
; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[TMP34]], label [[PRED_UDIV_IF17:%.*]], label [[PRED_UDIV_CONTINUE18:%.*]]
; UNROLL-NO-IC: pred.udiv.if17:
; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = add i32 [[OFFSET_IDX]], -6
; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = udiv i32 219220132, [[TMP35]]
; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE18]]
; UNROLL-NO-IC: pred.udiv.continue18:
; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_UDIV_CONTINUE16]] ], [ [[TMP37]], [[PRED_UDIV_IF17]] ]
; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
; UNROLL-NO-IC-NEXT: br i1 [[TMP39]], label [[PRED_UDIV_IF19:%.*]], label [[PRED_UDIV_CONTINUE20]]
; UNROLL-NO-IC: pred.udiv.if19:
; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = add i32 [[OFFSET_IDX]], -7
; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = udiv i32 219220132, [[TMP40]]
; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE20]]
; UNROLL-NO-IC: pred.udiv.continue20:
; UNROLL-NO-IC-NEXT: [[TMP43]] = phi <4 x i32> [ [[TMP38]], [[PRED_UDIV_CONTINUE18]] ], [ [[TMP42]], [[PRED_UDIV_IF19]] ]
; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP23]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> [[TMP43]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP46]] = add <4 x i32> [[VEC_PHI]], [[TMP44]]
; UNROLL-NO-IC-NEXT: [[TMP47]] = add <4 x i32> [[VEC_PHI1]], [[TMP45]]
; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]]
; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI1]]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27:![0-9]+]], !llvm.loop [[LOOP28:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]]
; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; UNROLL-NO-IC-NEXT: br label [[BB1:%.*]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-IC: bb1:
; UNROLL-NO-IC-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[VAR]]
; UNROLL-NO-IC: bb2:
; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29:![0-9]+]], !llvm.loop [[LOOP30:![0-9]+]]
;
; UNROLL-NO-VF-LABEL: @sink_into_replication_region(
; UNROLL-NO-VF-NEXT: bb:
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NO-VF-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26:![0-9]+]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE4:%.*]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_UDIV_CONTINUE4]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_UDIV_CONTINUE4]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[PRED_UDIV_CONTINUE4]] ]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; UNROLL-NO-VF-NEXT: [[VEC_IV:%.*]] = add i32 [[INDEX]], 0
; UNROLL-NO-VF-NEXT: [[VEC_IV2:%.*]] = add i32 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp ule i32 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]]
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV2]], [[TRIP_COUNT_MINUS_1]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; UNROLL-NO-VF: pred.udiv.if:
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE]]
; UNROLL-NO-VF: pred.udiv.continue:
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ]
; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4]]
; UNROLL-NO-VF: pred.udiv.if3:
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -1
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[TMP7]]
; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE4]]
; UNROLL-NO-VF: pred.udiv.continue4:
; UNROLL-NO-VF-NEXT: [[TMP9]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP8]], [[PRED_UDIV_IF3]] ]
; UNROLL-NO-VF-NEXT: [[TMP10]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]]
; UNROLL-NO-VF-NEXT: [[TMP11]] = add i32 [[VEC_PHI1]], [[TMP6]]
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = select i1 [[TMP2]], i32 [[TMP10]], i32 [[VEC_PHI]]
; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = select i1 [[TMP3]], i32 [[TMP11]], i32 [[VEC_PHI1]]
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27:![0-9]+]], !llvm.loop [[LOOP28:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]]
; UNROLL-NO-VF-NEXT: br label [[BB1:%.*]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-VF: bb1:
; UNROLL-NO-VF-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[VAR]]
; UNROLL-NO-VF: bb2:
; UNROLL-NO-VF-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29:![0-9]+]], !llvm.loop [[LOOP30:![0-9]+]]
;
; SINK-AFTER-LABEL: @sink_into_replication_region(
; SINK-AFTER-NEXT: bb:
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; SINK-AFTER-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; SINK-AFTER-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26:![0-9]+]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; SINK-AFTER-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_UDIV_CONTINUE8]] ]
; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_UDIV_CONTINUE8]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT2]]
; SINK-AFTER-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
; SINK-AFTER-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; SINK-AFTER: pred.udiv.if:
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0
; SINK-AFTER-NEXT: [[TMP5:%.*]] = udiv i32 219220132, [[TMP4]]
; SINK-AFTER-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE]]
; SINK-AFTER: pred.udiv.continue:
; SINK-AFTER-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
; SINK-AFTER-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
; SINK-AFTER-NEXT: br i1 [[TMP8]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
; SINK-AFTER: pred.udiv.if3:
; SINK-AFTER-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -1
; SINK-AFTER-NEXT: [[TMP10:%.*]] = udiv i32 219220132, [[TMP9]]
; SINK-AFTER-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP10]], i32 1
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE4]]
; SINK-AFTER: pred.udiv.continue4:
; SINK-AFTER-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ [[TMP7]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP11]], [[PRED_UDIV_IF3]] ]
; SINK-AFTER-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
; SINK-AFTER: pred.udiv.if5:
; SINK-AFTER-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], -2
; SINK-AFTER-NEXT: [[TMP15:%.*]] = udiv i32 219220132, [[TMP14]]
; SINK-AFTER-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i32 2
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE6]]
; SINK-AFTER: pred.udiv.continue6:
; SINK-AFTER-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP16]], [[PRED_UDIV_IF5]] ]
; SINK-AFTER-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
; SINK-AFTER-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8]]
; SINK-AFTER: pred.udiv.if7:
; SINK-AFTER-NEXT: [[TMP19:%.*]] = add i32 [[OFFSET_IDX]], -3
; SINK-AFTER-NEXT: [[TMP20:%.*]] = udiv i32 219220132, [[TMP19]]
; SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i32 3
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE8]]
; SINK-AFTER: pred.udiv.continue8:
; SINK-AFTER-NEXT: [[TMP22]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP21]], [[PRED_UDIV_IF7]] ]
; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]]
; SINK-AFTER-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27:![0-9]+]], !llvm.loop [[LOOP28:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
; SINK-AFTER-NEXT: br label [[BB1:%.*]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; SINK-AFTER-NEXT: br label [[BB2:%.*]]
; SINK-AFTER: bb1:
; SINK-AFTER-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: ret i32 [[VAR]]
; SINK-AFTER: bb2:
; SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29:![0-9]+]], !llvm.loop [[LOOP30:![0-9]+]]
;
bb:
br label %bb2
bb1: ; preds = %bb2
%var = phi i32 [ %var6, %bb2 ]
ret i32 %var
bb2: ; preds = %bb2, %bb
%var3 = phi i32 [ %var8, %bb2 ], [ %y, %bb ]
%var4 = phi i32 [ %var7, %bb2 ], [ 0, %bb ]
%var5 = phi i32 [ %var6, %bb2 ], [ 0, %bb ]
%var6 = add i32 %var5, %var4
%var7 = udiv i32 219220132, %var3
%var8 = add nsw i32 %var3, -1
%var9 = icmp slt i32 %var3, 2
br i1 %var9, label %bb1, label %bb2, !prof !2
}
define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
;
; CHECK-LABEL: @sink_into_replication_region_multiple(
; UNROLL-NO-IC-LABEL: @sink_into_replication_region_multiple(
; UNROLL-NO-IC-NEXT: bb:
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NO-IC-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE29:%.*]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE29]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_STORE_CONTINUE29]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_STORE_CONTINUE29]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_STORE_CONTINUE29]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], -4
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -5
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -6
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -7
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ule <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; UNROLL-NO-IC: pred.udiv.if:
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = udiv i32 219220132, [[TMP2]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> poison, i32 [[TMP13]], i32 0
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE]]
; UNROLL-NO-IC: pred.udiv.continue:
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP14]], [[PRED_UDIV_IF]] ]
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1
; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[PRED_UDIV_IF2:%.*]], label [[PRED_UDIV_CONTINUE3:%.*]]
; UNROLL-NO-IC: pred.udiv.if2:
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = udiv i32 219220132, [[TMP3]]
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP17]], i32 1
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE3]]
; UNROLL-NO-IC: pred.udiv.continue3:
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP15]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP18]], [[PRED_UDIV_IF2]] ]
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP10]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[TMP20]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
; UNROLL-NO-IC: pred.udiv.if4:
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = udiv i32 219220132, [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP21]], i32 2
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE5]]
; UNROLL-NO-IC: pred.udiv.continue5:
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_UDIV_CONTINUE3]] ], [ [[TMP22]], [[PRED_UDIV_IF4]] ]
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP10]], i32 3
; UNROLL-NO-IC-NEXT: br i1 [[TMP24]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
; UNROLL-NO-IC: pred.udiv.if6:
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = udiv i32 219220132, [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP23]], i32 [[TMP25]], i32 3
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE7]]
; UNROLL-NO-IC: pred.udiv.continue7:
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = phi <4 x i32> [ [[TMP23]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP26]], [[PRED_UDIV_IF6]] ]
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP28]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
; UNROLL-NO-IC: pred.udiv.if8:
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = udiv i32 219220132, [[TMP6]]
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP29]], i32 0
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE9]]
; UNROLL-NO-IC: pred.udiv.continue9:
; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE7]] ], [ [[TMP30]], [[PRED_UDIV_IF8]] ]
; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP11]], i32 1
; UNROLL-NO-IC-NEXT: br i1 [[TMP32]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]]
; UNROLL-NO-IC: pred.udiv.if10:
; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = udiv i32 219220132, [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP33]], i32 1
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE11]]
; UNROLL-NO-IC: pred.udiv.continue11:
; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = phi <4 x i32> [ [[TMP31]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP34]], [[PRED_UDIV_IF10]] ]
; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP11]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[TMP36]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]]
; UNROLL-NO-IC: pred.udiv.if12:
; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = udiv i32 219220132, [[TMP8]]
; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = insertelement <4 x i32> [[TMP35]], i32 [[TMP37]], i32 2
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE13]]
; UNROLL-NO-IC: pred.udiv.continue13:
; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP35]], [[PRED_UDIV_CONTINUE11]] ], [ [[TMP38]], [[PRED_UDIV_IF12]] ]
; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP11]], i32 3
; UNROLL-NO-IC-NEXT: br i1 [[TMP40]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]]
; UNROLL-NO-IC: pred.udiv.if14:
; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = udiv i32 219220132, [[TMP9]]
; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP41]], i32 3
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE15]]
; UNROLL-NO-IC: pred.udiv.continue15:
; UNROLL-NO-IC-NEXT: [[TMP43]] = phi <4 x i32> [ [[TMP39]], [[PRED_UDIV_CONTINUE13]] ], [ [[TMP42]], [[PRED_UDIV_IF14]] ]
; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP27]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = shufflevector <4 x i32> [[TMP27]], <4 x i32> [[TMP43]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP46]] = add <4 x i32> [[VEC_PHI]], [[TMP44]]
; UNROLL-NO-IC-NEXT: [[TMP47]] = add <4 x i32> [[VEC_PHI1]], [[TMP45]]
; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP48]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NO-IC: pred.store.if:
; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = add i32 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP49]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP2]], ptr [[TMP50]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NO-IC: pred.store.continue:
; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1
; UNROLL-NO-IC-NEXT: br i1 [[TMP51]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
; UNROLL-NO-IC: pred.store.if16:
; UNROLL-NO-IC-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP52]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP3]], ptr [[TMP53]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE17]]
; UNROLL-NO-IC: pred.store.continue17:
; UNROLL-NO-IC-NEXT: [[TMP54:%.*]] = extractelement <4 x i1> [[TMP10]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[TMP54]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]]
; UNROLL-NO-IC: pred.store.if18:
; UNROLL-NO-IC-NEXT: [[TMP55:%.*]] = add i32 [[INDEX]], 2
; UNROLL-NO-IC-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP55]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP4]], ptr [[TMP56]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE19]]
; UNROLL-NO-IC: pred.store.continue19:
; UNROLL-NO-IC-NEXT: [[TMP57:%.*]] = extractelement <4 x i1> [[TMP10]], i32 3
; UNROLL-NO-IC-NEXT: br i1 [[TMP57]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]]
; UNROLL-NO-IC: pred.store.if20:
; UNROLL-NO-IC-NEXT: [[TMP58:%.*]] = add i32 [[INDEX]], 3
; UNROLL-NO-IC-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP58]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP5]], ptr [[TMP59]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE21]]
; UNROLL-NO-IC: pred.store.continue21:
; UNROLL-NO-IC-NEXT: [[TMP60:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP60]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
; UNROLL-NO-IC: pred.store.if22:
; UNROLL-NO-IC-NEXT: [[TMP61:%.*]] = add i32 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP61]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP6]], ptr [[TMP62]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE23]]
; UNROLL-NO-IC: pred.store.continue23:
; UNROLL-NO-IC-NEXT: [[TMP63:%.*]] = extractelement <4 x i1> [[TMP11]], i32 1
; UNROLL-NO-IC-NEXT: br i1 [[TMP63]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]]
; UNROLL-NO-IC: pred.store.if24:
; UNROLL-NO-IC-NEXT: [[TMP64:%.*]] = add i32 [[INDEX]], 5
; UNROLL-NO-IC-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP64]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP7]], ptr [[TMP65]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE25]]
; UNROLL-NO-IC: pred.store.continue25:
; UNROLL-NO-IC-NEXT: [[TMP66:%.*]] = extractelement <4 x i1> [[TMP11]], i32 2
; UNROLL-NO-IC-NEXT: br i1 [[TMP66]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]]
; UNROLL-NO-IC: pred.store.if26:
; UNROLL-NO-IC-NEXT: [[TMP67:%.*]] = add i32 [[INDEX]], 6
; UNROLL-NO-IC-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP67]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP8]], ptr [[TMP68]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE27]]
; UNROLL-NO-IC: pred.store.continue27:
; UNROLL-NO-IC-NEXT: [[TMP69:%.*]] = extractelement <4 x i1> [[TMP11]], i32 3
; UNROLL-NO-IC-NEXT: br i1 [[TMP69]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29]]
; UNROLL-NO-IC: pred.store.if28:
; UNROLL-NO-IC-NEXT: [[TMP70:%.*]] = add i32 [[INDEX]], 7
; UNROLL-NO-IC-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP70]]
; UNROLL-NO-IC-NEXT: store i32 [[TMP9]], ptr [[TMP71]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE29]]
; UNROLL-NO-IC: pred.store.continue29:
; UNROLL-NO-IC-NEXT: [[TMP72:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]]
; UNROLL-NO-IC-NEXT: [[TMP73:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI1]]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
; UNROLL-NO-IC-NEXT: [[TMP74:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP31:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP73]], [[TMP72]]
; UNROLL-NO-IC-NEXT: [[TMP75:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; UNROLL-NO-IC-NEXT: br label [[BB1:%.*]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-IC: bb1:
; UNROLL-NO-IC-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP75]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[VAR]]
; UNROLL-NO-IC: bb2:
; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]]
; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; UNROLL-NO-IC-NEXT: store i32 [[VAR3]], ptr [[G]], align 4
; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29]], !llvm.loop [[LOOP32:![0-9]+]]
;
; UNROLL-NO-VF-LABEL: @sink_into_replication_region_multiple(
; UNROLL-NO-VF-NEXT: bb:
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NO-VF-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; UNROLL-NO-VF-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE6]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE6]] ]
; UNROLL-NO-VF-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[PRED_STORE_CONTINUE6]] ]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
; UNROLL-NO-VF-NEXT: [[VEC_IV:%.*]] = add i32 [[INDEX]], 0
; UNROLL-NO-VF-NEXT: [[VEC_IV2:%.*]] = add i32 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = icmp ule i32 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]]
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = icmp ule i32 [[VEC_IV2]], [[TRIP_COUNT_MINUS_1]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP4]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; UNROLL-NO-VF: pred.udiv.if:
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE]]
; UNROLL-NO-VF: pred.udiv.continue:
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
; UNROLL-NO-VF-NEXT: br i1 [[TMP5]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
; UNROLL-NO-VF: pred.udiv.if3:
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[TMP3]]
; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE4]]
; UNROLL-NO-VF: pred.udiv.continue4:
; UNROLL-NO-VF-NEXT: [[TMP9]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP8]], [[PRED_UDIV_IF3]] ]
; UNROLL-NO-VF-NEXT: [[TMP10]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]]
; UNROLL-NO-VF-NEXT: [[TMP11]] = add i32 [[VEC_PHI1]], [[TMP7]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NO-VF: pred.store.if:
; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
; UNROLL-NO-VF-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP12]], align 4
; UNROLL-NO-VF-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NO-VF: pred.store.continue:
; UNROLL-NO-VF-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
; UNROLL-NO-VF: pred.store.if5:
; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = add i32 [[INDEX]], 1
; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP14]]
; UNROLL-NO-VF-NEXT: store i32 [[TMP3]], ptr [[TMP15]], align 4
; UNROLL-NO-VF-NEXT: br label [[PRED_STORE_CONTINUE6]]
; UNROLL-NO-VF: pred.store.continue6:
; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = select i1 [[TMP4]], i32 [[TMP10]], i32 [[VEC_PHI]]
; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = select i1 [[TMP5]], i32 [[TMP11]], i32 [[VEC_PHI1]]
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP31:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP17]], [[TMP16]]
; UNROLL-NO-VF-NEXT: br label [[BB1:%.*]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-VF: bb1:
; UNROLL-NO-VF-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[VAR]]
; UNROLL-NO-VF: bb2:
; UNROLL-NO-VF-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]]
; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; UNROLL-NO-VF-NEXT: store i32 [[VAR3]], ptr [[G]], align 4
; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29]], !llvm.loop [[LOOP32:![0-9]+]]
;
; SINK-AFTER-LABEL: @sink_into_replication_region_multiple(
; SINK-AFTER-NEXT: bb:
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; SINK-AFTER-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; SINK-AFTER-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE12]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_STORE_CONTINUE12]] ]
; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_STORE_CONTINUE12]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2
; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3
; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; SINK-AFTER-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; SINK-AFTER-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; SINK-AFTER: pred.udiv.if:
; SINK-AFTER-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[TMP2]]
; SINK-AFTER-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE]]
; SINK-AFTER: pred.udiv.continue:
; SINK-AFTER-NEXT: [[TMP10:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UDIV_IF]] ]
; SINK-AFTER-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
; SINK-AFTER-NEXT: br i1 [[TMP11]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2:%.*]]
; SINK-AFTER: pred.udiv.if1:
; SINK-AFTER-NEXT: [[TMP12:%.*]] = udiv i32 219220132, [[TMP3]]
; SINK-AFTER-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP12]], i32 1
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE2]]
; SINK-AFTER: pred.udiv.continue2:
; SINK-AFTER-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF1]] ]
; SINK-AFTER-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
; SINK-AFTER-NEXT: br i1 [[TMP15]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
; SINK-AFTER: pred.udiv.if3:
; SINK-AFTER-NEXT: [[TMP16:%.*]] = udiv i32 219220132, [[TMP4]]
; SINK-AFTER-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP16]], i32 2
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE4]]
; SINK-AFTER: pred.udiv.continue4:
; SINK-AFTER-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_UDIV_CONTINUE2]] ], [ [[TMP17]], [[PRED_UDIV_IF3]] ]
; SINK-AFTER-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
; SINK-AFTER-NEXT: br i1 [[TMP19]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
; SINK-AFTER: pred.udiv.if5:
; SINK-AFTER-NEXT: [[TMP20:%.*]] = udiv i32 219220132, [[TMP5]]
; SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP20]], i32 3
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE6]]
; SINK-AFTER: pred.udiv.continue6:
; SINK-AFTER-NEXT: [[TMP22]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP21]], [[PRED_UDIV_IF5]] ]
; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; SINK-AFTER-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]]
; SINK-AFTER-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; SINK-AFTER-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; SINK-AFTER: pred.store.if:
; SINK-AFTER-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 0
; SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP26]]
; SINK-AFTER-NEXT: store i32 [[TMP2]], ptr [[TMP27]], align 4
; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE]]
; SINK-AFTER: pred.store.continue:
; SINK-AFTER-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
; SINK-AFTER-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; SINK-AFTER: pred.store.if7:
; SINK-AFTER-NEXT: [[TMP29:%.*]] = add i32 [[INDEX]], 1
; SINK-AFTER-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP29]]
; SINK-AFTER-NEXT: store i32 [[TMP3]], ptr [[TMP30]], align 4
; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE8]]
; SINK-AFTER: pred.store.continue8:
; SINK-AFTER-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
; SINK-AFTER-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
; SINK-AFTER: pred.store.if9:
; SINK-AFTER-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 2
; SINK-AFTER-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP32]]
; SINK-AFTER-NEXT: store i32 [[TMP4]], ptr [[TMP33]], align 4
; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE10]]
; SINK-AFTER: pred.store.continue10:
; SINK-AFTER-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
; SINK-AFTER-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
; SINK-AFTER: pred.store.if11:
; SINK-AFTER-NEXT: [[TMP35:%.*]] = add i32 [[INDEX]], 3
; SINK-AFTER-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP35]]
; SINK-AFTER-NEXT: store i32 [[TMP5]], ptr [[TMP36]], align 4
; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE12]]
; SINK-AFTER: pred.store.continue12:
; SINK-AFTER-NEXT: [[TMP37:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; SINK-AFTER-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP31:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP37]])
; SINK-AFTER-NEXT: br label [[BB1:%.*]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[BB]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; SINK-AFTER-NEXT: br label [[BB2:%.*]]
; SINK-AFTER: bb1:
; SINK-AFTER-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: ret i32 [[VAR]]
; SINK-AFTER: bb2:
; SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]]
; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
; SINK-AFTER-NEXT: store i32 [[VAR3]], ptr [[G]], align 4
; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1
; SINK-AFTER-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29]], !llvm.loop [[LOOP32:![0-9]+]]
;
bb:
br label %bb2
bb1: ; preds = %bb2
%var = phi i32 [ %var6, %bb2 ]
ret i32 %var
bb2: ; preds = %bb2, %bb
%var3 = phi i32 [ %var8, %bb2 ], [ %y, %bb ]
%iv = phi i32 [ %iv.next, %bb2 ], [ 0, %bb ]
%var4 = phi i32 [ %var7, %bb2 ], [ 0, %bb ]
%var5 = phi i32 [ %var6, %bb2 ], [ 0, %bb ]
%g = getelementptr inbounds i32, ptr %x, i32 %iv
%var6 = add i32 %var5, %var4
%var7 = udiv i32 219220132, %var3
store i32 %var3, ptr %g, align 4
%var8 = add nsw i32 %var3, -1
%iv.next = add nsw i32 %iv, 1
%var9 = icmp slt i32 %var3, 2
br i1 %var9, label %bb1, label %bb2, !prof !2
}
; %vec.dead will be marked as dead instruction in the vector loop and no recipe
; will be created for it. Make sure a valid sink target is used.
define i32 @sink_after_dead_inst(ptr %A.ptr) {
; UNROLL-NO-IC-LABEL: @sink_after_dead_inst(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1)
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]]
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4
; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP6]], align 4
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
; UNROLL-NO-IC: loop:
; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15
; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true
; UNROLL-NO-IC-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true
; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; UNROLL-NO-IC-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]]
; UNROLL-NO-IC-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]]
; UNROLL-NO-IC-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32
; UNROLL-NO-IC-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32
; UNROLL-NO-IC-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]]
; UNROLL-NO-IC-NEXT: store i32 0, ptr [[A_GEP]], align 4
; UNROLL-NO-IC-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP34:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[FOR_LCSSA]]
;
; UNROLL-NO-VF-LABEL: @sink_after_dead_inst(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i16 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = or i16 [[TMP2]], [[TMP2]]
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = or i16 [[TMP3]], [[TMP3]]
; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
; UNROLL-NO-VF-NEXT: [[TMP7]] = zext i16 [[TMP5]] to i32
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]]
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[TMP1]]
; UNROLL-NO-VF-NEXT: store i32 0, ptr [[TMP8]], align 4
; UNROLL-NO-VF-NEXT: store i32 0, ptr [[TMP9]], align 4
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; UNROLL-NO-VF-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]]
; UNROLL-NO-VF: loop:
; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; UNROLL-NO-VF-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ]
; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15
; UNROLL-NO-VF-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true
; UNROLL-NO-VF-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true
; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; UNROLL-NO-VF-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]]
; UNROLL-NO-VF-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]]
; UNROLL-NO-VF-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32
; UNROLL-NO-VF-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32
; UNROLL-NO-VF-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]]
; UNROLL-NO-VF-NEXT: store i32 0, ptr [[A_GEP]], align 4
; UNROLL-NO-VF-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP34:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[FOR_LCSSA]]
;
; SINK-AFTER-LABEL: @sink_after_dead_inst(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1)
; SINK-AFTER-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]]
; SINK-AFTER-NEXT: [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]]
; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
; SINK-AFTER-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; SINK-AFTER-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[LOOP:%.*]]
; SINK-AFTER: loop:
; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; SINK-AFTER-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ]
; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15
; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true
; SINK-AFTER-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true
; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; SINK-AFTER-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]]
; SINK-AFTER-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]]
; SINK-AFTER-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32
; SINK-AFTER-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32
; SINK-AFTER-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]]
; SINK-AFTER-NEXT: store i32 0, ptr [[A_GEP]], align 4
; SINK-AFTER-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP34:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: ret i32 [[FOR_LCSSA]]
;
entry:
br label %loop
loop:
%iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ]
%for = phi i32 [ 0, %entry ], [ %for.prev, %loop ]
%cmp = icmp eq i32 %for, 15
%C = icmp eq i1 %cmp, true
%vec.dead = and i1 %C, 1
%iv.next = add i16 %iv, 1
%B1 = or i16 %iv.next, %iv.next
%B3 = and i1 %cmp, %C
%for.prev = zext i16 %B1 to i32
%ext = zext i1 %B3 to i32
%A.gep = getelementptr i32, ptr %A.ptr, i16 %iv
store i32 0, ptr %A.gep
br i1 %vec.dead, label %for.end, label %loop
for.end:
ret i32 %for
}
; %rec.1 only has %use.rec.1 as use, which can be removed. This enables %rec.1
; to be removed also.
define void @unused_recurrence(ptr %a) {
; UNROLL-NO-IC-LABEL: @unused_recurrence(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1)
; UNROLL-NO-IC-NEXT: [[TMP1]] = add <4 x i16> [[TMP0]], splat (i16 5)
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
; UNROLL-NO-IC-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 997, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_COND:%.*]]
; UNROLL-NO-IC: for.cond:
; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
; UNROLL-NO-IC-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
; UNROLL-NO-IC-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10
; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; UNROLL-NO-IC-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000
; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
; UNROLL-NO-VF-LABEL: @unused_recurrence(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]]
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 1
; UNROLL-NO-VF-NEXT: [[TMP2]] = add i16 [[TMP1]], 5
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1028
; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 1001, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_COND:%.*]]
; UNROLL-NO-VF: for.cond:
; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
; UNROLL-NO-VF-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
; UNROLL-NO-VF-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10
; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; UNROLL-NO-VF-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000
; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret void
;
; SINK-AFTER-LABEL: @unused_recurrence(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1)
; SINK-AFTER-NEXT: [[TMP1]] = add <4 x i16> [[TMP0]], splat (i16 5)
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1028
; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 1001, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[FOR_COND:%.*]]
; SINK-AFTER: for.cond:
; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
; SINK-AFTER-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
; SINK-AFTER-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10
; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; SINK-AFTER-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000
; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
; SINK-AFTER: for.end:
; SINK-AFTER-NEXT: ret void
;
entry:
br label %for.cond
for.cond:
%iv = phi i16 [ -27, %entry ], [ %iv.next, %for.cond ]
%rec.1 = phi i16 [ 0, %entry ], [ %rec.1.prev, %for.cond ]
%use.rec.1 = sub i16 %rec.1, 10
%iv.next= add i16 %iv, 1
%rec.1.prev = add i16 %iv.next, 5
%cmp = icmp eq i16 %iv, 1000
br i1 %cmp, label %for.end, label %for.cond
for.end:
ret void
}
; Test case for https://github.com/llvm/llvm-project/issues/95520.
define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) {
; UNROLL-NO-IC-LABEL: @recurence_uniform_load(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: br label [[EXIT:%.*]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
; UNROLL-NO-IC: loop:
; UNROLL-NO-IC-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[ADD]] = add i64 [[PHI]], 1
; UNROLL-NO-IC-NEXT: [[LOAD]] = load i32, ptr [[SRC:%.*]], align 4
; UNROLL-NO-IC-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1
; UNROLL-NO-IC-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP37:![0-9]+]]
; UNROLL-NO-IC: exit:
; UNROLL-NO-IC-NEXT: ret i32 0
;
; UNROLL-NO-VF-LABEL: @recurence_uniform_load(
; UNROLL-NO-VF-NEXT: entry:
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; UNROLL-NO-VF-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]]
; UNROLL-NO-VF: loop:
; UNROLL-NO-VF-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
; UNROLL-NO-VF-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ]
; UNROLL-NO-VF-NEXT: [[ADD]] = add i64 [[PHI]], 1
; UNROLL-NO-VF-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4
; UNROLL-NO-VF-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1
; UNROLL-NO-VF-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP37:![0-9]+]]
; UNROLL-NO-VF: exit:
; UNROLL-NO-VF-NEXT: ret i32 0
;
; SINK-AFTER-LABEL: @recurence_uniform_load(
; SINK-AFTER-NEXT: entry:
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SINK-AFTER: vector.ph:
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: br label [[EXIT:%.*]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ]
; SINK-AFTER-NEXT: br label [[LOOP:%.*]]
; SINK-AFTER: loop:
; SINK-AFTER-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
; SINK-AFTER-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ]
; SINK-AFTER-NEXT: [[ADD]] = add i64 [[PHI]], 1
; SINK-AFTER-NEXT: [[LOAD]] = load i32, ptr [[SRC:%.*]], align 4
; SINK-AFTER-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1
; SINK-AFTER-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP37:![0-9]+]]
; SINK-AFTER: exit:
; SINK-AFTER-NEXT: ret i32 0
;
entry:
br label %loop
loop:
%phi = phi i64 [ 0, %entry ], [ %add, %loop ]
%recur = phi i32 [ 0, %entry ], [ %load, %loop ]
%add = add i64 %phi, 1
%load = load i32, ptr %src, align 4
%icmp = icmp ult i64 %phi, 1
br i1 %icmp, label %loop, label %exit
exit:
ret i32 0
}
!2 = !{!"branch_weights", i32 1, i32 1}