Files
clang-p2996/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
Björn Pettersson 092b6e73e6 [InstCombine] Handle "add like" in ADD+GEP->GEP+GEP rewrites (#135156)
Considering that "or disjoint" is the canonical for certain add
operations, then I think we want to support such "add like" operations
when doing ADD+GEP->GEP+GEP rewrites to make things more consistent.

Problem was found when improving ValueTracking, which turned an ADD into
OR, and then suddenly optimizations got worse due to these rewrites no
longer triggering.
2025-04-14 17:11:13 +02:00

633 lines
40 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=4 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
define i32 @reduction_sum_single(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_sum_single(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 32
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP5]] = add i32 [[TMP4]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD4]])
; CHECK-NEXT: [[TMP7]] = add i32 [[TMP6]], [[VEC_PHI1]]
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD5]])
; CHECK-NEXT: [[TMP9]] = add i32 [[TMP8]], [[VEC_PHI2]]
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD6]])
; CHECK-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI3]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP7]], [[TMP5]]
; CHECK-NEXT: [[BIN_RDX7:%.*]] = add i32 [[TMP9]], [[BIN_RDX]]
; CHECK-NEXT: [[BIN_RDX8:%.*]] = add i32 [[TMP11]], [[BIN_RDX7]]
; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[BIN_RDX8]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
entry:
br label %.lr.ph
.lr.ph: ; preds = %entry, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
%sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
%l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%l3 = load i32, ptr %l2, align 4
%l7 = add i32 %sum.02, %l3
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 256
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph
%sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
ret i32 %sum.0.lcssa
}
; Check that we correctly unroll two reductions chained together.
define i64 @reduction_sum_chain(ptr noalias %p, ptr noalias %q) {
; CHECK-LABEL: @reduction_sum_chain(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 32
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 64
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i64 96
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP1]], i64 64
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP1]], i64 96
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD4]])
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], [[VEC_PHI1]]
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD5]])
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[VEC_PHI2]]
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD6]])
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[VEC_PHI3]]
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD7]])
; CHECK-NEXT: [[TMP17]] = add i64 [[TMP16]], [[TMP9]]
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD8]])
; CHECK-NEXT: [[TMP19]] = add i64 [[TMP18]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD9]])
; CHECK-NEXT: [[TMP21]] = add i64 [[TMP20]], [[TMP13]]
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[WIDE_LOAD10]])
; CHECK-NEXT: [[TMP23]] = add i64 [[TMP22]], [[TMP15]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add i64 [[TMP19]], [[TMP17]]
; CHECK-NEXT: [[BIN_RDX11:%.*]] = add i64 [[TMP21]], [[BIN_RDX]]
; CHECK-NEXT: [[BIN_RDX12:%.*]] = add i64 [[TMP23]], [[BIN_RDX11]]
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[ADD2_LCSSA:%.*]] = phi i64 [ poison, [[LOOP]] ], [ [[BIN_RDX12]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[ADD2_LCSSA]]
;
entry:
br label %loop
loop:
%iv = phi i64 [0, %entry], [%iv.next, %loop]
%rdx = phi i64 [0, %entry], [%add2, %loop]
%p.gep = getelementptr i64, ptr %p, i64 %iv
%q.gep = getelementptr i64, ptr %q, i64 %iv
%x = load i64, ptr %p.gep
%y = load i64, ptr %q.gep
%add1 = add i64 %rdx, %x
%add2 = add i64 %add1, %y
%iv.next = add i64 %iv, 1
%done = icmp eq i64 %iv.next, 256
br i1 %done, label %exit, label %loop
exit:
ret i64 %add2
}
define i32 @predicated(ptr noalias nocapture %A) {
; CHECK-LABEL: @predicated(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE36:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE36]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP101:%.*]], [[PRED_LOAD_CONTINUE36]] ]
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP104:%.*]], [[PRED_LOAD_CONTINUE36]] ]
; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP107:%.*]], [[PRED_LOAD_CONTINUE36]] ]
; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP110:%.*]], [[PRED_LOAD_CONTINUE36]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[STEP_ADD]], splat (i64 257)
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i64> [[STEP_ADD_2]], splat (i64 257)
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[STEP_ADD_3]], splat (i64 257)
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
; CHECK: pred.load.if4:
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i64 4
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP12]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue5:
; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if6:
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP16]], i64 8
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP18]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]]
; CHECK: pred.load.continue7:
; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP19]], [[PRED_LOAD_IF9]] ]
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
; CHECK: pred.load.if8:
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[TMP22]], i64 12
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP24]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]]
; CHECK: pred.load.continue9:
; CHECK-NEXT: [[TMP26:%.*]] = phi <4 x i32> [ [[TMP20]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP25]], [[PRED_LOAD_IF11]] ]
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
; CHECK: pred.load.if10:
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP28]], i64 16
; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP30]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]]
; CHECK: pred.load.continue11:
; CHECK-NEXT: [[TMP32:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE12]] ], [ [[TMP31]], [[PRED_LOAD_IF13]] ]
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1
; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
; CHECK: pred.load.if12:
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[TMP34]], i64 20
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP36]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE16]]
; CHECK: pred.load.continue13:
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP32]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP37]], [[PRED_LOAD_IF15]] ]
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2
; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
; CHECK: pred.load.if14:
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[TMP40]], i64 24
; CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4
; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP42]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE18]]
; CHECK: pred.load.continue15:
; CHECK-NEXT: [[TMP44:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP43]], [[PRED_LOAD_IF17]] ]
; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3
; CHECK-NEXT: br i1 [[TMP45]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
; CHECK: pred.load.if16:
; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP46]], i64 28
; CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4
; CHECK-NEXT: [[TMP49:%.*]] = insertelement <4 x i32> [[TMP44]], i32 [[TMP48]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE20]]
; CHECK: pred.load.continue17:
; CHECK-NEXT: [[TMP50:%.*]] = phi <4 x i32> [ [[TMP44]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP49]], [[PRED_LOAD_IF19]] ]
; CHECK-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
; CHECK-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
; CHECK: pred.load.if18:
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[TMP52]], i64 32
; CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i32> poison, i32 [[TMP54]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE22]]
; CHECK: pred.load.continue19:
; CHECK-NEXT: [[TMP56:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE20]] ], [ [[TMP55]], [[PRED_LOAD_IF21]] ]
; CHECK-NEXT: [[TMP57:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; CHECK-NEXT: br i1 [[TMP57]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
; CHECK: pred.load.if20:
; CHECK-NEXT: [[TMP58:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr [[TMP58]], i64 36
; CHECK-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i32> [[TMP56]], i32 [[TMP60]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE24]]
; CHECK: pred.load.continue21:
; CHECK-NEXT: [[TMP62:%.*]] = phi <4 x i32> [ [[TMP56]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP61]], [[PRED_LOAD_IF23]] ]
; CHECK-NEXT: [[TMP63:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2
; CHECK-NEXT: br i1 [[TMP63]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
; CHECK: pred.load.if22:
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr [[TMP64]], i64 40
; CHECK-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4
; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i32> [[TMP62]], i32 [[TMP66]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE26]]
; CHECK: pred.load.continue23:
; CHECK-NEXT: [[TMP68:%.*]] = phi <4 x i32> [ [[TMP62]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP67]], [[PRED_LOAD_IF25]] ]
; CHECK-NEXT: [[TMP69:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3
; CHECK-NEXT: br i1 [[TMP69]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
; CHECK: pred.load.if24:
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[TMP70]], i64 44
; CHECK-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4
; CHECK-NEXT: [[TMP73:%.*]] = insertelement <4 x i32> [[TMP68]], i32 [[TMP72]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE28]]
; CHECK: pred.load.continue25:
; CHECK-NEXT: [[TMP74:%.*]] = phi <4 x i32> [ [[TMP68]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP73]], [[PRED_LOAD_IF27]] ]
; CHECK-NEXT: [[TMP75:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
; CHECK-NEXT: br i1 [[TMP75]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
; CHECK: pred.load.if26:
; CHECK-NEXT: [[TMP76:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr [[TMP76]], i64 48
; CHECK-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP77]], align 4
; CHECK-NEXT: [[TMP79:%.*]] = insertelement <4 x i32> poison, i32 [[TMP78]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE30]]
; CHECK: pred.load.continue27:
; CHECK-NEXT: [[TMP80:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE28]] ], [ [[TMP79]], [[PRED_LOAD_IF29]] ]
; CHECK-NEXT: [[TMP81:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
; CHECK-NEXT: br i1 [[TMP81]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
; CHECK: pred.load.if28:
; CHECK-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[TMP82]], i64 52
; CHECK-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP83]], align 4
; CHECK-NEXT: [[TMP85:%.*]] = insertelement <4 x i32> [[TMP80]], i32 [[TMP84]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE32]]
; CHECK: pred.load.continue29:
; CHECK-NEXT: [[TMP86:%.*]] = phi <4 x i32> [ [[TMP80]], [[PRED_LOAD_CONTINUE30]] ], [ [[TMP85]], [[PRED_LOAD_IF31]] ]
; CHECK-NEXT: [[TMP87:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
; CHECK-NEXT: br i1 [[TMP87]], label [[PRED_LOAD_IF33:%.*]], label [[PRED_LOAD_CONTINUE34:%.*]]
; CHECK: pred.load.if30:
; CHECK-NEXT: [[TMP88:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr [[TMP88]], i64 56
; CHECK-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP89]], align 4
; CHECK-NEXT: [[TMP91:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP90]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE34]]
; CHECK: pred.load.continue31:
; CHECK-NEXT: [[TMP92:%.*]] = phi <4 x i32> [ [[TMP86]], [[PRED_LOAD_CONTINUE32]] ], [ [[TMP91]], [[PRED_LOAD_IF33]] ]
; CHECK-NEXT: [[TMP93:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
; CHECK-NEXT: br i1 [[TMP93]], label [[PRED_LOAD_IF35:%.*]], label [[PRED_LOAD_CONTINUE36]]
; CHECK: pred.load.if32:
; CHECK-NEXT: [[TMP94:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr [[TMP94]], i64 60
; CHECK-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP95]], align 4
; CHECK-NEXT: [[TMP97:%.*]] = insertelement <4 x i32> [[TMP92]], i32 [[TMP96]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE36]]
; CHECK: pred.load.continue33:
; CHECK-NEXT: [[TMP98:%.*]] = phi <4 x i32> [ [[TMP92]], [[PRED_LOAD_CONTINUE34]] ], [ [[TMP97]], [[PRED_LOAD_IF35]] ]
; CHECK-NEXT: [[TMP99:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP26]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP100:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP99]])
; CHECK-NEXT: [[TMP101]] = add i32 [[TMP100]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP102:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP50]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP103:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP102]])
; CHECK-NEXT: [[TMP104]] = add i32 [[TMP103]], [[VEC_PHI4]]
; CHECK-NEXT: [[TMP105:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP74]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP106:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP105]])
; CHECK-NEXT: [[TMP107]] = add i32 [[TMP106]], [[VEC_PHI5]]
; CHECK-NEXT: [[TMP108:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP98]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP109:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP108]])
; CHECK-NEXT: [[TMP110]] = add i32 [[TMP109]], [[VEC_PHI6]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 16)
; CHECK-NEXT: [[TMP111:%.*]] = icmp eq i64 [[INDEX_NEXT]], 272
; CHECK-NEXT: br i1 [[TMP111]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP104]], [[TMP101]]
; CHECK-NEXT: [[BIN_RDX37:%.*]] = add i32 [[TMP107]], [[BIN_RDX]]
; CHECK-NEXT: [[BIN_RDX38:%.*]] = add i32 [[TMP110]], [[BIN_RDX37]]
; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[BIN_RDX38]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
entry:
br label %.lr.ph
.lr.ph: ; preds = %entry, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
%sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
%l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%l3 = load i32, ptr %l2, align 4
%l7 = add i32 %sum.02, %l3
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %._crit_edge, label %.lr.ph, !llvm.loop !6
._crit_edge: ; preds = %.lr.ph
%sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
ret i32 %sum.0.lcssa
}
define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) {
; CHECK-LABEL: @cond_rdx_pred(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], 15
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], -16
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> poison, i32 [[COND:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLATINSERT7]], splat (i32 7)
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE38:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE38]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 4, [[VECTOR_PH]] ], [ [[TMP109:%.*]], [[PRED_LOAD_CONTINUE38]] ]
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP112:%.*]], [[PRED_LOAD_CONTINUE38]] ]
; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[PRED_LOAD_CONTINUE38]] ]
; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP118:%.*]], [[PRED_LOAD_CONTINUE38]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP0]], <4 x i1> [[BROADCAST_SPLAT8]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[BROADCAST_SPLAT8]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[BROADCAST_SPLAT8]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[BROADCAST_SPLAT8]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP8]], i64 0
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> poison, i32 [[TMP14]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP15]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP8]], i64 1
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if6:
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP18]], i64 4
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP20]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]]
; CHECK: pred.load.continue7:
; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i32> [ [[TMP16]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP21]], [[PRED_LOAD_IF9]] ]
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP8]], i64 2
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
; CHECK: pred.load.if8:
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP24]], i64 8
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP26]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]]
; CHECK: pred.load.continue9:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP22]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP27]], [[PRED_LOAD_IF11]] ]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP8]], i64 3
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
; CHECK: pred.load.if10:
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[TMP30]], i64 12
; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP32]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]]
; CHECK: pred.load.continue11:
; CHECK-NEXT: [[TMP34:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP33]], [[PRED_LOAD_IF13]] ]
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP9]], i64 0
; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
; CHECK: pred.load.if12:
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP36]], i64 16
; CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i32> poison, i32 [[TMP38]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE16]]
; CHECK: pred.load.continue13:
; CHECK-NEXT: [[TMP40:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP39]], [[PRED_LOAD_IF15]] ]
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP9]], i64 1
; CHECK-NEXT: br i1 [[TMP41]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
; CHECK: pred.load.if14:
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP42]], i64 20
; CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP44]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE18]]
; CHECK: pred.load.continue15:
; CHECK-NEXT: [[TMP46:%.*]] = phi <4 x i32> [ [[TMP40]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP45]], [[PRED_LOAD_IF17]] ]
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i1> [[TMP9]], i64 2
; CHECK-NEXT: br i1 [[TMP47]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
; CHECK: pred.load.if16:
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[TMP48]], i64 24
; CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4
; CHECK-NEXT: [[TMP51:%.*]] = insertelement <4 x i32> [[TMP46]], i32 [[TMP50]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE20]]
; CHECK: pred.load.continue17:
; CHECK-NEXT: [[TMP52:%.*]] = phi <4 x i32> [ [[TMP46]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP51]], [[PRED_LOAD_IF19]] ]
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i1> [[TMP9]], i64 3
; CHECK-NEXT: br i1 [[TMP53]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
; CHECK: pred.load.if18:
; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[TMP54]], i64 28
; CHECK-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4
; CHECK-NEXT: [[TMP57:%.*]] = insertelement <4 x i32> [[TMP52]], i32 [[TMP56]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE22]]
; CHECK: pred.load.continue19:
; CHECK-NEXT: [[TMP58:%.*]] = phi <4 x i32> [ [[TMP52]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP57]], [[PRED_LOAD_IF21]] ]
; CHECK-NEXT: [[TMP59:%.*]] = extractelement <4 x i1> [[TMP10]], i64 0
; CHECK-NEXT: br i1 [[TMP59]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
; CHECK: pred.load.if20:
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr [[TMP60]], i64 32
; CHECK-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP61]], align 4
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i32> poison, i32 [[TMP62]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE24]]
; CHECK: pred.load.continue21:
; CHECK-NEXT: [[TMP64:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE22]] ], [ [[TMP63]], [[PRED_LOAD_IF23]] ]
; CHECK-NEXT: [[TMP65:%.*]] = extractelement <4 x i1> [[TMP10]], i64 1
; CHECK-NEXT: br i1 [[TMP65]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
; CHECK: pred.load.if22:
; CHECK-NEXT: [[TMP66:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr i8, ptr [[TMP66]], i64 36
; CHECK-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4
; CHECK-NEXT: [[TMP69:%.*]] = insertelement <4 x i32> [[TMP64]], i32 [[TMP68]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE26]]
; CHECK: pred.load.continue23:
; CHECK-NEXT: [[TMP70:%.*]] = phi <4 x i32> [ [[TMP64]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP69]], [[PRED_LOAD_IF25]] ]
; CHECK-NEXT: [[TMP71:%.*]] = extractelement <4 x i1> [[TMP10]], i64 2
; CHECK-NEXT: br i1 [[TMP71]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
; CHECK: pred.load.if24:
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr [[TMP72]], i64 40
; CHECK-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4
; CHECK-NEXT: [[TMP75:%.*]] = insertelement <4 x i32> [[TMP70]], i32 [[TMP74]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE28]]
; CHECK: pred.load.continue25:
; CHECK-NEXT: [[TMP76:%.*]] = phi <4 x i32> [ [[TMP70]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP75]], [[PRED_LOAD_IF27]] ]
; CHECK-NEXT: [[TMP77:%.*]] = extractelement <4 x i1> [[TMP10]], i64 3
; CHECK-NEXT: br i1 [[TMP77]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
; CHECK: pred.load.if26:
; CHECK-NEXT: [[TMP78:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[TMP78]], i64 44
; CHECK-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4
; CHECK-NEXT: [[TMP81:%.*]] = insertelement <4 x i32> [[TMP76]], i32 [[TMP80]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE30]]
; CHECK: pred.load.continue27:
; CHECK-NEXT: [[TMP82:%.*]] = phi <4 x i32> [ [[TMP76]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP81]], [[PRED_LOAD_IF29]] ]
; CHECK-NEXT: [[TMP83:%.*]] = extractelement <4 x i1> [[TMP11]], i64 0
; CHECK-NEXT: br i1 [[TMP83]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
; CHECK: pred.load.if28:
; CHECK-NEXT: [[TMP84:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr [[TMP84]], i64 48
; CHECK-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4
; CHECK-NEXT: [[TMP87:%.*]] = insertelement <4 x i32> poison, i32 [[TMP86]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE32]]
; CHECK: pred.load.continue29:
; CHECK-NEXT: [[TMP88:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE30]] ], [ [[TMP87]], [[PRED_LOAD_IF31]] ]
; CHECK-NEXT: [[TMP89:%.*]] = extractelement <4 x i1> [[TMP11]], i64 1
; CHECK-NEXT: br i1 [[TMP89]], label [[PRED_LOAD_IF33:%.*]], label [[PRED_LOAD_CONTINUE34:%.*]]
; CHECK: pred.load.if30:
; CHECK-NEXT: [[TMP90:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr [[TMP90]], i64 52
; CHECK-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP91]], align 4
; CHECK-NEXT: [[TMP93:%.*]] = insertelement <4 x i32> [[TMP88]], i32 [[TMP92]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE34]]
; CHECK: pred.load.continue31:
; CHECK-NEXT: [[TMP94:%.*]] = phi <4 x i32> [ [[TMP88]], [[PRED_LOAD_CONTINUE32]] ], [ [[TMP93]], [[PRED_LOAD_IF33]] ]
; CHECK-NEXT: [[TMP95:%.*]] = extractelement <4 x i1> [[TMP11]], i64 2
; CHECK-NEXT: br i1 [[TMP95]], label [[PRED_LOAD_IF35:%.*]], label [[PRED_LOAD_CONTINUE36:%.*]]
; CHECK: pred.load.if32:
; CHECK-NEXT: [[TMP96:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr [[TMP96]], i64 56
; CHECK-NEXT: [[TMP98:%.*]] = load i32, ptr [[TMP97]], align 4
; CHECK-NEXT: [[TMP99:%.*]] = insertelement <4 x i32> [[TMP94]], i32 [[TMP98]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE36]]
; CHECK: pred.load.continue33:
; CHECK-NEXT: [[TMP100:%.*]] = phi <4 x i32> [ [[TMP94]], [[PRED_LOAD_CONTINUE34]] ], [ [[TMP99]], [[PRED_LOAD_IF35]] ]
; CHECK-NEXT: [[TMP101:%.*]] = extractelement <4 x i1> [[TMP11]], i64 3
; CHECK-NEXT: br i1 [[TMP101]], label [[PRED_LOAD_IF37:%.*]], label [[PRED_LOAD_CONTINUE38]]
; CHECK: pred.load.if34:
; CHECK-NEXT: [[TMP102:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr [[TMP102]], i64 60
; CHECK-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP103]], align 4
; CHECK-NEXT: [[TMP105:%.*]] = insertelement <4 x i32> [[TMP100]], i32 [[TMP104]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE38]]
; CHECK: pred.load.continue35:
; CHECK-NEXT: [[TMP106:%.*]] = phi <4 x i32> [ [[TMP100]], [[PRED_LOAD_CONTINUE36]] ], [ [[TMP105]], [[PRED_LOAD_IF37]] ]
; CHECK-NEXT: [[TMP107:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP34]], <4 x i32> splat (i32 1)
; CHECK-NEXT: [[TMP108:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP107]])
; CHECK-NEXT: [[TMP109]] = mul i32 [[TMP108]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP110:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP58]], <4 x i32> splat (i32 1)
; CHECK-NEXT: [[TMP111:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP110]])
; CHECK-NEXT: [[TMP112]] = mul i32 [[TMP111]], [[VEC_PHI4]]
; CHECK-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP82]], <4 x i32> splat (i32 1)
; CHECK-NEXT: [[TMP114:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP113]])
; CHECK-NEXT: [[TMP115]] = mul i32 [[TMP114]], [[VEC_PHI5]]
; CHECK-NEXT: [[TMP116:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP106]], <4 x i32> splat (i32 1)
; CHECK-NEXT: [[TMP117:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP116]])
; CHECK-NEXT: [[TMP118]] = mul i32 [[TMP117]], [[VEC_PHI6]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 16)
; CHECK-NEXT: [[TMP119:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP119]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP112]], [[TMP109]]
; CHECK-NEXT: [[BIN_RDX39:%.*]] = mul i32 [[TMP115]], [[BIN_RDX]]
; CHECK-NEXT: [[BIN_RDX40:%.*]] = mul i32 [[TMP118]], [[BIN_RDX39]]
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
; CHECK: if.then:
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ poison, [[FOR_INC]] ], [ [[BIN_RDX40]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RES_LCSSA]]
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
%sum = phi i32 [ %res, %for.inc ], [ 4, %entry ]
%cmp1 = icmp sgt i32 %cond, 7
br i1 %cmp1, label %if.then, label %for.inc
if.then:
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
%load = load i32, ptr %arrayidx
%mul = mul nsw i32 %load, %sum
br label %for.inc
for.inc:
%res = phi i32 [ %mul, %if.then ], [ %sum, %for.body ]
%inc = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %inc, %N
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6
for.end:
ret i32 %res
}
!6 = distinct !{!6, !7, !8}
!7 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
!8 = !{!"llvm.loop.vectorize.enable", i1 true}