Files
clang-p2996/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
Luke Lau 9dd1c66e8f [VPlan] Expand VPWidenIntOrFpInductionRecipe into separate recipes (#118638)
The motivation of this PR is to make #115274 easier to implement, and
should allow us to add EVL support by just passing EVL to the VF
operand.

The current difficulty with widening IVs with EVL is that
VPWidenIntOrFpInductionRecipe generates its own backedge value. Since
it's a VPHeaderPHIRecipe the VF operand must be in the preheader, which
means we can't use the EVL since it's defined in the loop body.

The gist in this PR is to take the approach in #114305 and expand
VPWidenIntOrFpInductionRecipe into several recipes for the initial
value, phi and backedge value just before execution. I.e. this example:

```
  vector.ph:
  Successor(s): vector loop

  <x1> vector loop: {
    vector.body:
      WIDEN-INDUCTION %i = phi %start, %step, %vf
      ...
      EMIT branch-on-count ...
    No successors
  }
```

gets expanded to:

``` 
vector.ph:
  ...
  vp<%induction.start> = ...
  vp<%induction.increment> = ...

Successor(s): vector loop

<x1> vector loop: {
  vector.body:
    ir<%i> = WIDEN-PHI vp<%induction.start>, vp<%vec.ind.next>
    ...
    vp<%vec.ind.next> = add ir<%i>, vp<%induction.increment>
    EMIT branch-on-count ...
  No successors
}
```

This allows us to a value defined in the loop in the backedge value, and
also means we can just reuse the existing backedge fixups in
VPlan::execute without having to specially handle it ourselves.

After this #115274 should just become a matter of setting the VF operand
to EVL (and building the increment step in the loop body, not the
preheader).
2025-06-17 18:24:07 +01:00

1622 lines
100 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
define i32 @reduction_sum_single(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_sum_single(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 4
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i64 8
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 12
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
; CHECK-NEXT: [[TMP26]] = add i32 [[TMP25]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
entry:
br label %.lr.ph
.lr.ph: ; preds = %entry, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
%sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
%l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%l3 = load i32, ptr %l2, align 4
%l7 = add i32 %sum.02, %l3
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph
%sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
ret i32 %sum.0.lcssa
}
define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_sum(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if2:
; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue3:
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK: pred.load.if4:
; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue5:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.if6:
; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue7:
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP40]])
; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP43]])
; CHECK-NEXT: [[TMP45:%.*]] = add i32 [[TMP44]], [[TMP42]]
; CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP46]])
; CHECK-NEXT: [[TMP48]] = add i32 [[TMP47]], [[TMP45]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
entry:
br label %.lr.ph
.lr.ph: ; preds = %entry, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
%sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
%l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%l3 = load i32, ptr %l2, align 4
%l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%l5 = load i32, ptr %l4, align 4
%l6 = trunc i64 %indvars.iv to i32
%l7 = add i32 %sum.02, %l6
%l8 = add i32 %l7, %l3
%l9 = add i32 %l8, %l5
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph
%sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
ret i32 %sum.0.lcssa
}
define i32 @reduction_sum_const(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_sum_const(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 4
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i64 8
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 12
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[TMP25]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> splat (i32 3), <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP27]])
; CHECK-NEXT: [[TMP29]] = add i32 [[TMP28]], [[TMP26]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
entry:
br label %.lr.ph
.lr.ph: ; preds = %entry, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
%sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
%l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%l3 = load i32, ptr %l2, align 4
%l7 = add i32 %sum.02, %l3
%l9 = add i32 %l7, 3
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph
%sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
ret i32 %sum.0.lcssa
}
define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_prod(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if2:
; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue3:
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK: pred.load.if4:
; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue5:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.if6:
; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue7:
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> splat (i32 1)
; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]])
; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 1)
; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]])
; CHECK-NEXT: [[TMP45:%.*]] = mul i32 [[TMP44]], [[TMP42]]
; CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1)
; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]])
; CHECK-NEXT: [[TMP48]] = mul i32 [[TMP47]], [[TMP45]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]]
;
entry:
br label %.lr.ph
.lr.ph: ; preds = %entry, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
%prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ]
%l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%l3 = load i32, ptr %l2, align 4
%l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%l5 = load i32, ptr %l4, align 4
%l6 = trunc i64 %indvars.iv to i32
%l7 = mul i32 %prod.02, %l6
%l8 = mul i32 %l7, %l3
%l9 = mul i32 %l8, %l5
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph
%prod.0.lcssa = phi i32 [ %l9, %.lr.ph ]
ret i32 %prod.0.lcssa
}
define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_mix(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B1:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> poison, i32 [[TMP12]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP13]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if2:
; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[B1]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP18]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP22]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue3:
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP23]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK: pred.load.if4:
; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[B1]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP28]], align 4
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP24]], i32 [[TMP32]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue5:
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP34:%.*]] = phi <4 x i32> [ [[TMP24]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP33]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.if6:
; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[B1]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP38]], align 4
; CHECK-NEXT: [[TMP49:%.*]] = insertelement <4 x i32> [[TMP34]], i32 [[TMP48]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue7:
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP50:%.*]] = phi <4 x i32> [ [[TMP34]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP49]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP41]])
; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP42]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP40:%.*]] = mul nsw <4 x i32> [[TMP50]], [[TMP39]]
; CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]])
; CHECK-NEXT: [[TMP46]] = add i32 [[TMP45]], [[TMP43]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP46]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
entry:
br label %.lr.ph
.lr.ph: ; preds = %entry, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
%sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
%l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%l3 = load i32, ptr %l2, align 4
%l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%l5 = load i32, ptr %l4, align 4
%l6 = mul nsw i32 %l5, %l3
%l7 = trunc i64 %indvars.iv to i32
%l8 = add i32 %sum.02, %l7
%l9 = add i32 %l8, %l6
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph
%sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
ret i32 %sum.0.lcssa
}
define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_mul(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 1)
; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]])
; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1)
; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]])
; CHECK-NEXT: [[TMP45]] = mul i32 [[TMP44]], [[TMP42]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
entry:
br label %.lr.ph
.lr.ph: ; preds = %entry, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
%sum.02 = phi i32 [ %l7, %.lr.ph ], [ 19, %entry ]
%l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%l3 = load i32, ptr %l2, align 4
%l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%l5 = load i32, ptr %l4, align 4
%l6 = mul i32 %sum.02, %l3
%l7 = mul i32 %l6, %l5
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph
%sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
ret i32 %sum.0.lcssa
}
define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_and(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 -1)
; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP40]])
; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 -1)
; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP43]])
; CHECK-NEXT: [[TMP45]] = and i32 [[TMP44]], [[TMP42]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%l0 = load i32, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%l1 = load i32, ptr %arrayidx2, align 4
%add = and i32 %result.08, %l0
%and = and i32 %add, %l1
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%result.0.lcssa = phi i32 [ %and, %for.body ]
ret i32 %result.0.lcssa
}
define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_or(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]]
; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP41]])
; CHECK-NEXT: [[TMP43]] = or i32 [[TMP42]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%l0 = load i32, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%l1 = load i32, ptr %arrayidx2, align 4
%add = add nsw i32 %l1, %l0
%or = or i32 %add, %result.08
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%result.0.lcssa = phi i32 [ %or, %for.body ]
ret i32 %result.0.lcssa
}
define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_xor(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]]
; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP41]])
; CHECK-NEXT: [[TMP43]] = xor i32 [[TMP42]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%l0 = load i32, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%l1 = load i32, ptr %arrayidx2, align 4
%add = add nsw i32 %l1, %l0
%xor = xor i32 %add, %result.08
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%result.0.lcssa = phi i32 [ %xor, %for.body ]
ret i32 %result.0.lcssa
}
define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_fadd(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i64 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x float> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i64 2
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i64 3
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> zeroinitializer
; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[TMP40]])
; CHECK-NEXT: [[TMP42:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer
; CHECK-NEXT: [[TMP43]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret float [[RESULT_0_LCSSA]]
;
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ]
%arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
%l0 = load float, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
%l1 = load float, ptr %arrayidx2, align 4
%add = fadd fast float %result.08, %l0
%fadd = fadd fast float %add, %l1
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%result.0.lcssa = phi float [ %fadd, %for.body ]
ret float %result.0.lcssa
}
define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_fmul(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i64 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x float> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i64 2
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i64 3
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> splat (float 1.000000e+00)
; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP40]])
; CHECK-NEXT: [[TMP42:%.*]] = fmul fast float [[TMP41]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP43:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> splat (float 1.000000e+00)
; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP43]])
; CHECK-NEXT: [[TMP45]] = fmul fast float [[TMP44]], [[TMP42]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret float [[RESULT_0_LCSSA]]
;
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
%arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
%l0 = load float, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
%l1 = load float, ptr %arrayidx2, align 4
%add = fmul fast float %result.08, %l0
%fmul = fmul fast float %add, %l1
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%result.0.lcssa = phi float [ %fmul, %for.body ]
ret float %result.0.lcssa
}
define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_min(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 4
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i64 8
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 12
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> splat (i32 2147483647)
; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]])
; CHECK-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%l0 = load i32, ptr %arrayidx, align 4
%c0 = icmp slt i32 %result.08, %l0
%v0 = select i1 %c0, i32 %result.08, i32 %l0
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%result.0.lcssa = phi i32 [ %v0, %for.body ]
ret i32 %result.0.lcssa
}
define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_max(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 4
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i64 8
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 12
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]])
; CHECK-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%l0 = load i32, ptr %arrayidx, align 4
%c0 = icmp ugt i32 %result.08, %l0
%v0 = select i1 %c0, i32 %result.08, i32 %l0
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 257
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%result.0.lcssa = phi i32 [ %v0, %for.body ]
ret i32 %result.0.lcssa
}
; Conditional reductions with multi-input phis.
define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
; CHECK-LABEL: @reduction_conditional(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[S:%.*]], i64 0
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP10]], [[TMP3]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]]
; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]]
; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP3]], <4 x float> [[PREDPHI2]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI3]])
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
; CHECK: if.then:
; CHECK-NEXT: br i1 poison, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then8:
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: if.else:
; CHECK-NEXT: br i1 poison, label [[IF_THEN16:%.*]], label [[FOR_INC]]
; CHECK: if.then16:
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ poison, [[FOR_INC]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret float [[SUM_1_LCSSA]]
;
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
%sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
%arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
%l0 = load float, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
%l1 = load float, ptr %arrayidx2, align 4
%cmp3 = fcmp ogt float %l0, %l1
br i1 %cmp3, label %if.then, label %for.inc
if.then:
%cmp6 = fcmp ogt float %l1, 1.000000e+00
br i1 %cmp6, label %if.then8, label %if.else
if.then8:
%add = fadd fast float %sum.033, %l0
br label %for.inc
if.else:
%cmp14 = fcmp ogt float %l0, 2.000000e+00
br i1 %cmp14, label %if.then16, label %for.inc
if.then16:
%add19 = fadd fast float %sum.033, %l1
br label %for.inc
for.inc:
%sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ]
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp ne i32 %lftr.wideiv, 128
br i1 %exitcond, label %for.body, label %for.end
for.end:
%sum.1.lcssa = phi float [ %sum.1, %for.inc ]
ret float %sum.1.lcssa
}
define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_add_trunc(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP5]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i64 1
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i8> [[TMP7]], i8 [[TMP12]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i64 2
; CHECK-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 4
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP14]], i8 [[TMP19]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i8> [ [[TMP14]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP20]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
; CHECK-NEXT: [[TMP23:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP23]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP24]], i64 3
; CHECK-NEXT: [[TMP26:%.*]] = load i8, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP26]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32>
; CHECK-NEXT: [[TMP30]] = add nuw nsw <4 x i32> [[TMP1]], [[TMP29]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP30]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TMP33:%.*]] = trunc <4 x i32> [[TMP32]] to <4 x i8>
; CHECK-NEXT: [[TMP34:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP33]])
; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP31:![0-9]+]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i8 [ poison, [[DOTLR_PH]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i8 [[SUM_0_LCSSA]]
;
entry:
br label %.lr.ph
.lr.ph: ; preds = %entry, %.lr.ph
%indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
%sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
%sum.02 = and i32 %sum.02p, 255
%l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
%l3 = load i8, ptr %l2, align 4
%l3e = zext i8 %l3 to i32
%l9 = add i32 %sum.02, %l3e
%indvars.iv.next = add i32 %indvars.iv, 1
%exitcond = icmp eq i32 %indvars.iv.next, 257
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph
%sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
%ret = trunc i32 %sum.0.lcssa to i8
ret i8 %ret
}
define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_and_trunc(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i16> [[VEC_IND]], splat (i16 257)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i8> poison, i8 [[TMP4]], i64 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i8> [[TMP6]], i8 [[TMP11]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i8> [ [[TMP6]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP16]], i64 2
; CHECK-NEXT: [[TMP18:%.*]] = load i8, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP13]], i8 [[TMP18]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP19]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP22]]
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP23]], i64 3
; CHECK-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 4
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP25]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i8> [ [[TMP20]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32>
; CHECK-NEXT: [[TMP29]] = and <4 x i32> [[VEC_PHI]], [[TMP28]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP29]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TMP32:%.*]] = trunc <4 x i32> [[TMP31]] to <4 x i8>
; CHECK-NEXT: [[TMP33:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[TMP32]])
; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP33:![0-9]+]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i8 [ poison, [[DOTLR_PH]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i8 [[SUM_0_LCSSA]]
;
entry:
br label %.lr.ph
.lr.ph: ; preds = %entry, %.lr.ph
%indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
%sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
%sum.02 = and i32 %sum.02p, 255
%l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
%l3 = load i8, ptr %l2, align 4
%l3e = zext i8 %l3 to i32
%l9 = and i32 %sum.02, %l3e
%indvars.iv.next = add i32 %indvars.iv, 1
%exitcond = icmp eq i32 %indvars.iv.next, 257
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph
%sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
%ret = trunc i32 %sum.0.lcssa to i8
ret i8 %ret
}