|
|
|
|
@@ -96,10 +96,10 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK: vector.ph:
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE6]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[PRED_LOAD_CONTINUE6]] ]
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
|
|
|
|
|
@@ -115,8 +115,8 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
|
|
|
|
|
; CHECK: pred.load.if1:
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
|
|
|
|
|
; CHECK: pred.load.if3:
|
|
|
|
|
; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1
|
|
|
|
|
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
|
|
|
|
|
; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
|
|
|
|
|
@@ -124,13 +124,13 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]]
|
|
|
|
|
; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
|
|
|
|
|
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
|
|
|
|
|
; CHECK: pred.load.continue2:
|
|
|
|
|
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
|
|
|
|
|
; CHECK: pred.load.continue4:
|
|
|
|
|
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
|
|
|
|
|
; CHECK: pred.load.if3:
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
|
|
|
|
|
; CHECK: pred.load.if5:
|
|
|
|
|
; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2
|
|
|
|
|
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]]
|
|
|
|
|
; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
|
|
|
|
|
@@ -138,13 +138,13 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]]
|
|
|
|
|
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
|
|
|
|
|
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
|
|
|
|
|
; CHECK: pred.load.continue4:
|
|
|
|
|
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
|
|
|
|
|
; CHECK: pred.load.continue6:
|
|
|
|
|
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
|
|
|
|
|
; CHECK: pred.load.if5:
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
|
|
|
|
|
; CHECK: pred.load.if7:
|
|
|
|
|
; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3
|
|
|
|
|
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
|
|
|
|
|
; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
|
|
|
|
|
@@ -152,11 +152,11 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]]
|
|
|
|
|
; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
|
|
|
|
|
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
|
|
|
|
|
; CHECK: pred.load.continue6:
|
|
|
|
|
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND7]], <4 x i32> zeroinitializer
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
|
|
|
|
|
; CHECK: pred.load.continue8:
|
|
|
|
|
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> zeroinitializer
|
|
|
|
|
; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP40]])
|
|
|
|
|
; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> zeroinitializer
|
|
|
|
|
@@ -167,7 +167,7 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP48]] = add i32 [[TMP47]], [[TMP45]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], <i32 4, i32 4, i32 4, i32 4>
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
|
|
|
|
|
; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
@@ -301,10 +301,10 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK: vector.ph:
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE6]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[PRED_LOAD_CONTINUE6]] ]
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
|
|
|
|
|
@@ -320,8 +320,8 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
|
|
|
|
|
; CHECK: pred.load.if1:
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
|
|
|
|
|
; CHECK: pred.load.if3:
|
|
|
|
|
; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1
|
|
|
|
|
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
|
|
|
|
|
; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
|
|
|
|
|
@@ -329,13 +329,13 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]]
|
|
|
|
|
; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
|
|
|
|
|
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
|
|
|
|
|
; CHECK: pred.load.continue2:
|
|
|
|
|
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
|
|
|
|
|
; CHECK: pred.load.continue4:
|
|
|
|
|
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
|
|
|
|
|
; CHECK: pred.load.if3:
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
|
|
|
|
|
; CHECK: pred.load.if5:
|
|
|
|
|
; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2
|
|
|
|
|
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]]
|
|
|
|
|
; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
|
|
|
|
|
@@ -343,13 +343,13 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]]
|
|
|
|
|
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
|
|
|
|
|
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
|
|
|
|
|
; CHECK: pred.load.continue4:
|
|
|
|
|
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
|
|
|
|
|
; CHECK: pred.load.continue6:
|
|
|
|
|
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
|
|
|
|
|
; CHECK: pred.load.if5:
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
|
|
|
|
|
; CHECK: pred.load.if7:
|
|
|
|
|
; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3
|
|
|
|
|
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
|
|
|
|
|
; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
|
|
|
|
|
@@ -357,11 +357,11 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]]
|
|
|
|
|
; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
|
|
|
|
|
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
|
|
|
|
|
; CHECK: pred.load.continue6:
|
|
|
|
|
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND7]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
|
|
|
|
|
; CHECK: pred.load.continue8:
|
|
|
|
|
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
|
|
|
|
; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]])
|
|
|
|
|
; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
|
|
|
|
@@ -372,7 +372,7 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP48]] = mul i32 [[TMP47]], [[TMP45]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], <i32 4, i32 4, i32 4, i32 4>
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
|
|
|
|
|
; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
@@ -416,10 +416,10 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK: vector.ph:
|
|
|
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
|
|
|
; CHECK: vector.body:
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE6]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[PRED_LOAD_CONTINUE6]] ]
|
|
|
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE8]] ]
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
|
|
|
|
|
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
|
|
|
|
|
@@ -435,8 +435,8 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
|
|
|
|
|
; CHECK: pred.load.if1:
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
|
|
|
|
|
; CHECK: pred.load.if3:
|
|
|
|
|
; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1
|
|
|
|
|
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
|
|
|
|
|
; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
|
|
|
|
|
@@ -444,13 +444,13 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]]
|
|
|
|
|
; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
|
|
|
|
|
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
|
|
|
|
|
; CHECK: pred.load.continue2:
|
|
|
|
|
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
|
|
|
|
|
; CHECK: pred.load.continue4:
|
|
|
|
|
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
|
|
|
|
|
; CHECK: pred.load.if3:
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
|
|
|
|
|
; CHECK: pred.load.if5:
|
|
|
|
|
; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2
|
|
|
|
|
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]]
|
|
|
|
|
; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
|
|
|
|
|
@@ -458,13 +458,13 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]]
|
|
|
|
|
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
|
|
|
|
|
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
|
|
|
|
|
; CHECK: pred.load.continue4:
|
|
|
|
|
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
|
|
|
|
|
; CHECK: pred.load.continue6:
|
|
|
|
|
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
|
|
|
|
|
; CHECK: pred.load.if5:
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
|
|
|
|
|
; CHECK: pred.load.if7:
|
|
|
|
|
; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3
|
|
|
|
|
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
|
|
|
|
|
; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
|
|
|
|
|
@@ -472,12 +472,12 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]]
|
|
|
|
|
; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
|
|
|
|
|
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
|
|
|
|
|
; CHECK: pred.load.continue6:
|
|
|
|
|
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
|
|
|
|
|
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
|
|
|
|
|
; CHECK: pred.load.continue8:
|
|
|
|
|
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
|
|
|
|
|
; CHECK-NEXT: [[TMP40:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP38]]
|
|
|
|
|
; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND7]], <4 x i32> zeroinitializer
|
|
|
|
|
; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> zeroinitializer
|
|
|
|
|
; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP41]])
|
|
|
|
|
; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP42]], [[VEC_PHI]]
|
|
|
|
|
; CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
|
|
|
|
|
@@ -485,7 +485,7 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
|
|
|
|
|
; CHECK-NEXT: [[TMP46]] = add i32 [[TMP45]], [[TMP43]]
|
|
|
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], <i32 4, i32 4, i32 4, i32 4>
|
|
|
|
|
; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
|
|
|
|
|
; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
|
|
|
|
|
; CHECK-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
|
|
|
; CHECK: middle.block:
|
|
|
|
|
|