[IndVars] Pass TTI to replaceCongruentIVs
In IndVarSimplify, after simplifying and extending loop IVs, we call 'replaceCongruentIVs'. This function optionally takes a TTI argument so that it can replace uses of narrow IVs with truncates of the widest one. For some reason the TTI wasn't passed to the function, so it couldn't perform this transform. This patch fixes that by passing TTI. Reviewed By: mkazantsev Differential Revision: https://reviews.llvm.org/D113024
This commit is contained in:
@@ -1924,7 +1924,7 @@ bool IndVarSimplify::run(Loop *L) {
|
||||
}
|
||||
|
||||
// Eliminate redundant IV cycles.
|
||||
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
|
||||
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
|
||||
|
||||
// Try to convert exit conditions to unsigned and rotate computation
|
||||
// out of the loop. Note: Handles invalidation internally if needed.
|
||||
|
||||
@@ -47,11 +47,10 @@ define void @non_local_load_with_iv_zext(i32* %ptr) {
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL_INC:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[VAL_INC]] = add nuw nsw i32 [[VAL]], 1
|
||||
; CHECK-NEXT: store i32 [[VAL_INC]], i32* [[PTR]], align 4
|
||||
; CHECK-NEXT: call void @foo(i64 [[INDVARS_IV]])
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; CHECK-NEXT: [[INDVARS:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; CHECK-NEXT: store i32 [[INDVARS]], i32* [[PTR]], align 4
|
||||
; CHECK-NEXT: call void @foo(i64 [[INDVARS_IV]])
|
||||
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 1000
|
||||
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
; CHECK: exit:
|
||||
@@ -85,12 +84,12 @@ define void @two_non_local_loads(i32* %ptr1) {
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[VAL2:%.*]] = phi i32 [ [[VAL2_INC:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[VAL2_INC]] = add nuw nsw i32 [[VAL2]], 1
|
||||
; CHECK-NEXT: store i32 [[VAL2_INC]], i32* [[PTR1]], align 4
|
||||
; CHECK-NEXT: store i32 [[VAL2_INC]], i32* [[PTR2]], align 4
|
||||
; CHECK-NEXT: call void @foo(i64 [[INDVARS_IV]])
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; CHECK-NEXT: [[INDVARS4:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; CHECK-NEXT: store i32 [[INDVARS4]], i32* [[PTR1]], align 4
|
||||
; CHECK-NEXT: [[INDVARS:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; CHECK-NEXT: store i32 [[INDVARS]], i32* [[PTR2]], align 4
|
||||
; CHECK-NEXT: call void @foo(i64 [[INDVARS_IV]])
|
||||
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 1000
|
||||
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
; CHECK: exit:
|
||||
|
||||
@@ -9,11 +9,11 @@ define i32 @fn2() personality i32 (...)* @__CxxFrameHandler3 {
|
||||
; CHECK-NEXT: br label [[FOR_COND:%.*]]
|
||||
; CHECK: for.cond:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[C_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[INDVARS1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
|
||||
; CHECK-NEXT: invoke void @fn1(i64 [[INDVARS_IV]])
|
||||
; CHECK-NEXT: to label [[FOR_INC]] unwind label [[CATCH_DISPATCH:%.*]]
|
||||
; CHECK: catch.dispatch:
|
||||
; CHECK-NEXT: [[C_0_LCSSA:%.*]] = phi i32 [ [[C_0]], [[FOR_COND]] ]
|
||||
; CHECK-NEXT: [[C_0_LCSSA:%.*]] = phi i32 [ [[INDVARS1]], [[FOR_COND]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller
|
||||
; CHECK: catch:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [i8* null, i32 64, i8* null]
|
||||
@@ -22,7 +22,6 @@ define i32 @fn2() personality i32 (...)* @__CxxFrameHandler3 {
|
||||
; CHECK-NEXT: ret i32 [[C_0_LCSSA]]
|
||||
; CHECK: for.inc:
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 1
|
||||
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[C_0]], 1
|
||||
; CHECK-NEXT: br label [[FOR_COND]]
|
||||
;
|
||||
entry:
|
||||
|
||||
@@ -103,17 +103,17 @@ define void @test2([8 x i8]* %a, i8* %b, i8 %limit) {
|
||||
; CHECK: for.cond1.preheader.preheader:
|
||||
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
|
||||
; CHECK: for.cond1.preheader.us:
|
||||
; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC13_US:%.*]] ]
|
||||
; CHECK-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ], [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC13_US:%.*]] ]
|
||||
; CHECK-NEXT: br i1 true, label [[FOR_BODY4_LR_PH_US:%.*]], label [[FOR_INC13_US]]
|
||||
; CHECK: for.inc13.us.loopexit:
|
||||
; CHECK-NEXT: br label [[FOR_INC13_US]]
|
||||
; CHECK: for.inc13.us:
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
|
||||
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 4
|
||||
; CHECK-NEXT: br i1 [[EXITCOND4]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_END_LOOPEXIT1:%.*]]
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT4]] = add nuw nsw i64 [[INDVARS_IV3]], 1
|
||||
; CHECK-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT4]], 4
|
||||
; CHECK-NEXT: br i1 [[EXITCOND6]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_END_LOOPEXIT1:%.*]]
|
||||
; CHECK: for.body4.us:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY4_LR_PH_US]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US:%.*]] ]
|
||||
; CHECK-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[A:%.*]], i64 [[INDVARS_IV2]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[A:%.*]], i64 [[INDVARS_IV3]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[ARRAYIDX6_US]], align 1
|
||||
; CHECK-NEXT: [[IDXPROM7_US:%.*]] = zext i8 [[TMP0]] to i64
|
||||
; CHECK-NEXT: [[ARRAYIDX8_US:%.*]] = getelementptr inbounds i8, i8* [[B:%.*]], i64 [[IDXPROM7_US]]
|
||||
@@ -549,6 +549,7 @@ define i32 @test11(i32 %start, i32* %p, i32* %q) {
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
||||
; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
|
||||
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]]
|
||||
; CHECK: backedge:
|
||||
@@ -556,7 +557,6 @@ define i32 @test11(i32 %start, i32* %p, i32* %q) {
|
||||
; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4
|
||||
; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4
|
||||
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
||||
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret i32 0
|
||||
@@ -599,11 +599,11 @@ define i32 @test12(i32 %start, i32* %p, i32* %q) {
|
||||
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]]
|
||||
; CHECK: backedge:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
||||
; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4
|
||||
; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4
|
||||
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
||||
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret i32 0
|
||||
@@ -862,8 +862,8 @@ define i32 @test16_unsigned_pos1(i32 %start, i32* %p, i32* %q, i32 %x) {
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], -1
|
||||
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[GUARDED:%.*]]
|
||||
; CHECK: guarded:
|
||||
; CHECK-NEXT: [[ICMP_USER_WIDE4:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: br i1 [[ICMP_USER_WIDE4]], label [[BACKEDGE]], label [[SIDE_EXIT:%.*]]
|
||||
; CHECK-NEXT: [[ICMP_USER_WIDE5:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: br i1 [[ICMP_USER_WIDE5]], label [[BACKEDGE]], label [[SIDE_EXIT:%.*]]
|
||||
; CHECK: backedge:
|
||||
; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4
|
||||
@@ -1266,13 +1266,13 @@ define i32 @test17(i32* %p, i32 %len) {
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
||||
; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
|
||||
; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
|
||||
; CHECK: backedge:
|
||||
; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[ADDR]] unordered, align 4
|
||||
; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], 0
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
||||
; CHECK-NEXT: br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 -1 to i32
|
||||
@@ -1312,10 +1312,9 @@ define void @test18() {
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; CHECK-NEXT: call void @bar(i32 [[IV_NEXT]])
|
||||
; CHECK-NEXT: [[INDVARS2:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; CHECK-NEXT: call void @bar(i32 [[INDVARS2]])
|
||||
; CHECK-NEXT: call void @foo(i64 [[INDVARS_IV]])
|
||||
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 1000
|
||||
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
@@ -1412,11 +1411,10 @@ define void @test21(i32* %ptr) {
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL_INC:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[VAL_INC]] = add nuw nsw i32 [[VAL]], 1
|
||||
; CHECK-NEXT: store i32 [[VAL_INC]], i32* [[PTR]], align 4
|
||||
; CHECK-NEXT: call void @foo(i64 [[INDVARS_IV]])
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; CHECK-NEXT: [[INDVARS:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; CHECK-NEXT: store i32 [[INDVARS]], i32* [[PTR]], align 4
|
||||
; CHECK-NEXT: call void @foo(i64 [[INDVARS_IV]])
|
||||
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 1000
|
||||
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
; CHECK: exit:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -26,7 +26,7 @@ define void @loop_or(i8* noalias %pIn, i32* noalias %pOut, i32 %s) {
|
||||
; CHECK: for.body.preheader:
|
||||
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[S]] to i64
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[S]], 8
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER4:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER5:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967288
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
@@ -37,9 +37,9 @@ define void @loop_or(i8* noalias %pIn, i32* noalias %pOut, i32 %s) {
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>*
|
||||
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
|
||||
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[WIDE_LOAD3]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[WIDE_LOAD4]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw <4 x i32> [[TMP4]], <i32 65792, i32 65792, i32 65792, i32 65792>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw <4 x i32> [[TMP5]], <i32 65792, i32 65792, i32 65792, i32 65792>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP4]], <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
|
||||
@@ -57,12 +57,12 @@ define void @loop_or(i8* noalias %pIn, i32* noalias %pOut, i32 %s) {
|
||||
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER4]]
|
||||
; CHECK: for.body.preheader4:
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER5]]
|
||||
; CHECK: for.body.preheader5:
|
||||
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER4]] ]
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER5]] ]
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[PIN]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
|
||||
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP17]] to i32
|
||||
|
||||
@@ -19,28 +19,28 @@ define void @vdiv(double* %x, double* %y, double %a, i32 %N) #0 {
|
||||
; CHECK: for.body.preheader:
|
||||
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 16
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER17:%.*]], label [[VECTOR_MEMCHECK:%.*]]
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER18:%.*]], label [[VECTOR_MEMCHECK:%.*]]
|
||||
; CHECK: vector.memcheck:
|
||||
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[WIDE_TRIP_COUNT]]
|
||||
; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr double, double* [[Y:%.*]], i64 [[WIDE_TRIP_COUNT]]
|
||||
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[SCEVGEP6]], [[X]]
|
||||
; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr double, double* [[Y:%.*]], i64 [[WIDE_TRIP_COUNT]]
|
||||
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[SCEVGEP7]], [[X]]
|
||||
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt double* [[SCEVGEP]], [[Y]]
|
||||
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
|
||||
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PREHEADER17]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PREHEADER18]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967280
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x double> poison, double [[A]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT11]], <4 x double> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x double> poison, double [[A]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT13]], <4 x double> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x double> poison, double [[A]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT15]], <4 x double> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x double> poison, double [[A]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT12]], <4 x double> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x double> poison, double [[A]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT14]], <4 x double> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <4 x double> poison, double [[A]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT17:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT16]], <4 x double> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT12]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT14]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT16]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT13]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT15]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT17]]
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
@@ -49,17 +49,17 @@ define void @vdiv(double* %x, double* %y, double %a, i32 %N) #0 {
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP5]], align 8, !tbaa [[TBAA3:![0-9]+]], !alias.scope !7
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>*
|
||||
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8, !tbaa [[TBAA3]], !alias.scope !7
|
||||
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8, !tbaa [[TBAA3]], !alias.scope !7
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 8
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[TMP8]] to <4 x double>*
|
||||
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x double>, <4 x double>* [[TMP9]], align 8, !tbaa [[TBAA3]], !alias.scope !7
|
||||
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x double>, <4 x double>* [[TMP9]], align 8, !tbaa [[TBAA3]], !alias.scope !7
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 12
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP10]] to <4 x double>*
|
||||
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x double>, <4 x double>* [[TMP11]], align 8, !tbaa [[TBAA3]], !alias.scope !7
|
||||
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x double>, <4 x double>* [[TMP11]], align 8, !tbaa [[TBAA3]], !alias.scope !7
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <4 x double> [[WIDE_LOAD8]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <4 x double> [[WIDE_LOAD9]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x double> [[WIDE_LOAD10]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <4 x double> [[WIDE_LOAD9]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <4 x double> [[WIDE_LOAD10]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x double> [[WIDE_LOAD11]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = bitcast double* [[TMP16]] to <4 x double>*
|
||||
; CHECK-NEXT: store <4 x double> [[TMP12]], <4 x double>* [[TMP17]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7
|
||||
@@ -77,8 +77,8 @@ define void @vdiv(double* %x, double* %y, double %a, i32 %N) #0 {
|
||||
; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER17]]
|
||||
; CHECK: for.body.preheader17:
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER18]]
|
||||
; CHECK: for.body.preheader18:
|
||||
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = xor i64 [[INDVARS_IV_PH]], -1
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = add nsw i64 [[TMP25]], [[WIDE_TRIP_COUNT]]
|
||||
@@ -101,17 +101,17 @@ define void @vdiv(double* %x, double* %y, double %a, i32 %N) #0 {
|
||||
; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_SUB]], 0
|
||||
; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], !llvm.loop [[LOOP14:![0-9]+]]
|
||||
; CHECK: for.body.prol.loopexit:
|
||||
; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER17]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]
|
||||
; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER18]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = icmp ult i64 [[TMP26]], 3
|
||||
; CHECK-NEXT: br i1 [[TMP29]], label [[FOR_END]], label [[FOR_BODY_PREHEADER17_NEW:%.*]]
|
||||
; CHECK: for.body.preheader17.new:
|
||||
; CHECK-NEXT: br i1 [[TMP29]], label [[FOR_END]], label [[FOR_BODY_PREHEADER18_NEW:%.*]]
|
||||
; CHECK: for.body.preheader18.new:
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = fdiv fast double 1.000000e+00, [[A]]
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = fdiv fast double 1.000000e+00, [[A]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = fdiv fast double 1.000000e+00, [[A]]
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = fdiv fast double 1.000000e+00, [[A]]
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER17_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER18_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[T0:%.*]] = load double, double* [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = fmul fast double [[T0]], [[TMP30]]
|
||||
|
||||
Reference in New Issue
Block a user