Optimize the IR generated for a VPWidenIntOrFpInductionRecipe to use the narrowest type necessary, when the trip-count of a loop is known to be constant and the only use of the recipe is the condition used by the vector loop's backedge branch.
92 lines
5.0 KiB
LLVM
92 lines
5.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -S --passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -force-vector-width=4 < %s | FileCheck %s
|
|
|
|
; Test input: a counted loop whose exit compare tests the pre-increment
; induction value against 200, so the body runs for iv = 0 through 200.
; Each iteration stores the address of a fresh %n-byte alloca into %vf1[iv].
;
; The autogenerated assertions below expect the vectorized loop to keep the
; scalar index as i64 while the widened induction is a <4 x i8> vector that is
; compared `ule` against splat (i8 -56), i.e. 200 when read as an unsigned
; 8-bit value. The four lanes of that compare gate the four predicated stores.
define i32 @test(ptr %vf1, i64 %n) {
|
|
; CHECK-LABEL: define i32 @test(
|
|
; CHECK-SAME: ptr [[VF1:%.*]], i64 [[N:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 -56)
|
|
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
|
|
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
|
; CHECK: [[PRED_STORE_IF]]:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP2]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = alloca i8, i64 [[N]], align 16
|
|
; CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8
|
|
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
|
; CHECK: [[PRED_STORE_CONTINUE]]:
|
|
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
|
|
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
|
|
; CHECK: [[PRED_STORE_IF1]]:
|
|
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP6]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = alloca i8, i64 [[N]], align 16
|
|
; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
|
|
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
|
|
; CHECK: [[PRED_STORE_CONTINUE2]]:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
|
|
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
|
|
; CHECK: [[PRED_STORE_IF3]]:
|
|
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2
|
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP10]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = alloca i8, i64 [[N]], align 16
|
|
; CHECK-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8
|
|
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
|
|
; CHECK: [[PRED_STORE_CONTINUE4]]:
|
|
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
|
|
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
|
|
; CHECK: [[PRED_STORE_IF5]]:
|
|
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 3
|
|
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP14]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = alloca i8, i64 [[N]], align 16
|
|
; CHECK-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8
|
|
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
|
|
; CHECK: [[PRED_STORE_CONTINUE6]]:
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
|
|
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 204
|
|
; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 204, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP18:%.*]] = alloca i8, i64 [[N]], align 16
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: store ptr [[TMP18]], ptr [[ARRAYIDX]], align 8
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV]], 200
|
|
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: ret i32 0
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
; Induction variable: starts at 0 and is advanced by 1 on every iteration.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
; A new %n-byte, 16-byte-aligned alloca is executed on every iteration.
%0 = alloca i8, i64 %n, align 16
|
|
%arrayidx = getelementptr inbounds ptr, ptr %vf1, i64 %indvars.iv
|
|
; Store the alloca's address into element %indvars.iv of %vf1.
store ptr %0, ptr %arrayidx, align 8
|
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
|
; The exit test reads %indvars.iv rather than %indvars.iv.next, so the
; iteration with iv == 200 still runs its store before the loop is left.
%exitcond.not = icmp eq i64 %indvars.iv, 200
|
|
br i1 %exitcond.not, label %exit, label %for.body
|
|
|
|
exit:
|
|
ret i32 0
|
|
}
|
|
;.
|
|
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
|
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
|
|
;.
|