Epilogue vectorization uses isScalarAfterVectorization to check if widened versions for inductions need to be generated and bails out in those cases. At the moment, there are scenarios where isScalarAfterVectorization returns true but VPWidenPointerInductionRecipe::onlyScalarsGenerated would return false, causing widening. This can lead to widened phis with incorrect start values being created in the epilogue vector body. This patch addresses the issue by storing the cost-model decision in VPWidenPointerInductionRecipe and restoring the behavior before 151c144. This effectively reverts 151c144, but the long-term fix is to properly support widened inductions during epilogue vectorization. Fixes #57712.
102 lines
4.7 KiB
LLVM
102 lines
4.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=loop-vectorize -force-vector-width=2 -opaque-pointers < %s | FileCheck %s

; TODO: This still crashes with inbounds on the GEPs.
; @test: a loop driven by two pointer inductions. %p1 and %p2 each advance by
; one float per iteration; every iteration loads through both pointers, adds
; the two values and stores the sum back through %p1. The loop exits once
; %p1.next equals %p1.end.
;
; The assertions below expect the function to leave the vectorizer as the same
; single scalar loop (entry -> loop -> exit, no vector body), even though the
; run line forces a vector width of 2.
;
; NOTE(review): the GEPs here carry no inbounds flag; the file-level TODO about
; inbounds GEPs presumably refers to this function -- confirm.
define void @test(ptr %p1.start, ptr %p2.start, ptr %p1.end) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[P1:%.*]] = phi ptr [ [[P1_START:%.*]], [[ENTRY:%.*]] ], [ [[P1_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[P2:%.*]] = phi ptr [ [[P2_START:%.*]], [[ENTRY]] ], [ [[P2_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[P1_VAL:%.*]] = load float, ptr [[P1]], align 4
; CHECK-NEXT: [[P2_VAL:%.*]] = load float, ptr [[P2]], align 4
; CHECK-NEXT: [[SUM:%.*]] = fadd float [[P1_VAL]], [[P2_VAL]]
; CHECK-NEXT: store float [[SUM]], ptr [[P1]], align 4
; CHECK-NEXT: [[P1_NEXT]] = getelementptr float, ptr [[P1]], i64 1
; CHECK-NEXT: [[P2_NEXT]] = getelementptr float, ptr [[P2]], i64 1
; CHECK-NEXT: [[C:%.*]] = icmp ne ptr [[P1_NEXT]], [[P1_END:%.*]]
; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %p1 = phi ptr [ %p1.start, %entry ], [ %p1.next, %loop ]
  %p2 = phi ptr [ %p2.start, %entry ], [ %p2.next, %loop ]
  %p1.val = load float, ptr %p1
  %p2.val = load float, ptr %p2
  %sum = fadd float %p1.val, %p2.val
  store float %sum, ptr %p1
  %p1.next = getelementptr float, ptr %p1, i64 1
  %p2.next = getelementptr float, ptr %p2, i64 1
  %c = icmp ne ptr %p1.next, %p1.end
  br i1 %c, label %loop, label %exit

exit:
  ret void
}

; @store_pointer_induction: a loop whose pointer induction %iv is both the
; store address and the stored value. %iv advances by one ptr element per
; iteration (inbounds GEP) and the loop exits once %iv.next equals %end.
;
; The assertions below expect a vectorized loop with two lanes:
;   - the trip count is derived from (%end - 8 - %start) >> 3, plus 1;
;   - in vector.body the pointer for each lane (NEXT_GEP, NEXT_GEP3) is
;     computed as a scalar from the integer index, scaled by 8;
;   - the two lane pointers are packed into a <2 x ptr> with insertelement and
;     written with a single vector store addressed through lane 0;
;   - the scalar remainder loop resumes from IND_END when entered from
;     middle.block, or from %start when the minimum-iteration check fails.
define void @store_pointer_induction(ptr %start, ptr %end) {
; CHECK-LABEL: @store_pointer_induction(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START:%.*]] to i64
; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[NEXT_GEP3]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: store <2 x ptr> [[TMP10]], ptr [[TMP11]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: store ptr [[IV]], ptr [[IV]], align 4
; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds ptr, ptr [[IV]], i32 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq ptr [[IV_NEXT]], [[END]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
  ; The induction pointer is the stored value as well as the address, so the
  ; expected vector loop needs every lane's pointer, not only lane 0's.
  store ptr %iv, ptr %iv, align 4
  %iv.next = getelementptr inbounds ptr, ptr %iv, i32 1
  %exitcond = icmp eq ptr %iv.next, %end
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}