[LV] Update call widening decision when scalarizing calls.

collectInstsToScalarize may decide to scalarize a call. If so, we have
to update the widening decision for the call; otherwise the call won't
be scalarized as expected during VPlan construction.

This issue was uncovered by f82543d509.
Author: Florian Hahn
Date:   2024-09-03 14:12:40 +01:00
Parent: 0797c184c6
Commit: dd94537b40

2 changed files with 64 additions and 1 deletion


@@ -5386,8 +5386,18 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
         // 3. Emulated masked memrefs, if a hacked cost is needed.
         if (!isScalarAfterVectorization(&I, VF) && !VF.isScalable() &&
             !useEmulatedMaskMemRefHack(&I, VF) &&
-            computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
+            computePredInstDiscount(&I, ScalarCosts, VF) >= 0) {
           ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());
+          // Check if we decided to scalarize a call. If so, update the widening
+          // decision of the call to CM_Scalarize with the computed scalar cost.
+          for (const auto &[I, _] : ScalarCosts) {
+            auto *CI = dyn_cast<CallInst>(I);
+            if (!CI || !CallWideningDecisions.contains({CI, VF}))
+              continue;
+            CallWideningDecisions[{CI, VF}].Kind = CM_Scalarize;
+            CallWideningDecisions[{CI, VF}].Cost = ScalarCosts[CI];
+          }
+        }
         // Remember that BB will remain after vectorization.
         PredicatedBBsAfterVectorization[VF].insert(BB);
         for (auto *Pred : predecessors(BB)) {
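For context: the recorded call widening decisions are what later VPlan recipe construction consults; it does not re-run the scalarization analysis. The sketch below is a minimal, self-contained model of that interaction using hypothetical names (it is not the actual LoopVectorize.cpp API): if the map still records a "widen" decision while the call was placed in the scalarization set, the two sides disagree and the call is not replicated as expected.

#include <cassert>
#include <map>
#include <utility>

// Simplified stand-ins for the cost model's bookkeeping (hypothetical names).
enum class WideningKind { WidenIntrinsic, Scalarize };

struct CallDecision {
  WideningKind Kind;
  unsigned Cost;
};

// Keyed by (call id, vectorization factor), mirroring the {CI, VF} pairs
// used in the patch above.
using DecisionMap = std::map<std::pair<int, int>, CallDecision>;

// Recipe construction only reads the recorded decision; it does not
// recompute profitability. If the scalarization analysis marks a call for
// scalarization but leaves the map untouched, this still returns "widen".
WideningKind decideRecipe(const DecisionMap &Decisions, int CallId, int VF) {
  auto It = Decisions.find({CallId, VF});
  assert(It != Decisions.end() && "expected a recorded decision");
  return It->second.Kind;
}

int main() {
  DecisionMap Decisions;
  int CallId = 1, VF = 4;

  // Initial decision from the earlier call-cost analysis.
  Decisions[{CallId, VF}] = {WideningKind::WidenIntrinsic, /*Cost=*/10};

  // The scalarization analysis later computes a scalar cost and decides to
  // scalarize; with the patch, the recorded decision is updated in place.
  unsigned ScalarCost = 6;
  Decisions[{CallId, VF}].Kind = WideningKind::Scalarize;
  Decisions[{CallId, VF}].Cost = ScalarCost;

  // Recipe construction and the scalarization analysis now agree.
  assert(decideRecipe(Decisions, CallId, VF) == WideningKind::Scalarize);
  return 0;
}

In the real code, the map entry is the {CI, VF}-keyed CallWideningDecisions updated in the hunk above; the sketch only makes the lookup/update ordering explicit.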


@@ -126,6 +126,59 @@ exit:
   ret void
 }
+
+define void @call_scalarized(ptr noalias %src, ptr noalias %dst, double %0) {
+; CHECK-LABEL: define void @call_scalarized(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], double [[TMP0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], -1
+; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV_NEXT]]
+; CHECK-NEXT:    [[L:%.*]] = load double, ptr [[GEP_SRC]], align 8
+; CHECK-NEXT:    [[CMP295:%.*]] = fcmp ugt double [[TMP0]], 0.000000e+00
+; CHECK-NEXT:    [[CMP299:%.*]] = fcmp ugt double [[L]], 0.000000e+00
+; CHECK-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP295]], [[CMP299]]
+; CHECK-NEXT:    br i1 [[OR_COND]], label %[[LOOP_LATCH]], label %[[THEN:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[L]])
+; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV_NEXT]]
+; CHECK-NEXT:    store double [[SQRT]], ptr [[GEP_DST]], align 8
+; CHECK-NEXT:    br label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ 100, %entry ], [ %iv.next, %loop.latch ]
+  %iv.next = add i64 %iv, -1
+  %gep.src = getelementptr double, ptr %src, i64 %iv.next
+  %l = load double, ptr %gep.src, align 8
+  %cmp295 = fcmp ugt double %0, 0.000000e+00
+  %cmp299 = fcmp ugt double %l, 0.000000e+00
+  %or.cond = or i1 %cmp295, %cmp299
+  br i1 %or.cond, label %loop.latch, label %then
+
+then:
+  %sqrt = call double @llvm.sqrt.f64(double %l)
+  %gep.dst = getelementptr double, ptr %dst, i64 %iv.next
+  store double %sqrt, ptr %gep.dst, align 8
+  br label %loop.latch
+
+loop.latch:
+  %tobool.not = icmp eq i64 %iv.next, 0
+  br i1 %tobool.not, label %exit, label %loop.header
+
+exit:
+  ret void
+}
+
 declare double @llvm.sqrt.f64(double) #0
 declare double @llvm.powi.f64.i32(double, i32)
 declare i64 @llvm.fshl.i64(i64, i64, i64)
 ;.