[LV] Use vscale for tuning to improve branch weight estimates (#144733)

In addBranchWeightToMiddleTerminator we attempt to add branch weights to
the middle block terminator. For scalable VFs we pessimistically assume
vscale=1 when computing the vector step, whereas we can improve the
estimate by using the value of vscale used for tuning, when one is
available.
This commit is contained in:
David Sherwood
2025-07-01 13:23:38 +01:00
committed by GitHub
parent 15ab4bb5c8
commit 9b13dfdfbc
4 changed files with 12 additions and 6 deletions

View File

@@ -7327,9 +7327,11 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
OrigLoop->getHeader()->getContext());
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
std::optional<unsigned> VScale = CM.getVScaleForTuning();
VPlanTransforms::runPass(VPlanTransforms::addBranchWeightToMiddleTerminator,
BestVPlan, BestVF);
BestVPlan, BestVF, VScale);
}
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
VPlanTransforms::narrowInterleaveGroups(

View File

@@ -3330,8 +3330,8 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
/// Add branch weight metadata, if the \p Plan's middle block is terminated by a
/// BranchOnCond recipe.
void VPlanTransforms::addBranchWeightToMiddleTerminator(VPlan &Plan,
ElementCount VF) {
void VPlanTransforms::addBranchWeightToMiddleTerminator(
VPlan &Plan, ElementCount VF, std::optional<unsigned> VScaleForTuning) {
VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
auto *MiddleTerm =
dyn_cast_or_null<VPInstruction>(MiddleVPBB->getTerminator());
@@ -3343,6 +3343,8 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(VPlan &Plan,
"must have a BranchOnCond");
// Assume that `TripCount % VectorStep ` is equally distributed.
unsigned VectorStep = Plan.getUF() * VF.getKnownMinValue();
if (VF.isScalable() && VScaleForTuning.has_value())
VectorStep *= *VScaleForTuning;
assert(VectorStep > 0 && "trip count should not be zero");
MDBuilder MDB(Plan.getScalarHeader()->getIRBasicBlock()->getContext());
MDNode *BranchWeights =

View File

@@ -238,7 +238,9 @@ struct VPlanTransforms {
/// Add branch weight metadata, if the \p Plan's middle block is terminated by
/// a BranchOnCond recipe.
static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF);
static void
addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
std::optional<unsigned> VScaleForTuning);
};
} // namespace llvm

View File

@@ -92,7 +92,7 @@ for.cond.cleanup: ; preds = %for.body
; CHECK-V1-IC1: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
; CHECK-V1-IC1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-V1-IC1: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK-V1-IC1: [[PROF4]] = !{!"branch_weights", i32 1, i32 3}
; CHECK-V1-IC1: [[PROF4]] = !{!"branch_weights", i32 1, i32 7}
; CHECK-V1-IC1: [[PROF5]] = !{!"branch_weights", i32 0, i32 0}
; CHECK-V1-IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META3]], [[META2]]}
;.