[LV] Use vscale for tuning to improve branch weight estimates (#144733)
In addBranchWeightToMiddleTerminator we attempt to add branch weights to the middle block terminator. For scalable VFs we pessimistically assume vscale=1 when computing the vector step, whereas we can improve the estimate by using the value of vscale used for tuning.
This commit is contained in:
@@ -7327,9 +7327,11 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
|
||||
OrigLoop->getHeader()->getContext());
|
||||
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
|
||||
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
|
||||
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
|
||||
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
|
||||
std::optional<unsigned> VScale = CM.getVScaleForTuning();
|
||||
VPlanTransforms::runPass(VPlanTransforms::addBranchWeightToMiddleTerminator,
|
||||
BestVPlan, BestVF);
|
||||
BestVPlan, BestVF, VScale);
|
||||
}
|
||||
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
|
||||
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
|
||||
VPlanTransforms::narrowInterleaveGroups(
|
||||
|
||||
@@ -3330,8 +3330,8 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
|
||||
|
||||
/// Add branch weight metadata, if the \p Plan's middle block is terminated by a
|
||||
/// BranchOnCond recipe.
|
||||
void VPlanTransforms::addBranchWeightToMiddleTerminator(VPlan &Plan,
|
||||
ElementCount VF) {
|
||||
void VPlanTransforms::addBranchWeightToMiddleTerminator(
|
||||
VPlan &Plan, ElementCount VF, std::optional<unsigned> VScaleForTuning) {
|
||||
VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
|
||||
auto *MiddleTerm =
|
||||
dyn_cast_or_null<VPInstruction>(MiddleVPBB->getTerminator());
|
||||
@@ -3343,6 +3343,8 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(VPlan &Plan,
|
||||
"must have a BranchOnCond");
|
||||
// Assume that `TripCount % VectorStep` is equally distributed.
|
||||
unsigned VectorStep = Plan.getUF() * VF.getKnownMinValue();
|
||||
if (VF.isScalable() && VScaleForTuning.has_value())
|
||||
VectorStep *= *VScaleForTuning;
|
||||
assert(VectorStep > 0 && "trip count should not be zero");
|
||||
MDBuilder MDB(Plan.getScalarHeader()->getIRBasicBlock()->getContext());
|
||||
MDNode *BranchWeights =
|
||||
|
||||
@@ -238,7 +238,9 @@ struct VPlanTransforms {
|
||||
|
||||
/// Add branch weight metadata, if the \p Plan's middle block is terminated by
|
||||
/// a BranchOnCond recipe.
|
||||
static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF);
|
||||
static void
|
||||
addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
|
||||
std::optional<unsigned> VScaleForTuning);
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
@@ -92,7 +92,7 @@ for.cond.cleanup: ; preds = %for.body
|
||||
; CHECK-V1-IC1: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
|
||||
; CHECK-V1-IC1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
|
||||
; CHECK-V1-IC1: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
|
||||
; CHECK-V1-IC1: [[PROF4]] = !{!"branch_weights", i32 1, i32 3}
|
||||
; CHECK-V1-IC1: [[PROF4]] = !{!"branch_weights", i32 1, i32 7}
|
||||
; CHECK-V1-IC1: [[PROF5]] = !{!"branch_weights", i32 0, i32 0}
|
||||
; CHECK-V1-IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META3]], [[META2]]}
|
||||
;.
|
||||
|
||||
Reference in New Issue
Block a user