[pgo] add means to specify "unknown" MD_prof (#145578)

This PR is part of https://discourse.llvm.org/t/rfc-profile-information-propagation-unittesting/73595

In a slight departure from the RFC, instead of a brand-new `MD_prof_unknown` kind, this adds a first operand to `MD_prof` metadata. This makes it easy to replace with valid metadata (only one `MD_prof`), otherwise sites inserting valid `MD_prof` would also have to check to remove the `unknown` one.

The patch just introduces the notion and fixes the verifier accordingly. Existing APIs working (esp. reading) `MD_prof` will be updated subsequently.
This commit is contained in:
Mircea Trofin
2025-06-30 16:57:11 -07:00
committed by GitHub
parent 12409a18f6
commit d2500e639b
4 changed files with 195 additions and 26 deletions

View File

@@ -27,6 +27,7 @@ struct MDProfLabels {
static const char *FunctionEntryCount;
static const char *SyntheticFunctionEntryCount;
static const char *ExpectedBranchWeights;
static const char *UnknownBranchWeightsMarker;
};
/// Checks if an Instruction has MD_prof Metadata
@@ -143,6 +144,18 @@ LLVM_ABI bool extractProfTotalWeight(const Instruction &I,
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights,
bool IsExpected);
/// Specify that the branch weights for this terminator cannot be known at
/// compile time. This should only be called by passes, and never as a default
/// behavior in e.g. MDBuilder. The goal is to use this info to validate passes
/// do not accidentally drop profile info, and this API is called in cases where
/// the pass explicitly cannot provide that info. Defaulting it in would hide
/// bugs where the pass forgets to transfer over or otherwise specify profile
/// info.
LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I);
LLVM_ABI bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD);
LLVM_ABI bool hasExplicitlyUnknownBranchWeights(const Instruction &I);
/// Scaling the profile data attached to 'I' using the ratio of S/T.
LLVM_ABI void scaleProfData(Instruction &I, uint64_t S, uint64_t T);

View File

@@ -94,6 +94,7 @@ const char *MDProfLabels::ValueProfile = "VP";
const char *MDProfLabels::FunctionEntryCount = "function_entry_count";
const char *MDProfLabels::SyntheticFunctionEntryCount =
"synthetic_function_entry_count";
const char *MDProfLabels::UnknownBranchWeightsMarker = "unknown";
bool hasProfMD(const Instruction &I) {
return I.hasMetadata(LLVMContext::MD_prof);
@@ -241,6 +242,27 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) {
return extractProfTotalWeight(I.getMetadata(LLVMContext::MD_prof), TotalVal);
}
void setExplicitlyUnknownBranchWeights(Instruction &I) {
MDBuilder MDB(I.getContext());
I.setMetadata(
LLVMContext::MD_prof,
MDNode::get(I.getContext(),
MDB.createString(MDProfLabels::UnknownBranchWeightsMarker)));
}
bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD) {
if (MD.getNumOperands() != 1)
return false;
return MD.getOperand(0).equalsStr(MDProfLabels::UnknownBranchWeightsMarker);
}
bool hasExplicitlyUnknownBranchWeights(const Instruction &I) {
auto *MD = I.getMetadata(LLVMContext::MD_prof);
if (!MD)
return false;
return isExplicitlyUnknownBranchWeightsMetadata(*MD);
}
void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights,
bool IsExpected) {
MDBuilder MDB(I.getContext());

View File

@@ -2527,6 +2527,12 @@ void Verifier::verifyFunctionMetadata(
for (const auto &Pair : MDs) {
if (Pair.first == LLVMContext::MD_prof) {
MDNode *MD = Pair.second;
if (isExplicitlyUnknownBranchWeightsMetadata(*MD)) {
CheckFailed("'unknown' !prof metadata should appear only on "
"instructions supporting the 'branch_weights' metadata",
MD);
continue;
}
Check(MD->getNumOperands() >= 2,
"!prof annotations should have no less than 2 operands", MD);
@@ -4989,9 +4995,24 @@ void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) {
}
void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) {
Check(MD->getNumOperands() >= 2,
"!prof annotations should have no less than 2 operands", MD);
auto GetBranchingTerminatorNumOperands = [&]() {
unsigned ExpectedNumOperands = 0;
if (BranchInst *BI = dyn_cast<BranchInst>(&I))
ExpectedNumOperands = BI->getNumSuccessors();
else if (SwitchInst *SI = dyn_cast<SwitchInst>(&I))
ExpectedNumOperands = SI->getNumSuccessors();
else if (isa<CallInst>(&I))
ExpectedNumOperands = 1;
else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(&I))
ExpectedNumOperands = IBI->getNumDestinations();
else if (isa<SelectInst>(&I))
ExpectedNumOperands = 2;
else if (CallBrInst *CI = dyn_cast<CallBrInst>(&I))
ExpectedNumOperands = CI->getNumSuccessors();
return ExpectedNumOperands;
};
Check(MD->getNumOperands() >= 1,
"!prof annotations should have at least 1 operand", MD);
// Check first operand.
Check(MD->getOperand(0) != nullptr, "first operand should not be null", MD);
Check(isa<MDString>(MD->getOperand(0)),
@@ -4999,6 +5020,19 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) {
MDString *MDS = cast<MDString>(MD->getOperand(0));
StringRef ProfName = MDS->getString();
if (ProfName == MDProfLabels::UnknownBranchWeightsMarker) {
Check(GetBranchingTerminatorNumOperands() != 0 || isa<InvokeInst>(I),
"'unknown' !prof should only appear on instructions on which "
"'branch_weights' would",
MD);
Check(MD->getNumOperands() == 1,
"'unknown' !prof should have no additional operands", MD);
return;
}
Check(MD->getNumOperands() >= 2,
"!prof annotations should have no less than 2 operands", MD);
// Check consistency of !prof branch_weights metadata.
if (ProfName == MDProfLabels::BranchWeights) {
unsigned NumBranchWeights = getNumBranchWeights(*MD);
@@ -5006,20 +5040,8 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) {
Check(NumBranchWeights == 1 || NumBranchWeights == 2,
"Wrong number of InvokeInst branch_weights operands", MD);
} else {
unsigned ExpectedNumOperands = 0;
if (BranchInst *BI = dyn_cast<BranchInst>(&I))
ExpectedNumOperands = BI->getNumSuccessors();
else if (SwitchInst *SI = dyn_cast<SwitchInst>(&I))
ExpectedNumOperands = SI->getNumSuccessors();
else if (isa<CallInst>(&I))
ExpectedNumOperands = 1;
else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(&I))
ExpectedNumOperands = IBI->getNumDestinations();
else if (isa<SelectInst>(&I))
ExpectedNumOperands = 2;
else if (CallBrInst *CI = dyn_cast<CallBrInst>(&I))
ExpectedNumOperands = CI->getNumSuccessors();
else
const unsigned ExpectedNumOperands = GetBranchingTerminatorNumOperands();
if (ExpectedNumOperands == 0)
CheckFailed("!prof branch_weights are not allowed for this instruction",
MD);

View File

@@ -1,21 +1,65 @@
; Test MD_prof validation
; RUN: split-file %s %t
; RUN: opt -passes=verify %t/valid.ll --disable-output
; RUN: not opt -passes=verify %t/invalid1.ll --disable-output 2>&1 | FileCheck %s
; RUN: not opt -passes=verify %t/invalid2.ll --disable-output 2>&1 | FileCheck %s
; RUN: not opt -passes=verify %t/wrong-count.ll --disable-output 2>&1 | FileCheck %s --check-prefix=WRONG-COUNT
; RUN: not opt -passes=verify %t/invalid-name1.ll --disable-output 2>&1 | FileCheck %s
; RUN: not opt -passes=verify %t/invalid-name2.ll --disable-output 2>&1 | FileCheck %s
; RUN: opt -passes=verify %t/unknown-correct.ll --disable-output
; RUN: not opt -passes=verify %t/unknown-invalid.ll --disable-output 2>&1 | FileCheck %s --check-prefix=EXTRA-ARGS
; RUN: not opt -passes=verify %t/unknown-on-function1.ll --disable-output 2>&1 | FileCheck %s --check-prefix=ON-FUNCTION1
; RUN: not opt -passes=verify %t/unknown-on-function2.ll --disable-output 2>&1 | FileCheck %s --check-prefix=ON-FUNCTION2
; RUN: not opt -passes=verify %t/invalid-unknown-placement.ll --disable-output 2>&1 | FileCheck %s --check-prefix=INVALID-UNKNOWN-PLACEMENT
;--- valid.ll
define void @test(i1 %0) {
br i1 %0, label %2, label %3, !prof !0
2:
declare void @to_invoke()
declare i32 @__gxx_personality_v0(...)
define void @invoker() personality ptr @__gxx_personality_v0 {
invoke void @to_invoke() to label %exit unwind label %lpad, !prof !0
lpad:
%ll = landingpad {ptr, i32}
cleanup
ret void
3:
exit:
ret void
}
!0 = !{!"branch_weights", i32 1, i32 2}
;--- invalid1.ll
define i32 @test(i32 %a) {
%c = icmp eq i32 %a, 0
br i1 %c, label %yes, label %exit, !prof !0
yes:
switch i32 %a, label %exit [ i32 1, label %case_b
i32 2, label %case_c], !prof !1
case_b:
br label %exit
case_c:
br label %exit
exit:
%r = select i1 %c, i32 1, i32 2, !prof !0
ret i32 %r
}
!0 = !{!"branch_weights", i32 1, i32 2}
!1 = !{!"branch_weights", i32 1, i32 2, i32 3}
;--- wrong-count.ll
define void @test(i32 %a) {
%c = icmp eq i32 %a, 0
br i1 %c, label %yes, label %no, !prof !0
yes:
ret void
no:
ret void
}
!0 = !{!"branch_weights", i32 1, i32 2, i32 3}
; WRONG-COUNT: Wrong number of operands
;--- invalid-name1.ll
define void @test(i1 %0) {
br i1 %0, label %2, label %3, !prof !0
2:
@@ -25,7 +69,7 @@ define void @test(i1 %0) {
}
!0 = !{!"invalid", i32 1, i32 2}
;--- invalid2.ll
;--- invalid-name2.ll
define void @test(i1 %0) {
br i1 %0, label %2, label %3, !prof !0
2:
@@ -37,3 +81,71 @@ define void @test(i1 %0) {
!0 = !{!"function_entry_count", i32 1}
; CHECK: expected either branch_weights or VP profile name
;--- unknown-correct.ll
declare void @to_invoke()
declare i32 @__gxx_personality_v0(...)
define void @invoker() personality ptr @__gxx_personality_v0 {
invoke void @to_invoke() to label %exit unwind label %lpad, !prof !0
lpad:
%ll = landingpad {ptr, i32}
cleanup
ret void
exit:
ret void
}
define i32 @test(i32 %a) {
%c = icmp eq i32 %a, 0
br i1 %c, label %yes, label %exit, !prof !0
yes:
switch i32 %a, label %exit [ i32 1, label %case_b
i32 2, label %case_c], !prof !0
case_b:
br label %exit
case_c:
br label %exit
exit:
%r = select i1 %c, i32 1, i32 2, !prof !0
ret i32 %r
}
!0 = !{!"unknown"}
;--- unknown-invalid.ll
define void @test(i32 %a) {
%c = icmp eq i32 %a, 0
br i1 %c, label %yes, label %no, !prof !0
yes:
ret void
no:
ret void
}
!0 = !{!"unknown", i32 12, i32 67}
; EXTRA-ARGS: 'unknown' !prof should have no additional operands
;--- unknown-on-function1.ll
define void @test() !prof !0 {
ret void
}
!0 = !{!"unknown"}
; ON-FUNCTION1: 'unknown' !prof metadata should appear only on instructions supporting the 'branch_weights' metadata
;--- unknown-on-function2.ll
define void @test() !prof !0 {
ret void
}
!0 = !{!"unknown", i64 123}
; ON-FUNCTION2: first operand should be 'function_entry_count' or 'synthetic_function_entry_count'
;--- invalid-unknown-placement.ll
define i32 @test() {
%r = add i32 1, 2, !prof !0
ret i32 %r
}
!0 = !{!"unknown"}
; INVALID-UNKNOWN-PLACEMENT: 'unknown' !prof should only appear on instructions on which 'branch_weights' would