From d2500e639b641b0cfdd1564cc6ff4705b118f10c Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 30 Jun 2025 16:57:11 -0700 Subject: [PATCH] [pgo] add means to specify "unknown" MD_prof (#145578) This PR is part of https://discourse.llvm.org/t/rfc-profile-information-propagation-unittesting/73595 In a slight departure from the RFC, instead of a brand-new `MD_prof_unknown` kind, this adds a first operand to `MD_prof` metadata. This makes it easy to replace with valid metadata (only one `MD_prof`), otherwise sites inserting valid `MD_prof` would also have to check to remove the `unknown` one. The patch just introduces the notion and fixes the verifier accordingly. Existing APIs working (esp. reading) `MD_prof` will be updated subsequently. --- llvm/include/llvm/IR/ProfDataUtils.h | 13 +++ llvm/lib/IR/ProfDataUtils.cpp | 22 +++++ llvm/lib/IR/Verifier.cpp | 56 ++++++++---- llvm/test/Verifier/branch-weight.ll | 130 +++++++++++++++++++++++++-- 4 files changed, 195 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index c24b2aa19a40..bc3b046dadb4 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -27,6 +27,7 @@ struct MDProfLabels { static const char *FunctionEntryCount; static const char *SyntheticFunctionEntryCount; static const char *ExpectedBranchWeights; + static const char *UnknownBranchWeightsMarker; }; /// Checks if an Instruction has MD_prof Metadata @@ -143,6 +144,18 @@ LLVM_ABI bool extractProfTotalWeight(const Instruction &I, LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef Weights, bool IsExpected); +/// Specify that the branch weights for this terminator cannot be known at +/// compile time. This should only be called by passes, and never as a default +/// behavior in e.g. MDBuilder. The goal is to use this info to validate passes +/// do not accidentally drop profile info, and this API is called in cases where +/// the pass explicitly cannot provide that info. Defaulting it in would hide +/// bugs where the pass forgets to transfer over or otherwise specify profile +/// info. +LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I); + +LLVM_ABI bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD); +LLVM_ABI bool hasExplicitlyUnknownBranchWeights(const Instruction &I); + /// Scaling the profile data attached to 'I' using the ratio of S/T. LLVM_ABI void scaleProfData(Instruction &I, uint64_t S, uint64_t T); diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index 605208edda70..b1b5f67689e6 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -94,6 +94,7 @@ const char *MDProfLabels::ValueProfile = "VP"; const char *MDProfLabels::FunctionEntryCount = "function_entry_count"; const char *MDProfLabels::SyntheticFunctionEntryCount = "synthetic_function_entry_count"; +const char *MDProfLabels::UnknownBranchWeightsMarker = "unknown"; bool hasProfMD(const Instruction &I) { return I.hasMetadata(LLVMContext::MD_prof); @@ -241,6 +242,27 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) { return extractProfTotalWeight(I.getMetadata(LLVMContext::MD_prof), TotalVal); } +void setExplicitlyUnknownBranchWeights(Instruction &I) { + MDBuilder MDB(I.getContext()); + I.setMetadata( + LLVMContext::MD_prof, + MDNode::get(I.getContext(), + MDB.createString(MDProfLabels::UnknownBranchWeightsMarker))); +} + +bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD) { + if (MD.getNumOperands() != 1) + return false; + return MD.getOperand(0).equalsStr(MDProfLabels::UnknownBranchWeightsMarker); +} + +bool hasExplicitlyUnknownBranchWeights(const Instruction &I) { + auto *MD = I.getMetadata(LLVMContext::MD_prof); + if (!MD) + return false; + return isExplicitlyUnknownBranchWeightsMetadata(*MD); +} + void setBranchWeights(Instruction &I, ArrayRef Weights, bool IsExpected) { MDBuilder MDB(I.getContext()); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 853f3b45fd10..227afe2b7b61 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2527,6 +2527,12 @@ void Verifier::verifyFunctionMetadata( for (const auto &Pair : MDs) { if (Pair.first == LLVMContext::MD_prof) { MDNode *MD = Pair.second; + if (isExplicitlyUnknownBranchWeightsMetadata(*MD)) { + CheckFailed("'unknown' !prof metadata should appear only on " + "instructions supporting the 'branch_weights' metadata", + MD); + continue; + } Check(MD->getNumOperands() >= 2, "!prof annotations should have no less than 2 operands", MD); @@ -4989,9 +4995,24 @@ void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) { } void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { - Check(MD->getNumOperands() >= 2, - "!prof annotations should have no less than 2 operands", MD); - + auto GetBranchingTerminatorNumOperands = [&]() { + unsigned ExpectedNumOperands = 0; + if (BranchInst *BI = dyn_cast(&I)) + ExpectedNumOperands = BI->getNumSuccessors(); + else if (SwitchInst *SI = dyn_cast(&I)) + ExpectedNumOperands = SI->getNumSuccessors(); + else if (isa(&I)) + ExpectedNumOperands = 1; + else if (IndirectBrInst *IBI = dyn_cast(&I)) + ExpectedNumOperands = IBI->getNumDestinations(); + else if (isa(&I)) + ExpectedNumOperands = 2; + else if (CallBrInst *CI = dyn_cast(&I)) + ExpectedNumOperands = CI->getNumSuccessors(); + return ExpectedNumOperands; + }; + Check(MD->getNumOperands() >= 1, + "!prof annotations should have at least 1 operand", MD); // Check first operand. Check(MD->getOperand(0) != nullptr, "first operand should not be null", MD); Check(isa(MD->getOperand(0)), @@ -4999,6 +5020,19 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { MDString *MDS = cast(MD->getOperand(0)); StringRef ProfName = MDS->getString(); + if (ProfName == MDProfLabels::UnknownBranchWeightsMarker) { + Check(GetBranchingTerminatorNumOperands() != 0 || isa(I), + "'unknown' !prof should only appear on instructions on which " + "'branch_weights' would", + MD); + Check(MD->getNumOperands() == 1, + "'unknown' !prof should have no additional operands", MD); + return; + } + + Check(MD->getNumOperands() >= 2, + "!prof annotations should have no less than 2 operands", MD); + // Check consistency of !prof branch_weights metadata. if (ProfName == MDProfLabels::BranchWeights) { unsigned NumBranchWeights = getNumBranchWeights(*MD); @@ -5006,20 +5040,8 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { Check(NumBranchWeights == 1 || NumBranchWeights == 2, "Wrong number of InvokeInst branch_weights operands", MD); } else { - unsigned ExpectedNumOperands = 0; - if (BranchInst *BI = dyn_cast(&I)) - ExpectedNumOperands = BI->getNumSuccessors(); - else if (SwitchInst *SI = dyn_cast(&I)) - ExpectedNumOperands = SI->getNumSuccessors(); - else if (isa(&I)) - ExpectedNumOperands = 1; - else if (IndirectBrInst *IBI = dyn_cast(&I)) - ExpectedNumOperands = IBI->getNumDestinations(); - else if (isa(&I)) - ExpectedNumOperands = 2; - else if (CallBrInst *CI = dyn_cast(&I)) - ExpectedNumOperands = CI->getNumSuccessors(); - else + const unsigned ExpectedNumOperands = GetBranchingTerminatorNumOperands(); + if (ExpectedNumOperands == 0) CheckFailed("!prof branch_weights are not allowed for this instruction", MD); diff --git a/llvm/test/Verifier/branch-weight.ll b/llvm/test/Verifier/branch-weight.ll index e3b0f340e31b..4c87a9835901 100644 --- a/llvm/test/Verifier/branch-weight.ll +++ b/llvm/test/Verifier/branch-weight.ll @@ -1,21 +1,65 @@ ; Test MD_prof validation ; RUN: split-file %s %t + ; RUN: opt -passes=verify %t/valid.ll --disable-output -; RUN: not opt -passes=verify %t/invalid1.ll --disable-output 2>&1 | FileCheck %s -; RUN: not opt -passes=verify %t/invalid2.ll --disable-output 2>&1 | FileCheck %s + +; RUN: not opt -passes=verify %t/wrong-count.ll --disable-output 2>&1 | FileCheck %s --check-prefix=WRONG-COUNT +; RUN: not opt -passes=verify %t/invalid-name1.ll --disable-output 2>&1 | FileCheck %s +; RUN: not opt -passes=verify %t/invalid-name2.ll --disable-output 2>&1 | FileCheck %s + +; RUN: opt -passes=verify %t/unknown-correct.ll --disable-output + +; RUN: not opt -passes=verify %t/unknown-invalid.ll --disable-output 2>&1 | FileCheck %s --check-prefix=EXTRA-ARGS +; RUN: not opt -passes=verify %t/unknown-on-function1.ll --disable-output 2>&1 | FileCheck %s --check-prefix=ON-FUNCTION1 +; RUN: not opt -passes=verify %t/unknown-on-function2.ll --disable-output 2>&1 | FileCheck %s --check-prefix=ON-FUNCTION2 +; RUN: not opt -passes=verify %t/invalid-unknown-placement.ll --disable-output 2>&1 | FileCheck %s --check-prefix=INVALID-UNKNOWN-PLACEMENT ;--- valid.ll -define void @test(i1 %0) { - br i1 %0, label %2, label %3, !prof !0 -2: +declare void @to_invoke() +declare i32 @__gxx_personality_v0(...) + +define void @invoker() personality ptr @__gxx_personality_v0 { + invoke void @to_invoke() to label %exit unwind label %lpad, !prof !0 +lpad: + %ll = landingpad {ptr, i32} + cleanup ret void -3: +exit: ret void } -!0 = !{!"branch_weights", i32 1, i32 2} -;--- invalid1.ll +define i32 @test(i32 %a) { + %c = icmp eq i32 %a, 0 + br i1 %c, label %yes, label %exit, !prof !0 +yes: + switch i32 %a, label %exit [ i32 1, label %case_b + i32 2, label %case_c], !prof !1 +case_b: + br label %exit +case_c: + br label %exit +exit: + %r = select i1 %c, i32 1, i32 2, !prof !0 + ret i32 %r +} +!0 = !{!"branch_weights", i32 1, i32 2} +!1 = !{!"branch_weights", i32 1, i32 2, i32 3} + +;--- wrong-count.ll +define void @test(i32 %a) { + %c = icmp eq i32 %a, 0 + br i1 %c, label %yes, label %no, !prof !0 +yes: + ret void +no: + ret void +} +!0 = !{!"branch_weights", i32 1, i32 2, i32 3} + +; WRONG-COUNT: Wrong number of operands + +;--- invalid-name1.ll define void @test(i1 %0) { br i1 %0, label %2, label %3, !prof !0 2: @@ -25,7 +69,7 @@ define void @test(i1 %0) { } !0 = !{!"invalid", i32 1, i32 2} -;--- invalid2.ll +;--- invalid-name2.ll define void @test(i1 %0) { br i1 %0, label %2, label %3, !prof !0 2: @@ -37,3 +81,71 @@ define void @test(i1 %0) { !0 = !{!"function_entry_count", i32 1} ; CHECK: expected either branch_weights or VP profile name + +;--- unknown-correct.ll +declare void @to_invoke() +declare i32 @__gxx_personality_v0(...) + +define void @invoker() personality ptr @__gxx_personality_v0 { + invoke void @to_invoke() to label %exit unwind label %lpad, !prof !0 +lpad: + %ll = landingpad {ptr, i32} + cleanup + ret void +exit: + ret void +} + +define i32 @test(i32 %a) { + %c = icmp eq i32 %a, 0 + br i1 %c, label %yes, label %exit, !prof !0 +yes: + switch i32 %a, label %exit [ i32 1, label %case_b + i32 2, label %case_c], !prof !0 +case_b: + br label %exit +case_c: + br label %exit +exit: + %r = select i1 %c, i32 1, i32 2, !prof !0 + ret i32 %r +} + +!0 = !{!"unknown"} + +;--- unknown-invalid.ll +define void @test(i32 %a) { + %c = icmp eq i32 %a, 0 + br i1 %c, label %yes, label %no, !prof !0 +yes: + ret void +no: + ret void +} + +!0 = !{!"unknown", i32 12, i32 67} +; EXTRA-ARGS: 'unknown' !prof should have no additional operands + +;--- unknown-on-function1.ll +define void @test() !prof !0 { + ret void +} + +!0 = !{!"unknown"} +; ON-FUNCTION1: 'unknown' !prof metadata should appear only on instructions supporting the 'branch_weights' metadata + +;--- unknown-on-function2.ll +define void @test() !prof !0 { + ret void +} + +!0 = !{!"unknown", i64 123} +; ON-FUNCTION2: first operand should be 'function_entry_count' or 'synthetic_function_entry_count' + +;--- invalid-unknown-placement.ll +define i32 @test() { + %r = add i32 1, 2, !prof !0 + ret i32 %r +} +!0 = !{!"unknown"} +; INVALID-UNKNOWN-PLACEMENT: 'unknown' !prof should only appear on instructions on which 'branch_weights' would