[TypeProf][PGO]Support skipping vtable comparisons for a class and its derived ones (#110575)
Performance-critical core libraries can be highly optimized for arch or micro-arch features. For instance, the absl crc library specializes different templated classes for different hardware [1]. In a practical setting, it's likely that instrumented profiles are collected on one type of machine and used to optimize binaries that run on multiple types of hardware. While this kind of specialization is rare in terms of lines of code, the compiler can do a better job by skipping vtable-based ICP. * The per-class `Extend` implementation is arch-specific as well. If an instrumented profile is collected on one arch and applied to another arch where the `Extend` implementation is different, `Extend` might be regarded as an unlikely function in the latter case. The `ABSL_ATTRIBUTE_HOT` annotation alleviates the problem by putting all `Extend` implementations into the hot text section [2]. This change introduces a comma-separated list to specify the mangled vtable names, and the ICP pass will skip vtable-based comparison if a vtable variable definition is shown to be in its class hierarchy (per LLVM type metadata). [1] c6b27359c3/absl/crc/internal/crc_x86_arm_combined.cc (L621-L650) [2] c6b27359c3/absl/crc/internal/crc_x86_arm_combined.cc (L370C3-L370C21)
This commit is contained in:
@@ -132,6 +132,15 @@ static cl::opt<int> ICPMaxNumVTableLastCandidate(
|
||||
"icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
|
||||
cl::desc("The maximum number of vtable for the last candidate."));
|
||||
|
||||
// Type info names (the string literals used in LLVM type metadata) of classes
// that must not take part in vtable-based indirect-call promotion. A vtable
// whose type metadata mentions any of these names is skipped, which covers the
// named class and the classes derived from it.
//
// cl::CommaSeparated lets a single occurrence carry several names
// (-icp-ignored-base-types=A,B); repeating the flag keeps working as well.
// Without it "A,B" would be stored as one name and never match any type id.
static cl::list<std::string> ICPIgnoredBaseTypes(
    "icp-ignored-base-types", cl::Hidden, cl::CommaSeparated,
    cl::desc(
        "A list of mangled vtable type info names. Classes specified by the "
        "type info names and their derived ones will not be vtable-ICP'ed. "
        "Useful when the profiled types and actual types in the optimized "
        "binary could be different due to profiling limitations. Type info "
        "names are those string literals used in LLVM type metadata"));
|
||||
|
||||
namespace {
|
||||
|
||||
// The key is a vtable global variable, and the value is a map.
|
||||
@@ -316,6 +325,8 @@ private:
|
||||
|
||||
OptimizationRemarkEmitter &ORE;
|
||||
|
||||
const DenseSet<StringRef> &IgnoredBaseTypes;
|
||||
|
||||
// A struct that records the direct target and its call count.
|
||||
struct PromotionCandidate {
|
||||
Function *const TargetFunction;
|
||||
@@ -366,6 +377,10 @@ private:
|
||||
bool isProfitableToCompareVTables(const CallBase &CB,
|
||||
ArrayRef<PromotionCandidate> Candidates);
|
||||
|
||||
// Return true if the vtable corresponding to VTableGUID should be skipped
|
||||
// for vtable-based comparison.
|
||||
bool shouldSkipVTable(uint64_t VTableGUID);
|
||||
|
||||
// Given an indirect callsite and the list of function candidates, compute
|
||||
// the following vtable information in output parameters and return vtable
|
||||
// pointer if type profiles exist.
|
||||
@@ -391,10 +406,12 @@ public:
|
||||
Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
|
||||
const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
|
||||
VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
|
||||
const DenseSet<StringRef> &IgnoredBaseTypes,
|
||||
OptimizationRemarkEmitter &ORE)
|
||||
: F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
|
||||
VirtualCSInfo(VirtualCSInfo),
|
||||
VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
|
||||
VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
|
||||
IgnoredBaseTypes(IgnoredBaseTypes) {}
|
||||
IndirectCallPromoter(const IndirectCallPromoter &) = delete;
|
||||
IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
|
||||
|
||||
@@ -851,9 +868,14 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
|
||||
LLVM_DEBUG(dbgs() << "\n");
|
||||
|
||||
uint64_t CandidateVTableCount = 0;
|
||||
for (auto &[GUID, Count] : VTableGUIDAndCounts)
|
||||
|
||||
for (auto &[GUID, Count] : VTableGUIDAndCounts) {
|
||||
CandidateVTableCount += Count;
|
||||
|
||||
if (shouldSkipVTable(GUID))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
|
||||
LLVM_DEBUG(
|
||||
dbgs() << " function count " << Candidate.Count
|
||||
@@ -883,6 +905,27 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IndirectCallPromoter::shouldSkipVTable(uint64_t VTableGUID) {
|
||||
if (IgnoredBaseTypes.empty())
|
||||
return false;
|
||||
|
||||
auto *VTableVar = Symtab->getGlobalVariable(VTableGUID);
|
||||
|
||||
assert(VTableVar && "VTableVar must exist for GUID in VTableGUIDAndCounts");
|
||||
|
||||
SmallVector<MDNode *, 2> Types;
|
||||
VTableVar->getMetadata(LLVMContext::MD_type, Types);
|
||||
|
||||
for (auto *Type : Types)
|
||||
if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
|
||||
if (IgnoredBaseTypes.contains(TypeId->getString())) {
|
||||
LLVM_DEBUG(dbgs() << " vtable profiles should be ignored. Bail "
|
||||
"out of vtable comparison.");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// For virtual calls in the module, collect per-callsite information which will
|
||||
// be used to associate an ICP candidate with a vtable and a specific function
|
||||
// in the vtable. With type intrinsics (llvm.type.test), we can find virtual
|
||||
@@ -956,9 +999,15 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
|
||||
bool Changed = false;
|
||||
VirtualCallSiteTypeInfoMap VirtualCSInfo;
|
||||
|
||||
if (EnableVTableProfileUse)
|
||||
DenseSet<StringRef> IgnoredBaseTypes;
|
||||
|
||||
if (EnableVTableProfileUse) {
|
||||
computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
|
||||
|
||||
for (StringRef Str : ICPIgnoredBaseTypes)
|
||||
IgnoredBaseTypes.insert(Str);
|
||||
}
|
||||
|
||||
// VTableAddressPointOffsetVal stores the vtable address points. The vtable
|
||||
// address point of a given <vtable, address point offset> is static (doesn't
|
||||
// change after being computed once).
|
||||
@@ -977,7 +1026,8 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
|
||||
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
|
||||
|
||||
IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
|
||||
VTableAddressPointOffsetVal, ORE);
|
||||
VTableAddressPointOffsetVal,
|
||||
IgnoredBaseTypes, ORE);
|
||||
bool FuncChanged = CallPromoter.processFunction(PSI);
|
||||
if (ICPDUMPAFTER && FuncChanged) {
|
||||
LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||
|
||||
; Tests that ICP compares vtables by checking IR.
|
||||
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
|
||||
; Require exactly one vtable candidate for each function candidate. Tests that ICP compares function by checking IR.
|
||||
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
|
||||
; On top of line 4, ignore 'Base1' and its derived types for vtable-based comparison. Tests that ICP compares functions.
|
||||
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -icp-ignored-base-types='Base1' -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
Reference in New Issue
Block a user