[MemProf] Attach value profile metadata to the IR using CalleeGuids. (#141164)
Use the newly introduced CalleeGuids in CallSiteInfo to annotate the IR where necessary with value profile metadata. Use a synthetic count of 1 since we don't have actual counts in the profile collection.
This commit is contained in:
@@ -178,6 +178,12 @@ static cl::opt<bool>
|
||||
cl::desc("Salvage stale MemProf profile"),
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
static cl::opt<bool> ClMemProfAttachCalleeGuids(
|
||||
"memprof-attach-calleeguids",
|
||||
cl::desc(
|
||||
"Attach calleeguids as value profile metadata for indirect calls."),
|
||||
cl::init(true), cl::Hidden);
|
||||
|
||||
extern cl::opt<bool> MemProfReportHintedSizes;
|
||||
extern cl::opt<unsigned> MinClonedColdBytePercent;
|
||||
extern cl::opt<unsigned> MinCallsiteColdBytePercent;
|
||||
@@ -952,6 +958,46 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
|
||||
UndriftCallStack(CS.Frames);
|
||||
}
|
||||
|
||||
// Helper function to process CalleeGuids and create value profile metadata
|
||||
static void addVPMetadata(Module &M, Instruction &I,
|
||||
ArrayRef<GlobalValue::GUID> CalleeGuids) {
|
||||
if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
|
||||
return;
|
||||
|
||||
if (I.getMetadata(LLVMContext::MD_prof)) {
|
||||
uint64_t Unused;
|
||||
// TODO: When merging is implemented, increase this to a typical ICP value
|
||||
// (e.g., 3-6) For now, we only need to check if existing data exists, so 1
|
||||
// is sufficient
|
||||
auto ExistingVD = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,
|
||||
/*MaxNumValueData=*/1, Unused);
|
||||
// We don't know how to merge value profile data yet.
|
||||
if (!ExistingVD.empty()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
SmallVector<InstrProfValueData, 4> VDs;
|
||||
uint64_t TotalCount = 0;
|
||||
|
||||
for (const GlobalValue::GUID CalleeGUID : CalleeGuids) {
|
||||
InstrProfValueData VD;
|
||||
VD.Value = CalleeGUID;
|
||||
// For MemProf, we don't have actual call counts, so we assign
|
||||
// a weight of 1 to each potential target.
|
||||
// TODO: Consider making this weight configurable or increasing it to
|
||||
// improve effectiveness for ICP.
|
||||
VD.Count = 1;
|
||||
VDs.push_back(VD);
|
||||
TotalCount += VD.Count;
|
||||
}
|
||||
|
||||
if (!VDs.empty()) {
|
||||
annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget,
|
||||
VDs.size());
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
|
||||
const TargetLibraryInfo &TLI,
|
||||
@@ -1020,15 +1066,35 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
|
||||
// Build maps of the location hash to all profile data with that leaf location
|
||||
// (allocation info and the callsites).
|
||||
std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
|
||||
// A hash function for std::unordered_set<ArrayRef<Frame>> to work.
|
||||
struct CallStackHash {
|
||||
size_t operator()(ArrayRef<Frame> CS) const {
|
||||
return computeFullStackId(CS);
|
||||
|
||||
// Helper struct for maintaining refs to callsite data. As an alternative we
|
||||
// could store a pointer to the CallSiteInfo struct but we also need the frame
|
||||
// index. Using ArrayRefs instead makes it a little easier to read.
|
||||
struct CallSiteEntry {
|
||||
// Subset of frames for the corresponding CallSiteInfo.
|
||||
ArrayRef<Frame> Frames;
|
||||
// Potential targets for indirect calls.
|
||||
ArrayRef<GlobalValue::GUID> CalleeGuids;
|
||||
|
||||
// Only compare Frame contents.
|
||||
// Use pointer-based equality instead of ArrayRef's operator== which does
|
||||
// element-wise comparison. We want to check if it's the same slice of the
|
||||
// underlying array, not just equivalent content.
|
||||
bool operator==(const CallSiteEntry &Other) const {
|
||||
return Frames.data() == Other.Frames.data() &&
|
||||
Frames.size() == Other.Frames.size();
|
||||
}
|
||||
};
|
||||
|
||||
struct CallSiteEntryHash {
|
||||
size_t operator()(const CallSiteEntry &Entry) const {
|
||||
return computeFullStackId(Entry.Frames);
|
||||
}
|
||||
};
|
||||
|
||||
// For the callsites we need to record slices of the frame array (see comments
|
||||
// below where the map entries are added).
|
||||
std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>>
|
||||
// below where the map entries are added) along with their CalleeGuids.
|
||||
std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>>
|
||||
LocHashToCallSites;
|
||||
for (auto &AI : MemProfRec->AllocSites) {
|
||||
NumOfMemProfAllocContextProfiles++;
|
||||
@@ -1046,8 +1112,10 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
|
||||
unsigned Idx = 0;
|
||||
for (auto &StackFrame : CS.Frames) {
|
||||
uint64_t StackId = computeStackId(StackFrame);
|
||||
LocHashToCallSites[StackId].insert(
|
||||
ArrayRef<Frame>(CS.Frames).drop_front(Idx++));
|
||||
ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
|
||||
ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids);
|
||||
LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});
|
||||
|
||||
ProfileHasColumns |= StackFrame.Column;
|
||||
// Once we find this function, we can stop recording.
|
||||
if (StackFrame.Function == FuncGUID)
|
||||
@@ -1191,13 +1259,18 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
|
||||
// Otherwise, add callsite metadata. If we reach here then we found the
|
||||
// instruction's leaf location in the callsites map and not the allocation
|
||||
// map.
|
||||
for (auto CallStackIdx : CallSitesIter->second) {
|
||||
for (const auto &CallSiteEntry : CallSitesIter->second) {
|
||||
// If we found and thus matched all frames on the call, create and
|
||||
// attach call stack metadata.
|
||||
if (stackFrameIncludesInlinedCallStack(CallStackIdx,
|
||||
if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames,
|
||||
InlinedCallStack)) {
|
||||
NumOfMemProfMatchedCallSites++;
|
||||
addCallsiteMetadata(I, InlinedCallStack, Ctx);
|
||||
|
||||
// Try to attach indirect call metadata if possible.
|
||||
if (!CalledFunction)
|
||||
addVPMetadata(M, I, CallSiteEntry.CalleeGuids);
|
||||
|
||||
// Only need to find one with a matching call stack and add a single
|
||||
// callsite metadata.
|
||||
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
; RUN: split-file %s %t
|
||||
|
||||
;; Basic functionality with flag toggle
|
||||
; RUN: llvm-profdata merge --memprof-version=4 %t/basic.yaml -o %t/basic.memprofdata
|
||||
; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
|
||||
; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE
|
||||
|
||||
;; FDO conflict handling
|
||||
; RUN: llvm-profdata merge --memprof-version=4 %t/fdo_conflict.yaml -o %t/fdo_conflict.memprofdata
|
||||
; RUN: opt < %t/fdo_conflict.ll -passes='memprof-use<profile-filename=%t/fdo_conflict.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-FDO
|
||||
|
||||
;--- basic.yaml
|
||||
---
|
||||
HeapProfileRecords:
|
||||
- GUID: _Z3barv
|
||||
AllocSites: []
|
||||
CallSites:
|
||||
- Frames:
|
||||
- { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false }
|
||||
CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01]
|
||||
...
|
||||
|
||||
;--- basic.ll
|
||||
define dso_local void @_Z3barv() !dbg !4 {
|
||||
entry:
|
||||
%fp = alloca ptr, align 8
|
||||
%0 = load ptr, ptr %fp, align 8
|
||||
call void %0(), !dbg !5
|
||||
; CHECK-ENABLE: call void %0(), {{.*}} !prof !6
|
||||
; CHECK-DISABLE-NOT: !prof
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1}
|
||||
|
||||
!llvm.module.flags = !{!2, !3}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
|
||||
!1 = !DIFile(filename: "t", directory: "/")
|
||||
!2 = !{i32 7, !"Dwarf Version", i32 5}
|
||||
!3 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, unit: !0)
|
||||
!5 = !DILocation(line: 4, column: 5, scope: !4)
|
||||
|
||||
;--- fdo_conflict.yaml
|
||||
---
|
||||
HeapProfileRecords:
|
||||
- GUID: _Z3foov
|
||||
AllocSites: []
|
||||
CallSites:
|
||||
- Frames:
|
||||
- { Function: _Z3foov, LineOffset: 3, Column: 5, IsInlineFrame: false }
|
||||
CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01]
|
||||
- Frames:
|
||||
- { Function: _Z3foov, LineOffset: 5, Column: 5, IsInlineFrame: false }
|
||||
CalleeGuids: [0x555556789abcdef0, 0x666656789abcdef1]
|
||||
...
|
||||
|
||||
;--- fdo_conflict.ll
|
||||
define dso_local void @_Z3foov() !dbg !14 {
|
||||
entry:
|
||||
%fp = alloca ptr, align 8
|
||||
%0 = load ptr, ptr %fp, align 8
|
||||
; This call already has FDO value profile metadata - should NOT be modified
|
||||
; CHECK-FDO: call void %0(), {{.*}} !prof !6
|
||||
call void %0(), !dbg !15, !prof !16
|
||||
|
||||
%1 = load ptr, ptr %fp, align 8
|
||||
; This call does NOT have existing metadata - should get MemProf annotation
|
||||
; CHECK-FDO: call void %1(), {{.*}} !prof !9
|
||||
call void %1(), !dbg !17
|
||||
ret void
|
||||
}
|
||||
|
||||
!16 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
|
||||
|
||||
; CHECK-FDO: !6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
|
||||
; CHECK-FDO: !9 = !{!"VP", i32 0, i64 2, i64 6148915942236413680, i64 1, i64 7378680115485269745, i64 1}
|
||||
|
||||
!llvm.module.flags = !{!12, !13}
|
||||
|
||||
!10 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !11)
|
||||
!11 = !DIFile(filename: "t", directory: "/")
|
||||
!12 = !{i32 7, !"Dwarf Version", i32 5}
|
||||
!13 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!14 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !11, file: !11, line: 1, unit: !10)
|
||||
!15 = !DILocation(line: 4, column: 5, scope: !14)
|
||||
!17 = !DILocation(line: 6, column: 5, scope: !14)
|
||||
Reference in New Issue
Block a user