Files
clang-p2996/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
Hongtao Yu 3d89b3cbec [CSSPGO] Introducing distribution factor for pseudo probe.
Sample re-annotation is required at LTO time to achieve a reasonable post-inline profile quality. However, we have seen that such LTO-time re-annotation degrades profile quality. This is mainly caused by pre-LTO code duplication done by passes such as loop unrolling, jump threading, indirect call promotion, etc., where samples corresponding to a source location are aggregated multiple times due to the duplicates. In this change we introduce the concept of a distribution factor for pseudo probes so that samples can be distributed across duplicated probes, scaled by a factor. We hope that optimizations that duplicate code maintain the branch frequency information (BFI) well, since probe distribution factors are calculated from it. Distribution factors are updated at the end of the pre-LTO pipeline to reflect an estimated portion of the real execution count.

This change also introduces a pseudo probe verifier that can be run after each IR pass to detect duplicated pseudo probes.

A saturated distribution factor stands for 1.0. A pseudo probe carries a factor with a value ranging from 0.0 to 1.0. A 64-bit integral distribution factor field that represents [0.0, 1.0] is associated with each block probe. Unfortunately this cannot be done for callsite probes due to the size limitation of a 32-bit DWARF discriminator. A 7-bit distribution factor is used instead.

Changes are also needed to the sample profile inliner to deal with prorated callsite counts. Callsites duplicated by pre-LTO passes, when later inlined at LTO time, should have the callee's probes prorated based on the prelink-computed distribution factors. The distribution factors should also be taken into account when computing hotness for inline candidates. Also, indirect call promotion results in multiple callsites. The original samples should be distributed across them. This is fixed by adjusting the callsites' distribution factors.

Reviewed By: wmi

Differential Revision: https://reviews.llvm.org/D93264
2021-02-02 11:55:01 -08:00

188 lines
8.5 KiB
LLVM

;; Test driver. The first pair of commands runs the pseudo-probe and
;; sample-profile passes over this module using the text profile under Inputs/,
;; with prioritized inlining switched off, then verifies the printed IR and
;; pass remarks with the default prefix and the remarks output file with the
;; YAML prefix.
; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-inline.prof -S -pass-remarks=sample-profile -sample-profile-prioritized-inline=0 -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s
; RUN: FileCheck %s -check-prefix=YAML < %t.opt.yaml
;; The second set first rewrites the same profile with llvm-profdata
;; (--sample --extbinary) into %t2 and then repeats both verifications, so the
;; expectations below must hold for either profile encoding.
; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/pseudo-probe-inline.prof -o %t2
; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%t2 -S -pass-remarks=sample-profile -sample-profile-prioritized-inline=0 -pass-remarks-output=%t2.opt.yaml 2>&1 | FileCheck %s
; RUN: FileCheck %s -check-prefix=YAML < %t2.opt.yaml
;; Module targets x86_64 Linux.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
;; Step value used by both loops in @zen. It is read there through volatile
;; loads, so each loop iteration performs a fresh load of this global.
@factor = dso_local global i32 3, align 4
;; foo(x): computes %x + 100000 and returns the result of calling @zen on it.
;;
;; Expected output for this function: foo's own block probe (index 1) comes
;; first, followed by the six block probes of @zen, which shows that zen's body
;; was inlined here. GUID1 (foo) and GUID2 (zen) are captured by the first
;; directives that mention them and reused by every later directive.
;; NOTE(review): the trailing `i64 -1` operand of each probe is presumably the
;; saturated (1.0) distribution factor described in the commit message; confirm.
define dso_local i32 @foo(i32 %x) #0 !dbg !12 {
entry:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1)
%add = add nsw i32 %x, 100000, !dbg !19
;; Check zen is fully inlined so there's no call to zen anymore.
;; Check code from the inlining of zen is properly annotated here.
;; The two conditional branches copied from zen must carry !prof metadata; the
;; ids captured as PD1 and PD2 are matched against branch_weights nodes by the
;; metadata directives further down in this file.
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1)
; CHECK: br i1 %cmp.i, label %while.cond.i, label %while.cond2.i, !dbg ![[#]], !prof ![[PD1:[0-9]+]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1)
; CHECK: br i1 %cmp1.i, label %while.body.i, label %zen.exit, !dbg ![[#]], !prof ![[PD2:[0-9]+]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1)
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1)
;; The negative directive below covers the output between the last inlined
;; probe and the start of zen's own definition.
; CHECK-NOT: call i32 @zen
%call = call i32 @zen(i32 %add), !dbg !20
ret i32 %call, !dbg !21
}
;; zen(x): when %x is positive, keeps subtracting the value loaded from @factor
;; until the running value is no longer positive; otherwise keeps adding the
;; loaded value while the running value is negative. Returns the final value.
;;
;; Expected output: the out-of-line definition is still emitted, and each of
;; its six basic blocks receives one block probe, numbered 1 to 6 in block
;; order and tagged with zen's GUID (GUID2).
; CHECK: define dso_local i32 @zen
define dso_local i32 @zen(i32 %x) #0 !dbg !22 {
entry:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1)
;; Select the counting-down loop for positive input, the counting-up loop
;; otherwise.
%cmp = icmp sgt i32 %x, 0, !dbg !26
br i1 %cmp, label %while.cond, label %while.cond2, !dbg !28
while.cond:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1)
;; Header of the counting-down loop: continue while the value is > 0.
%x.addr.0 = phi i32 [ %x, %entry ], [ %sub, %while.body ]
%cmp1 = icmp sgt i32 %x.addr.0, 0, !dbg !29
br i1 %cmp1, label %while.body, label %if.end, !dbg !31
while.body:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1)
;; Volatile load of @factor on every iteration.
%0 = load volatile i32, i32* @factor, align 4, !dbg !32
%sub = sub nsw i32 %x.addr.0, %0, !dbg !39
br label %while.cond, !dbg !31
while.cond2:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1)
;; Header of the counting-up loop: continue while the value is < 0.
%x.addr.1 = phi i32 [ %x, %entry ], [ %add, %while.body4 ]
%cmp3 = icmp slt i32 %x.addr.1, 0, !dbg !42
br i1 %cmp3, label %while.body4, label %if.end, !dbg !44
while.body4:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1)
;; Volatile load of @factor on every iteration.
%1 = load volatile i32, i32* @factor, align 4, !dbg !45
%add = add nsw i32 %x.addr.1, %1, !dbg !48
br label %while.cond2, !dbg !44
if.end:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1)
;; Common exit: return whichever loop's final value reached this block.
%x.addr.2 = phi i32 [ %x.addr.0, %while.cond ], [ %x.addr.1, %while.cond2 ]
ret i32 %x.addr.2, !dbg !51
}
;; Module-level metadata expected in the output: a named node listing two
;; probe descriptors, one per function, each holding the function GUID, a hash
;; and the function name. The GUIDs must equal the ones seen in the probe calls.
; CHECK: !llvm.pseudo_probe_desc = !{![[#DESC0:]], ![[#DESC1:]]}
; CHECK: ![[#DESC0]] = !{i64 [[#GUID1]], i64 [[#HASH1:]], !"foo"}
; CHECK: ![[#DESC1]] = !{i64 [[#GUID2]], i64 [[#HASH2:]], !"zen"}
;; Branch weights expected on the two conditional branches inlined into @foo:
;; PD1 belongs to the %cmp.i branch and PD2 to the %cmp1.i loop branch.
; CHECK: ![[PD1]] = !{!"branch_weights", i32 25, i32 1}
; CHECK: ![[PD2]] = !{!"branch_weights", i32 382916, i32 25}
;; Verify that the remarks output file was written and holds the expected
;; remarks, in this order: one Passed remark for zen being inlined into foo,
;; then three AppliedSamples analysis remarks emitted for function foo.
;; Remark 1: zen inlined into foo at the call located at test.cpp 10:11.
;YAML: --- !Passed
;YAML-NEXT: Pass: sample-profile-inline
;YAML-NEXT: Name: Inlined
;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 10, Column: 11 }
;YAML-NEXT: Function: foo
;YAML-NEXT: Args:
;YAML-NEXT: - Callee: zen
;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 38, Column: 0 }
;YAML-NEXT: - String: ' inlined into '
;YAML-NEXT: - Caller: foo
;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 9, Column: 0 }
;YAML-NEXT: - String: ' to match profiling context'
;YAML-NEXT: - String: ' with '
;YAML-NEXT: - String: '(cost='
;YAML-NEXT: - Cost: '15'
;YAML-NEXT: - String: ', threshold='
;YAML-NEXT: - Threshold: '2147483647'
;YAML-NEXT: - String: ')'
;YAML-NEXT: - String: ' at callsite '
;YAML-NEXT: - String: foo
;YAML-NEXT: - String: ':'
;YAML-NEXT: - Line: '1'
;YAML-NEXT: - String: ':'
;YAML-NEXT: - Column: '11'
;YAML-NEXT: - String: ';'
;YAML-NEXT: ...
;; Remark 2: 23 samples applied at test.cpp 10:22 (the location of foo's %add),
;; probe id 1, factor 1.0.
;YAML: --- !Analysis
;YAML-NEXT: Pass: sample-profile
;YAML-NEXT: Name: AppliedSamples
;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 10, Column: 22 }
;YAML-NEXT: Function: foo
;YAML-NEXT: Args:
;YAML-NEXT: - String: 'Applied '
;YAML-NEXT: - NumSamples: '23'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '1'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
;YAML-NEXT: - OriginalSamples: '23'
;YAML-NEXT: - String: ')'
;YAML-NEXT: ...
;; Remark 3: 23 samples applied at test.cpp 39:9 (the location of zen's %cmp),
;; probe id 1, factor 1.0; reported under function foo.
;YAML: --- !Analysis
;YAML-NEXT: Pass: sample-profile
;YAML-NEXT: Name: AppliedSamples
;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 39, Column: 9 }
;YAML-NEXT: Function: foo
;YAML-NEXT: Args:
;YAML-NEXT: - String: 'Applied '
;YAML-NEXT: - NumSamples: '23'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '1'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
;YAML-NEXT: - OriginalSamples: '23'
;YAML-NEXT: - String: ')'
;YAML-NEXT: ...
;; Remark 4: 382920 samples applied at test.cpp 41:14 (the location of zen's
;; %cmp1 loop test), probe id 2, factor 1.0; reported under function foo.
;YAML: --- !Analysis
;YAML-NEXT: Pass: sample-profile
;YAML-NEXT: Name: AppliedSamples
;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 41, Column: 14 }
;YAML-NEXT: Function: foo
;YAML-NEXT: Args:
;YAML-NEXT: - String: 'Applied '
;YAML-NEXT: - NumSamples: '382920'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '2'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
;YAML-NEXT: - OriginalSamples: '382920'
;YAML-NEXT: - String: ')'
;YAML-NEXT: ...
;; Attribute group #0 is attached to both @foo and @zen.
;; NOTE(review): "use-sample-profile" presumably opts a function in to
;; sample-profile annotation; confirm against the sample-profile pass.
attributes #0 = {"use-sample-profile"}
;; NOTE(review): no !llvm.dbg.cu node is visible in this file although !2 is a
;; distinct compile unit referenced by both subprograms; confirm the omission
;; is intentional.
!llvm.module.flags = !{!8, !9}
;; Compile unit and source file shared by all debug info below.
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3)
!3 = !DIFile(filename: "test.cpp", directory: "test")
!4 = !{}
!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
;; Module flags referenced by !llvm.module.flags.
!8 = !{i32 7, !"Dwarf Version", i32 4}
!9 = !{i32 2, !"Debug Info Version", i32 3}
;; Debug info for foo (test.cpp line 9). !19, !20 and !21 are attached to its
;; add, call and ret instructions respectively; !18 appears unreferenced by the
;; IR in this file.
!12 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 9, type: !13, scopeLine: 9, unit: !2)
;; Subroutine type (int -> int) shared by foo and zen.
!13 = !DISubroutineType(types: !14)
!14 = !{!7, !7}
!18 = !DILocation(line: 0, scope: !12)
!19 = !DILocation(line: 10, column: 22, scope: !12)
!20 = !DILocation(line: 10, column: 11, scope: !12)
!21 = !DILocation(line: 12, column: 3, scope: !12)
;; Debug info for zen (test.cpp line 37, scope line 38), followed by the
;; lexical blocks and locations used by its instructions. !25, !38 and !47
;; appear unreferenced by the IR in this file.
!22 = distinct !DISubprogram(name: "zen", scope: !3, file: !3, line: 37, type: !13, scopeLine: 38, unit: !2)
!25 = !DILocation(line: 0, scope: !22)
;; Entry-block comparison and branch.
!26 = !DILocation(line: 39, column: 9, scope: !27)
!27 = distinct !DILexicalBlock(scope: !22, file: !3, line: 39, column: 7)
!28 = !DILocation(line: 39, column: 7, scope: !22)
;; Counting-down loop (while.cond / while.body).
!29 = !DILocation(line: 41, column: 14, scope: !30)
!30 = distinct !DILexicalBlock(scope: !27, file: !3, line: 39, column: 14)
!31 = !DILocation(line: 41, column: 5, scope: !30)
!32 = !DILocation(line: 42, column: 16, scope: !33)
!33 = distinct !DILexicalBlock(scope: !30, file: !3, line: 41, column: 19)
!38 = !DILocation(line: 42, column: 12, scope: !33)
!39 = !DILocation(line: 42, column: 9, scope: !33)
;; Counting-up loop (while.cond2 / while.body4).
!42 = !DILocation(line: 48, column: 14, scope: !43)
!43 = distinct !DILexicalBlock(scope: !27, file: !3, line: 46, column: 8)
!44 = !DILocation(line: 48, column: 5, scope: !43)
!45 = !DILocation(line: 49, column: 16, scope: !46)
!46 = distinct !DILexicalBlock(scope: !43, file: !3, line: 48, column: 19)
!47 = !DILocation(line: 49, column: 12, scope: !46)
!48 = !DILocation(line: 49, column: 9, scope: !46)
;; Return in if.end.
!51 = !DILocation(line: 53, column: 3, scope: !22)