Files
clang-p2996/llvm/test/Transforms/SampleProfile/inline-mergeprof.ll
Wei Mi e32469a140 [SampleFDO] Enable sample-profile-top-down-load and sample-profile-merge-inlinee
by default.

sample-profile-top-down-load is an internal option which can enable top-down
order of inlining and profile annotation in sample profile load pass. It was
found to be beneficial for better profile annotation.

Recently we found it could also solve some build time issue. Suppose function
A has many callsites in function B. In the last release binary where sample
profile was collected, the outline copy of A is large because there are many
other functions inlined into A. However although all the callsites calling A
in B are inlined, but every inlined body is small (A was inlined into B
before other functions are inlined into A), there is no build time issue in
last release.

In an optimized build using the sample profile collected from last release,
without top-down inlining, we saw a case that A got very large because of
inlining, and then multiple callsites of A got inlined into B, and that led
to a huge B which caused significant build time issue besides profile
annotation issue.

To solve that problem, the patch enables the flag
sample-profile-top-down-load by default. sample-profile-top-down-load can
have better performance when it is enabled together with
sample-profile-merge-inlinee so in this patch we also enable
sample-profile-merge-inlinee by default.

Differential Revision: https://reviews.llvm.org/D82919
2020-07-08 09:23:18 -07:00

100 lines
4.4 KiB
LLVM

; Test we lose details of not inlined profile without '-sample-profile-merge-inlinee'
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=false -S | FileCheck -check-prefix=SCALE %s
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true -S | FileCheck -check-prefix=SCALE %s
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=false -S | FileCheck -check-prefix=SCALE %s
; Test we properly merge not inlined profile properly with '-sample-profile-merge-inlinee'
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true -S | FileCheck -check-prefix=MERGE %s
@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
define i32 @main() #0 !dbg !6 {
entry:
%retval = alloca i32, align 4
%s = alloca i32, align 4
%i = alloca i32, align 4
%tmp = load i32, i32* %i, align 4, !dbg !8
%tmp1 = load i32, i32* %s, align 4, !dbg !8
%call = call i32 @_Z3sumii(i32 %tmp, i32 %tmp1), !dbg !8
; SCALE: call i32 @_Z3sumii
; MERGE: call i32 @_Z3sumii
store i32 %call, i32* %s, align 4, !dbg !8
ret i32 0, !dbg !11
}
define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !12 {
entry:
%x.addr = alloca i32, align 4
%y.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
store i32 %y, i32* %y.addr, align 4
%tmp = load i32, i32* %x.addr, align 4, !dbg !13
%tmp1 = load i32, i32* %y.addr, align 4, !dbg !13
%add = add nsw i32 %tmp, %tmp1, !dbg !13
%tmp2 = load i32, i32* %x.addr, align 4, !dbg !13
%tmp3 = load i32, i32* %y.addr, align 4, !dbg !13
%cmp1 = icmp ne i32 %tmp3, 100, !dbg !13
br i1 %cmp1, label %if.then, label %if.else, !dbg !13
if.then: ; preds = %entry
%call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !14
ret i32 %add, !dbg !14
if.else: ; preds = %entry
ret i32 %add, !dbg !15
}
define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !16 {
entry:
%x.addr = alloca i32, align 4
%y.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
store i32 %y, i32* %y.addr, align 4
%tmp = load i32, i32* %x.addr, align 4, !dbg !17
%tmp1 = load i32, i32* %y.addr, align 4, !dbg !17
%add = sub nsw i32 %tmp, %tmp1, !dbg !17
ret i32 %add, !dbg !18
}
attributes #0 = { "use-sample-profile" }
declare i32 @printf(i8*, ...)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
!llvm.ident = !{!5}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "calls.cc", directory: ".")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 1, !"Debug Info Version", i32 3}
!5 = !{!"clang version 3.5 "}
!6 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!7 = !DISubroutineType(types: !2)
!8 = !DILocation(line: 10, scope: !9)
!9 = !DILexicalBlockFile(scope: !10, file: !1, discriminator: 2)
!10 = distinct !DILexicalBlock(scope: !6, file: !1, line: 10)
!11 = !DILocation(line: 12, scope: !6)
!12 = distinct !DISubprogram(name: "sum", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!13 = !DILocation(line: 4, scope: !12)
!14 = !DILocation(line: 5, scope: !12)
!15 = !DILocation(line: 6, scope: !12)
!16 = distinct !DISubprogram(name: "sub", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!17 = !DILocation(line: 20, scope: !16)
!18 = !DILocation(line: 21, scope: !16)
; SCALE: name: "sum"
; SCALE-NEXT: {!"function_entry_count", i64 46}
; SCALE: !{!"branch_weights", i32 11, i32 2}
; SCALE: !{!"branch_weights", i64 20}
; SCALE: name: "sub"
; SCALE-NEXT: {!"function_entry_count", i64 -1}
; MERGE: name: "sum"
; MERGE-NEXT: {!"function_entry_count", i64 46}
; MERGE: !{!"branch_weights", i32 11, i32 23}
; MERGE: !{!"branch_weights", i32 10}
; MERGE: name: "sub"
; MERGE-NEXT: {!"function_entry_count", i64 3}