This adds the following to the new PM-based inliner in PGO mode: * Use block frequency analysis to derive a callsite's profile count and use that to adjust the thresholds of hot and cold callsites. * Incrementally update the BFI of the caller after a callee gets inlined into it. This incremental update happens only within an invocation of the run method - BFI is not preserved across calls to run. Update the function entry count of the callee after inlining it into a caller. * I've tuned the thresholds for the hot and cold callsites using a hacked-up version of the old inliner that explicitly computes BFI, on a set of internal benchmarks and SPEC. Once the new PM-based pipeline stabilizes (IIRC Chandler mentioned there are known issues) I'll benchmark this again and adjust the thresholds if required. Inliner PGO support. Differential revision: https://reviews.llvm.org/D28331 llvm-svn: 292666
55 lines
1.5 KiB
LLVM
55 lines
1.5 KiB
LLVM
; RUN: opt < %s -inline -inlinecold-threshold=0 -S | FileCheck %s
|
|
|
|
; This tests that a cold callee gets the (lower) inlinecold-threshold even without
|
|
; a cold hint, and does not get inlined because its cost exceeds the inlinecold-threshold.
|
|
; A callee with an identical body does get inlined because its cost fits within the
|
|
; inline-threshold
|
|
|
|
; Callee annotated with function entry count 100 (!prof !21).
; The directives in @caller2 expect no call to this function to remain, i.e.
; this is the callee that is expected to be inlined.
; Its body is the same as @callee2's; only the attached !prof node differs.
define i32 @callee1(i32 %x) !prof !21 {
|
|
%x1 = add i32 %x, 1
|
|
%x2 = add i32 %x1, 1
|
|
; The value name %x3 matters: @caller2 matches `ret i32 %x3.i` in the output.
%x3 = add i32 %x2, 1
|
|
; NOTE(review): presumably this external call gives the body a non-trivial
; inline cost so the thresholds make a difference -- confirm.
call void @extern()
|
|
ret i32 %x3
|
|
}
|
|
|
|
; Callee annotated with function entry count 1 (!prof !22).
; The directives in @caller2 expect the call to this function to remain, i.e.
; this is the callee the test treats as cold and expects NOT to be inlined.
; Its body is the same as @callee1's; only the attached !prof node differs.
define i32 @callee2(i32 %x) !prof !22 {
|
|
; CHECK-LABEL: @callee2(
|
|
%x1 = add i32 %x, 1
|
|
%x2 = add i32 %x1, 1
|
|
%x3 = add i32 %x2, 1
|
|
; Same external call as in @callee1, keeping the two bodies identical.
call void @extern()
|
|
ret i32 %x3
|
|
}
|
|
|
|
; Caller that invokes @callee2 and then @callee1, feeding the first result into
; the second call. It carries the same entry count (1, !prof !22) as @callee2.
; Expected output, as encoded by the directives below:
;   - the call to @callee2 is still present (not inlined),
;   - no call to @callee1 remains (inlined),
;   - the returned value is %x3.i, i.e. @callee1's %x3 after inlining.
define i32 @caller2(i32 %y1) !prof !22 {
|
|
; CHECK-LABEL: @caller2(
|
|
; CHECK: call i32 @callee2
|
|
; CHECK-NOT: call i32 @callee1
|
|
; CHECK: ret i32 %x3.i
|
|
; Call to the entry-count-1 callee; expected to survive as a call.
%y2 = call i32 @callee2(i32 %y1)
|
|
; Call to the entry-count-100 callee; expected to be replaced by its body.
%y3 = call i32 @callee1(i32 %y2)
|
|
ret i32 %y3
|
|
}
|
|
|
|
; External function with no body in this module; called from both callees.
declare void @extern()
|
|
|
|
; The only module flag is !1, which carries the ProfileSummary defined below.
!llvm.module.flags = !{!1}
|
|
; Entry count 100: attached to @callee1 (the callee expected to be inlined).
!21 = !{!"function_entry_count", i64 100}
|
|
; Entry count 1: attached to @callee2 (expected to stay a call) and @caller2.
!22 = !{!"function_entry_count", i64 1}
|
|
|
|
; Profile summary module flag.
; NOTE(review): the summary values look hand-written rather than derived from
; the entry counts above (e.g. MaxFunctionCount is 1000 while the largest entry
; count in this file is 100) -- confirm that this is intentional.
!1 = !{i32 1, !"ProfileSummary", !2}
|
|
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
|
|
!3 = !{!"ProfileFormat", !"InstrProf"}
|
|
!4 = !{!"TotalCount", i64 10000}
|
|
!5 = !{!"MaxCount", i64 1000}
|
|
!6 = !{!"MaxInternalCount", i64 1}
|
|
!7 = !{!"MaxFunctionCount", i64 1000}
|
|
!8 = !{!"NumCounts", i64 3}
|
|
!9 = !{!"NumFunctions", i64 3}
|
|
!10 = !{!"DetailedSummary", !11}
|
|
; NOTE(review): each entry below is presumably (cutoff, minimum count, number
; of counts); verify against the ProfileSummary documentation.
!11 = !{!12, !13, !14}
|
|
!12 = !{i32 10000, i64 100, i32 1}
|
|
!13 = !{i32 999000, i64 100, i32 1}
|
|
!14 = !{i32 999999, i64 1, i32 2}
|