If tail call optimization was not disabled for the profiled binary, the call contexts will be missing frames for tail calls. Handle this by performing a limited search through tail call edges for the profiled callee when a discontinuity is detected. The search depth is adjustable but defaults to 5. If we are able to identify a short sequence of tail calls, update the graph for those calls. In the case of ThinLTO, synthesize the necessary CallsiteInfos for carrying the cloning information to the backends.
90 lines
3.0 KiB
LLVM
90 lines
3.0 KiB
LLVM
;; Test to make sure that missing tail call frames in memprof profiles are
|
|
;; identified and cloned as needed for regular LTO.
|
|
|
|
;; -stats requires asserts
|
|
; REQUIRES: asserts
|
|
|
|
; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
|
|
; RUN: -stats %s -S 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=IR
|
|
|
|
;; Check that a smaller search depth limit blocks the tail call discovery
|
|
; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
|
|
; RUN: -memprof-tail-call-search-depth=1 -stats %s -S 2>&1 | FileCheck %s \
|
|
; RUN: --implicit-check-not=_Z3foov.memprof --implicit-check-not="found via tail calls"
|
|
|
|
source_filename = "memprof-tailcall.cc"
|
|
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; Function Attrs: noinline
|
|
; IR-LABEL: @_Z3barv()
|
|
;; bar() holds the profiled allocation. The call to @_Znam carries !memprof
;; metadata (!0) listing one notcold and one cold context, plus !callsite !5,
;; whose id is the first stack id in both of those contexts.
define ptr @_Z3barv() local_unnamed_addr #0 {
|
|
entry:
|
|
;; In the original (un-cloned) bar the allocation call is expected to end up
;; with the attribute group captured below as NOTCOLD.
; IR: call ptr @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
|
|
%call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !5
|
|
ret ptr %call
|
|
}
|
|
|
|
; Function Attrs: nobuiltin allocsize(0)
|
|
;; Allocation function called from bar(); only declared in this module.
declare ptr @_Znam(i64) #1
|
|
|
|
; Function Attrs: noinline
|
|
; IR-LABEL: @_Z3bazv()
|
|
;; baz() does nothing but tail call bar(). The call carries no !callsite
;; metadata, so no stack id in the contexts below refers to this frame; it is
;; one of the "missing tail call frames" described at the top of this test.
define ptr @_Z3bazv() #0 {
|
|
entry:
|
|
;; The original baz keeps calling the original bar.
; IR: call ptr @_Z3barv()
|
|
%call = tail call ptr @_Z3barv()
|
|
ret ptr %call
|
|
}
|
|
|
|
; Function Attrs: noinline
|
|
; IR-LABEL: @_Z3foov()
|
|
;; foo() does nothing but tail call baz(). As in baz(), the call carries no
;; !callsite metadata, so this frame does not appear in the profiled contexts.
define ptr @_Z3foov() #0 {
|
|
entry:
|
|
;; The original foo keeps calling the original baz.
; IR: call ptr @_Z3bazv()
|
|
%call = tail call ptr @_Z3bazv()
|
|
ret ptr %call
|
|
}
|
|
|
|
; Function Attrs: noinline
|
|
; IR-LABEL: @main()
|
|
;; main() calls foo() twice. Each call has its own !callsite id, and each id is
;; the second (caller) stack id of one of the two allocation contexts.
define i32 @main() #0 {
|
|
;; The first call to foo is part of a notcold context, and should use the
|
|
;; original functions.
|
|
;; (Callsite !6 matches the second stack id of the notcold context !2.)
|
|
; IR: call ptr @_Z3foov()
|
|
%call = tail call ptr @_Z3foov(), !callsite !6
|
|
;; The second call to foo is part of a cold context (callsite !7 matches the
|
|
;; second stack id of the cold context !4), and should call the cloned functions.
|
|
; IR: call ptr @_Z3foov.memprof.1()
|
|
%call1 = tail call ptr @_Z3foov(), !callsite !7
|
|
ret i32 0
|
|
}
|
|
|
|
; IR-LABEL: @_Z3barv.memprof.1()
|
|
; IR: call ptr @_Znam(i64 10) #[[COLD:[0-9]+]]
|
|
; IR-LABEL: @_Z3bazv.memprof.1()
|
|
; IR: call ptr @_Z3barv.memprof.1()
|
|
; IR-LABEL: @_Z3foov.memprof.1()
|
|
; IR: call ptr @_Z3bazv.memprof.1()
|
|
|
|
; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }
|
|
; IR: attributes #[[COLD]] = { builtin allocsize(0) "memprof"="cold" }
|
|
|
|
;; Expected tail call search statistics. NOTE(review): these numbers are
;; consistent with each of the two callsites in main reaching bar through the
;; two tail calls foo -> baz -> bar (2 callees found, depth 2 each, 4 in
;; aggregate) -- confirm against the pass implementation.
; STATS: 2 memprof-context-disambiguation - Number of profiled callees found via tail calls
|
|
; STATS: 4 memprof-context-disambiguation - Aggregate depth of profiled callees found via tail calls
|
|
; STATS: 2 memprof-context-disambiguation - Maximum depth of profiled callees found via tail calls
|
|
|
|
;; #0 is applied to every function defined in this file (bar, baz, foo, main).
attributes #0 = { noinline }
|
|
;; #1 is applied to the @_Znam declaration.
attributes #1 = { nobuiltin allocsize(0) }
|
|
;; #2 is applied to the call to @_Znam inside bar().
attributes #2 = { builtin allocsize(0) }
|
|
|
|
;; !0 is the !memprof list on the allocation call in bar(): one notcold
;; context (!1) and one cold context (!3).
!0 = !{!1, !3}
|
|
!1 = !{!2, !"notcold"}
|
|
;; Stack ids of the notcold context: the first id equals callsite !5 (the
;; allocation call in bar), the second equals callsite !6 (first call in main).
!2 = !{i64 3186456655321080972, i64 8632435727821051414}
|
|
!3 = !{!4, !"cold"}
|
|
;; Stack ids of the cold context: the first id equals callsite !5, the second
;; equals callsite !7 (second call in main).
!4 = !{i64 3186456655321080972, i64 -3421689549917153178}
|
|
;; !callsite of the allocation call in bar().
!5 = !{i64 3186456655321080972}
|
|
;; !callsite of the first call to foo() in main().
!6 = !{i64 8632435727821051414}
|
|
;; !callsite of the second call to foo() in main().
!7 = !{i64 -3421689549917153178}
|