[BOLT] Explicitly check for returns when extending call continuation profile (#143295)

Call continuation logic relies on assumptions about fall-through origin:
- the branch is external to the function,
- fall-through start is at the beginning of the block,
- the block is not an entry point or a landing pad.

Leverage trace information to explicitly check whether the origin is a
return instruction, and defer to the checks above only when the branch
source is DSO-external.

This covers both regular and BAT cases, addressing call continuation
fall-through undercounting in the latter mode, which improves BAT
profile quality metrics. For example, for one large binary:
- CFG discontinuity 21.83% -> 0.00%,
- CFG flow imbalance 10.77%/100.00% -> 3.40%/13.82% (weighted/worst),
- CG flow imbalance 8.49% -> 8.49%.

Depends on #143289.

Test Plan: updated callcont-fallthru.s
This commit is contained in:
Amir Ayupov
2025-06-17 06:28:27 -07:00
committed by GitHub
parent 816ab1af0d
commit 9fed480f18
3 changed files with 89 additions and 64 deletions

View File

@@ -4,29 +4,43 @@
# RUN: %clang %cflags -fpic -shared -xc /dev/null -o %t.so
## Link against a DSO to ensure PLT entries.
# RUN: %clangxx %cxxflags %s %t.so -o %t -Wl,-q -nostdlib
# RUN: link_fdata %s %t %t.pat PREAGGT1
# RUN: link_fdata %s %t %t.pat2 PREAGGT2
# RUN-DISABLED: link_fdata %s %t %t.patplt PREAGGPLT
# Trace to a call continuation, not a landing pad/entry point
# RUN: link_fdata %s %t %t.pa-base PREAGG-BASE
# Trace from a return to a landing pad/entry point call continuation
# RUN: link_fdata %s %t %t.pa-ret PREAGG-RET
# Trace from an external location to a landing pad/entry point call continuation
# RUN: link_fdata %s %t %t.pa-ext PREAGG-EXT
# RUN-DISABLED: link_fdata %s %t %t.pa-plt PREAGG-PLT
# RUN: llvm-strip --strip-unneeded %t -o %t.strip
# RUN: llvm-objcopy --remove-section=.eh_frame %t.strip %t.noeh
## Check pre-aggregated traces attach call continuation fallthrough count
# RUN: llvm-bolt %t.noeh --pa -p %t.pat -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s
## in the basic case (not an entry point, not a landing pad).
# RUN: llvm-bolt %t.noeh --pa -p %t.pa-base -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-BASE
## Check pre-aggregated traces don't attach call continuation fallthrough count
## to secondary entry point (unstripped)
# RUN: llvm-bolt %t --pa -p %t.pat2 -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3
## Check pre-aggregated traces don't attach call continuation fallthrough count
## to landing pad (stripped, LP)
# RUN: llvm-bolt %t.strip --pa -p %t.pat2 -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3
## Check pre-aggregated traces from a return attach call continuation
## fallthrough count to secondary entry point (unstripped)
# RUN: llvm-bolt %t --pa -p %t.pa-ret -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH
## Check pre-aggregated traces from a return attach call continuation
## fallthrough count to landing pad (stripped, landing pad)
# RUN: llvm-bolt %t.strip --pa -p %t.pa-ret -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH
## Check pre-aggregated traces from external location don't attach call
## continuation fallthrough count to secondary entry point (unstripped)
# RUN: llvm-bolt %t --pa -p %t.pa-ext -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-SKIP
## Check pre-aggregated traces from external location don't attach call
## continuation fallthrough count to landing pad (stripped, landing pad)
# RUN: llvm-bolt %t.strip --pa -p %t.pa-ext -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-SKIP
## Check pre-aggregated traces don't report zero-sized PLT fall-through as
## invalid trace
# RUN-DISABLED: llvm-bolt %t.strip --pa -p %t.patplt -o %t.out | FileCheck %s \
# RUN-DISABLED: llvm-bolt %t.strip --pa -p %t.pa-plt -o %t.out | FileCheck %s \
# RUN-DISABLED: --check-prefix=CHECK-PLT
# CHECK-PLT: traces mismatching disassembled function contents: 0
@@ -56,11 +70,11 @@ main:
Ltmp0_br:
callq puts@PLT
## Check PLT traces are accepted
# PREAGGPLT: T #Ltmp0_br# #puts@plt# #puts@plt# 3
# PREAGG-PLT: T #Ltmp0_br# #puts@plt# #puts@plt# 3
## Target is an external-origin call continuation
# PREAGGT1: T X:0 #Ltmp1# #Ltmp4_br# 2
# CHECK: callq puts@PLT
# CHECK-NEXT: count: 2
# PREAGG-BASE: T X:0 #Ltmp1# #Ltmp4_br# 2
# CHECK-BASE: callq puts@PLT
# CHECK-BASE-NEXT: count: 2
Ltmp1:
movq -0x10(%rbp), %rax
@@ -71,24 +85,18 @@ Ltmp4:
cmpl $0x0, -0x14(%rbp)
Ltmp4_br:
je Ltmp0
# CHECK2: je .Ltmp0
# CHECK2-NEXT: count: 3
movl $0xa, -0x18(%rbp)
callq foo
## Target is a binary-local call continuation
# PREAGGT1: T #Lfoo_ret# #Ltmp3# #Ltmp3_br# 1
# CHECK: callq foo
# CHECK-NEXT: count: 1
## PLT call continuation fallthrough spanning the call
# CHECK2: callq foo
# CHECK2-NEXT: count: 3
# PREAGG-RET: T #Lfoo_ret# #Ltmp3# #Ltmp3_br# 1
## Target is a secondary entry point (unstripped) or a landing pad (stripped)
# PREAGGT2: T X:0 #Ltmp3# #Ltmp3_br# 2
# CHECK3: callq foo
# CHECK3-NEXT: count: 0
# PREAGG-EXT: T X:0 #Ltmp3# #Ltmp3_br# 1
# CHECK-ATTACH: callq foo
# CHECK-ATTACH-NEXT: count: 1
# CHECK-SKIP: callq foo
# CHECK-SKIP-NEXT: count: 0
Ltmp3:
cmpl $0x0, -0x18(%rbp)