diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 79a91861554d..c4ee75e7a6da 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -197,10 +197,6 @@ private: BoltAddressTranslation *BAT{nullptr}; - /// Whether pre-aggregated profile needs to convert branch profile into call - /// to continuation fallthrough profile. - bool NeedsConvertRetProfileToCallCont{false}; - /// Update function execution profile with a recorded trace. /// A trace is region of code executed between two LBR entries supplied in /// execution order. diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index a8a187974418..80f4ea0c1b70 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -720,23 +720,6 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, : isReturn(Func.disassembleInstructionAtOffset(Offset)); }; - // Returns whether \p Offset in \p Func may be a call continuation excluding - // entry points and landing pads. - auto checkCallCont = [&](const BinaryFunction &Func, const uint64_t Offset) { - // No call continuation at a function start. - if (!Offset) - return false; - - // FIXME: support BAT case where the function might be in empty state - // (split fragments declared non-simple). - if (!Func.hasCFG()) - return false; - - // The offset should not be an entry point or a landing pad. - const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset); - return ContBB && !ContBB->isEntryPoint() && !ContBB->isLandingPad(); - }; - // Mutates \p Addr to an offset into the containing function, performing BAT // offset translation and parent lookup. // @@ -749,8 +732,7 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, Addr -= Func->getAddress(); - bool IsRetOrCallCont = - IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr); + bool IsRet = IsFrom && checkReturn(*Func, Addr); if (BAT) Addr = BAT->translate(Func->getAddress(), Addr, IsFrom); @@ -761,24 +743,16 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, NumColdSamples += Count; if (!ParentFunc) - return std::pair{Func, IsRetOrCallCont}; + return std::pair{Func, IsRet}; - return std::pair{ParentFunc, IsRetOrCallCont}; + return std::pair{ParentFunc, IsRet}; }; - uint64_t ToOrig = To; auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true); - auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom*/ false); + auto [ToFunc, _] = handleAddress(To, /*IsFrom*/ false); if (!FromFunc && !ToFunc) return false; - // Record call to continuation trace. - if (NeedsConvertRetProfileToCallCont && FromFunc != ToFunc && - (IsReturn || IsCallCont)) { - LBREntry First{ToOrig - 1, ToOrig - 1, false}; - LBREntry Second{ToOrig, ToOrig, false}; - return doTrace(First, Second, Count); - } // Ignore returns. if (IsReturn) return true; @@ -1235,21 +1209,14 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { ErrorOr TypeOrErr = parseString(FieldSeparator); if (std::error_code EC = TypeOrErr.getError()) return EC; - // Pre-aggregated profile with branches and fallthroughs needs to convert - // return profile into call to continuation fall-through. - auto Type = AggregatedLBREntry::BRANCH; - if (TypeOrErr.get() == "B") { - NeedsConvertRetProfileToCallCont = true; + auto Type = AggregatedLBREntry::TRACE; + if (LLVM_LIKELY(TypeOrErr.get() == "T")) { + } else if (TypeOrErr.get() == "B") { Type = AggregatedLBREntry::BRANCH; } else if (TypeOrErr.get() == "F") { - NeedsConvertRetProfileToCallCont = true; Type = AggregatedLBREntry::FT; } else if (TypeOrErr.get() == "f") { - NeedsConvertRetProfileToCallCont = true; Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN; - } else if (TypeOrErr.get() == "T") { - // Trace is expanded into B and [Ff] - Type = AggregatedLBREntry::TRACE; } else { reportError("expected T, B, F or f"); return make_error_code(llvm::errc::io_error); diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s index ee72d8f62e03..44e3bf21c14c 100644 --- a/bolt/test/X86/callcont-fallthru.s +++ b/bolt/test/X86/callcont-fallthru.s @@ -4,31 +4,12 @@ # RUN: %clang %cflags -fpic -shared -xc /dev/null -o %t.so ## Link against a DSO to ensure PLT entries. # RUN: %clangxx %cxxflags %s %t.so -o %t -Wl,-q -nostdlib -# RUN: link_fdata %s %t %t.pa1 PREAGG1 -# RUN: link_fdata %s %t %t.pa2 PREAGG2 -# RUN: link_fdata %s %t %t.pa3 PREAGG3 # RUN: link_fdata %s %t %t.pat PREAGGT1 # RUN: link_fdata %s %t %t.pat2 PREAGGT2 # RUN: link_fdata %s %t %t.patplt PREAGGPLT -## Check normal case: fallthrough is not LP or secondary entry. # RUN: llvm-strip --strip-unneeded %t -o %t.strip # RUN: llvm-objcopy --remove-section=.eh_frame %t.strip %t.noeh -# RUN: llvm-bolt %t.strip --pa -p %t.pa1 -o %t.out \ -# RUN: --print-cfg --print-only=main | FileCheck %s - -## Check that getFallthroughsInTrace correctly handles a trace starting at plt -## call continuation -# RUN: llvm-bolt %t.strip --pa -p %t.pa2 -o %t.out2 \ -# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK2 - -## Check that we don't treat secondary entry points as call continuation sites. -# RUN: llvm-bolt %t --pa -p %t.pa3 -o %t.out \ -# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3 - -## Check fallthrough to a landing pad case. -# RUN: llvm-bolt %t.strip --pa -p %t.pa3 -o %t.out \ -# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3 ## Check pre-aggregated traces attach call continuation fallthrough count # RUN: llvm-bolt %t.noeh --pa -p %t.pat -o %t.out \ @@ -77,7 +58,6 @@ Ltmp0_br: ## Check PLT traces are accepted # PREAGGPLT: T #Ltmp0_br# #puts@plt# #puts@plt# 3 ## Target is an external-origin call continuation -# PREAGG1: B X:0 #Ltmp1# 2 0 # PREAGGT1: T X:0 #Ltmp1# #Ltmp4_br# 2 # CHECK: callq puts@PLT # CHECK-NEXT: count: 2 @@ -97,18 +77,15 @@ Ltmp4_br: movl $0xa, -0x18(%rbp) callq foo ## Target is a binary-local call continuation -# PREAGG1: B #Lfoo_ret# #Ltmp3# 1 0 # PREAGGT1: T #Lfoo_ret# #Ltmp3# #Ltmp3_br# 1 # CHECK: callq foo # CHECK-NEXT: count: 1 ## PLT call continuation fallthrough spanning the call -# PREAGG2: F #Ltmp1# #Ltmp3_br# 3 # CHECK2: callq foo # CHECK2-NEXT: count: 3 ## Target is a secondary entry point (unstripped) or a landing pad (stripped) -# PREAGG3: B X:0 #Ltmp3# 2 0 # PREAGGT2: T X:0 #Ltmp3# #Ltmp3_br# 2 # CHECK3: callq foo # CHECK3-NEXT: count: 0