diff --git a/bolt/BinaryBasicBlock.h b/bolt/BinaryBasicBlock.h index 52db09c8a8ed..90b4f11cdc7f 100644 --- a/bolt/BinaryBasicBlock.h +++ b/bolt/BinaryBasicBlock.h @@ -342,12 +342,17 @@ public: /// an unconditional branch) and thus has 2 successors, return a successor /// corresponding to a jump condition which could be true or false. /// Return nullptr if the basic block does not have a conditional jump. - const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const { + BinaryBasicBlock *getConditionalSuccessor(bool Condition) { if (succ_size() != 2) return nullptr; return Successors[Condition == true ? 0 : 1]; } + const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const { + return + const_cast(this)->getConditionalSuccessor(Condition); + } + /// Find the fallthrough successor for a block, or nullptr if there is /// none. const BinaryBasicBlock* getFallthrough() const { diff --git a/bolt/BinaryFunction.cpp b/bolt/BinaryFunction.cpp index 8f4df24254fd..3820f7713f4f 100644 --- a/bolt/BinaryFunction.cpp +++ b/bolt/BinaryFunction.cpp @@ -1466,7 +1466,7 @@ bool BinaryFunction::buildCFG() { for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) { const auto Offset = I->first; - const auto &Instr = I->second; + auto &Instr = I->second; auto LI = Labels.find(Offset); if (LI != Labels.end()) { @@ -1819,6 +1819,11 @@ uint64_t BinaryFunction::getFunctionScore() { if (FunctionScore != -1) return FunctionScore; + if (!isSimple() || !hasValidProfile()) { + FunctionScore = 0; + return FunctionScore; + } + uint64_t TotalScore = 0ULL; for (auto BB : layout()) { uint64_t BBExecCount = BB->getExecutionCount(); @@ -2620,6 +2625,41 @@ void BinaryFunction::postProcessBranches() { assert(validateCFG() && "invalid CFG"); } +const MCSymbol *BinaryFunction::getSymbolForEntry(uint64_t EntryNum) const { + if (EntryNum == 0) + return getSymbol(); + + if (!isMultiEntry()) + return nullptr; + + uint64_t NumEntries = 0; + for (auto *BB : BasicBlocks) { + if 
(!BB->isEntryPoint()) + continue; + if (NumEntries == EntryNum) + return BB->getLabel(); + ++NumEntries; + } + + return nullptr; +} + +uint64_t BinaryFunction::getEntryForSymbol(const MCSymbol *EntrySymbol) const { + if (getSymbol() == EntrySymbol) + return 0; + + uint64_t NumEntries = 0; + for (const auto *BB : BasicBlocks) { + if (!BB->isEntryPoint()) + continue; + if (BB->getLabel() == EntrySymbol) + return NumEntries; + ++NumEntries; + } + + llvm_unreachable("no entry for symbol"); +} + BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const { BasicBlockOrderType DFS; unsigned Index = 0; @@ -2649,8 +2689,24 @@ BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const { Stack.push(SuccBB); } - for (auto *SuccBB : BB->successors()) { - Stack.push(SuccBB); + const MCSymbol *TBB = nullptr; + const MCSymbol *FBB = nullptr; + MCInst *CondBranch = nullptr; + MCInst *UncondBranch = nullptr; + if (BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch) && + CondBranch && BB->succ_size() == 2) { + if (BC.MIA->getCanonicalBranchOpcode(CondBranch->getOpcode()) == + CondBranch->getOpcode()) { + Stack.push(BB->getConditionalSuccessor(true)); + Stack.push(BB->getConditionalSuccessor(false)); + } else { + Stack.push(BB->getConditionalSuccessor(false)); + Stack.push(BB->getConditionalSuccessor(true)); + } + } else { + for (auto *SuccBB : BB->successors()) { + Stack.push(SuccBB); + } } } @@ -2826,6 +2882,9 @@ bool BinaryFunction::equalJumpTables(const JumpTable *JumpTableA, } std::size_t BinaryFunction::hash(bool Recompute, bool UseDFS) const { + if (size() == 0) + return 0; + assert(hasCFG() && "function is expected to have CFG"); if (!Recompute) @@ -3687,13 +3746,14 @@ DynoStats BinaryFunction::getDynoStats() const { Stats[DynoStats::INDIRECT_CALLS] += CallFreq; } else if (const auto *CallSymbol = BC.MIA->getTargetSymbol(Instr)) { const auto *BF = BC.getFunctionForSymbol(CallSymbol); - if (BF && BF->isPLTFunction()) + if (BF && BF->isPLTFunction()) { 
Stats[DynoStats::PLT_CALLS] += CallFreq; // We don't process PLT functions and hence have to adjust // relevant dynostats here. Stats[DynoStats::LOADS] += CallFreq; Stats[DynoStats::INDIRECT_CALLS] += CallFreq; + } } } diff --git a/bolt/BinaryFunction.h b/bolt/BinaryFunction.h index 7d755eff2b07..5ed98c22da13 100644 --- a/bolt/BinaryFunction.h +++ b/bolt/BinaryFunction.h @@ -164,6 +164,40 @@ enum IndirectCallPromotionType : char { ICP_ALL /// Perform ICP on calls and jump tables. }; +/// Information on a single indirect call to a particular callee. +struct IndirectCallProfile { + bool IsFunction; + uint32_t Offset; + StringRef Name; + uint64_t Count; + uint64_t Mispreds; + + IndirectCallProfile(bool IsFunction, StringRef Name, uint64_t Count, + uint64_t Mispreds, uint32_t Offset = 0) + : IsFunction(IsFunction), Offset(Offset), Name(Name), Count(Count), + Mispreds(Mispreds) {} + + bool operator==(const IndirectCallProfile &Other) const { + return IsFunction == Other.IsFunction && + Name == Other.Name && + Offset == Other.Offset; + } +}; + +/// Aggregated information for an indirect call site. +using IndirectCallSiteProfile = SmallVector; + +inline raw_ostream &operator<<(raw_ostream &OS, + const bolt::IndirectCallSiteProfile &ICSP) { + const char *Sep = ""; + for (auto &CSP : ICSP) { + OS << Sep << "{ " << (CSP.IsFunction ? CSP.Name : "") << ": " + << CSP.Count << " (" << CSP.Mispreds << " misses) }"; + Sep = ", "; + } + return OS; +} + /// BinaryFunction is a representation of machine-level function. /// /// We use the term "Binary" as "Machine" was already taken. @@ -294,6 +328,14 @@ private: /// Profile match ratio for BranchData. float ProfileMatchRatio{0.0f}; + /// Indicates if function profile was collected using LBRs. + bool HasLBRProfile{true}; + + /// For functions with mismatched profile we store all call profile + /// information at a function level (as opposed to tying it to + /// specific call sites). 
+ IndirectCallSiteProfile AllCallSites; + /// Score of the function (estimated number of instructions executed, /// according to profile data). -1 if the score has not been calculated yet. int64_t FunctionScore{-1}; @@ -511,11 +553,11 @@ private: /// function and that apply before the entry basic block). CFIInstrMapType CIEFrameInstructions; +public: /// Representation of a jump table. /// /// The jump table may include other jump tables that are referenced by /// a different label at a different offset in this jump table. -public: struct JumpTable { enum JumpTableType : char { JTT_NORMAL, @@ -745,10 +787,6 @@ private: Instructions.emplace(Offset, std::forward(Instruction)); } - /// Return instruction at a given offset in the function. Valid before - /// CFG is constructed or while instruction offsets are available in CFG. - MCInst *getInstructionAtOffset(uint64_t Offset); - /// Analyze and process indirect branch \p Instruction before it is /// added to Instructions list. IndirectBranchType processIndirectBranch(MCInst &Instruction, @@ -978,6 +1016,10 @@ public: return nullptr; } + /// Return instruction at a given offset in the function. Valid before + /// CFG is constructed or while instruction offsets are available in CFG. + MCInst *getInstructionAtOffset(uint64_t Offset); + /// Return the name of the function as extracted from the binary file. /// If the function has multiple names - return the last one /// followed by "(*#)". @@ -1102,6 +1144,13 @@ public: return OutputSymbol; } + /// Return MC symbol corresponding to an enumerated entry for multiple-entry + /// functions. + const MCSymbol *getSymbolForEntry(uint64_t EntryNum) const; + + /// Return an entry ID corresponding to a symbol. + uint64_t getEntryForSymbol(const MCSymbol *EntrySymbol) const; + MCSymbol *getColdSymbol() { if (ColdSymbol) return ColdSymbol; @@ -1895,6 +1944,15 @@ public: MemData = Data; } + /// Return all call site profile info for this function. 
+ IndirectCallSiteProfile &getAllCallSites() { + return AllCallSites; + } + + const IndirectCallSiteProfile &getAllCallSites() const { + return AllCallSites; + } + /// Walks the list of basic blocks filling in missing information about /// edge frequency for fall-throughs. /// @@ -2004,6 +2062,9 @@ public: /// isIdenticalWith. void mergeProfileDataInto(BinaryFunction &BF) const; + /// Convert function-level branch data into instruction annotations. + void convertBranchData(); + /// Returns true if this function has identical code and CFG with /// the given function \p BF. /// @@ -2303,6 +2364,13 @@ template <> struct GraphTraits> : } }; +template <> +class MCAnnotationPrinter { +public: + void print(raw_ostream &OS, const bolt::IndirectCallSiteProfile &ICSP) const { + OS << ICSP; + } +}; } // namespace llvm diff --git a/bolt/BinaryFunctionProfile.cpp b/bolt/BinaryFunctionProfile.cpp index 66bf634ef6e9..30dc96e72ae5 100644 --- a/bolt/BinaryFunctionProfile.cpp +++ b/bolt/BinaryFunctionProfile.cpp @@ -261,7 +261,8 @@ bool BinaryFunction::recordBranch(uint64_t From, uint64_t To, if (!FromBB->getSuccessor(ToBB->getLabel())) { // Check if this is a recursive call or a return from a recursive call. - if (ToBB->isEntryPoint()) { + if (ToBB->isEntryPoint() && (BC.MIA->isCall(*FromInstruction) || + BC.MIA->isIndirectBranch(*FromInstruction))) { // Execution count is already accounted for. 
return true; } @@ -289,8 +290,18 @@ bool BinaryFunction::recordEntry(uint64_t To, bool Mispred, uint64_t Count) { if (!hasProfile()) ExecutionCount = 0; - if (To == 0) + BinaryBasicBlock *EntryBB = nullptr; + if (To == 0) { ExecutionCount += Count; + if (!empty()) + EntryBB = &front(); + } else if (auto *BB = getBasicBlockAtOffset(To)) { + if (BB->isEntryPoint()) + EntryBB = BB; + } + + if (EntryBB) + EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count); return true; } @@ -319,8 +330,7 @@ void BinaryFunction::postProcessProfile() { return; } - // Is we are using non-LBR sampling there's nothing left to do. - if (!BranchData) + if (!HasLBRProfile) return; // Bug compatibility with previous version - double accounting for conditional @@ -339,7 +349,8 @@ void BinaryFunction::postProcessProfile() { } // Pre-sort branch data. - std::stable_sort(BranchData->Data.begin(), BranchData->Data.end()); + if (BranchData) + std::stable_sort(BranchData->Data.begin(), BranchData->Data.end()); // If we have at least some branch data for the function indicate that it // was executed. @@ -347,39 +358,22 @@ void BinaryFunction::postProcessProfile() { ExecutionCount = 1; } - // Compute preliminary execution count for each basic block + // Compute preliminary execution count for each basic block. 
for (auto *BB : BasicBlocks) { - BB->ExecutionCount = 0; + if ((!BB->isEntryPoint() && !BB->isLandingPad()) || + BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE) + BB->ExecutionCount = 0; } for (auto *BB : BasicBlocks) { auto SuccBIIter = BB->branch_info_begin(); for (auto Succ : BB->successors()) { - if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE) + if (!Succ->isEntryPoint() && + SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE) Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count); ++SuccBIIter; } } - // Set entry BBs to zero, we'll update their execution count next with entry - // data (we maintain a separate data structure for branches to function entry - // points) - for (auto *BB : BasicBlocks) { - if (BB->isEntryPoint()) - BB->ExecutionCount = 0; - } - - // Update execution counts of landing pad blocks and entry BBs - // There is a slight skew introduced here as branches originated from RETs - // may be accounted for in the execution count of an entry block if the last - // instruction in a predecessor fall-through block is a call. This situation - // should rarely happen because there are few multiple-entry functions. - for (const auto &I : BranchData->EntryData) { - BinaryBasicBlock *BB = getBasicBlockAtOffset(I.To.Offset); - if (BB && (BB->isEntryPoint() || BB->isLandingPad())) { - BB->setExecutionCount(BB->getExecutionCount() + I.Branches); - } - } - inferFallThroughCounts(); // Update profile information for jump tables based on CFG branch data. @@ -442,6 +436,7 @@ void BinaryFunction::readProfile() { return; if (!BC.DR.hasLBR()) { + HasLBRProfile = false; readSampleData(); return; } @@ -452,6 +447,23 @@ void BinaryFunction::readProfile() { if (!BranchData) return; + // Assign basic block counts to function entry points. These only include + // counts for outside entries. 
+ // + // There is a slight skew introduced here as branches originating from RETs + // may be accounted for in the execution count of an entry block if the last + // instruction in a predecessor fall-through block is a call. This situation + // should rarely happen because there are few multiple-entry functions. + for (const auto &BI : BranchData->EntryData) { + BinaryBasicBlock *BB = getBasicBlockAtOffset(BI.To.Offset); + if (BB && (BB->isEntryPoint() || BB->isLandingPad())) { + auto Count = BB->getExecutionCount(); + if (Count == BinaryBasicBlock::COUNT_NO_PROFILE) + Count = 0; + BB->setExecutionCount(Count + BI.Branches); + } + } + uint64_t MismatchedBranches = 0; for (const auto &BI : BranchData->Data) { if (BI.From.Name != BI.To.Name) { @@ -466,25 +478,59 @@ void BinaryFunction::readProfile() { } } - // Special profile data propagation is required for conditional tail calls. - for (auto BB : BasicBlocks) { - auto *CTCInstr = BB->getLastNonPseudoInstr(); - if (!CTCInstr || !BC.MIA->getConditionalTailCall(*CTCInstr)) + // Convert branch data into annotations. + convertBranchData(); +} + +void BinaryFunction::convertBranchData() { + if (!BranchData || empty()) + return; + + // Profile information for calls. + // + // There are 3 cases that we annotate differently: + // 1) Conditional tail calls that could be mispredicted. + // 2) Indirect calls to multiple destinations with mispredictions. + // Before we validate CFG we have to handle indirect branches here too. + // 3) Regular direct calls. The count could be different from containing + // basic block count. Keep this data in case we find it useful. + // + for (auto &BI : BranchData->Data) { + // Ignore internal branches. 
+ if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0) continue; - auto OffsetOrErr = - BC.MIA->tryGetAnnotationAs(*CTCInstr, "Offset"); - assert(OffsetOrErr && "offset not set for conditional tail call"); - - auto BranchInfoOrErr = BranchData->getDirectCallBranch(*OffsetOrErr); - if (!BranchInfoOrErr) + auto *Instr = getInstructionAtOffset(BI.From.Offset); + if (!Instr || + (!BC.MIA->isCall(*Instr) && !BC.MIA->isIndirectBranch(*Instr))) continue; - BC.MIA->addAnnotation(BC.Ctx.get(), *CTCInstr, "CTCTakenCount", - BranchInfoOrErr->Branches); - BC.MIA->addAnnotation(BC.Ctx.get(), *CTCInstr, "CTCMispredCount", - BranchInfoOrErr->Mispreds); + auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) { + if (opts::Verbosity >= 1 && BC.MIA->hasAnnotation(*Instr, Name)) { + errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x" + << Twine::utohexstr(BI.From.Offset) + << " in function " << *this << '\n'; + } + auto &Value = BC.MIA->getOrCreateAnnotationAs(BC.Ctx.get(), + *Instr, Name); + Value += Count; + }; + + if (BC.MIA->isIndirectCall(*Instr) || BC.MIA->isIndirectBranch(*Instr)) { + IndirectCallSiteProfile &CSP = + BC.MIA->getOrCreateAnnotationAs(BC.Ctx.get(), + *Instr, "CallProfile"); + CSP.emplace_back(BI.To.IsSymbol, BI.To.Name, BI.Branches, + BI.Mispreds); + } else if (BC.MIA->getConditionalTailCall(*Instr)) { + setOrUpdateAnnotation("CTCTakenCount", BI.Branches); + setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds); + } else { + setOrUpdateAnnotation("Count", BI.Branches); + } } + + BranchData = nullptr; } void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const { diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt index 959b19915f10..5e6ce6fe7b8c 100644 --- a/bolt/CMakeLists.txt +++ b/bolt/CMakeLists.txt @@ -71,6 +71,8 @@ add_llvm_tool(llvm-bolt DebugData.cpp DWARFRewriter.cpp Exceptions.cpp + ProfileReader.cpp + ProfileWriter.cpp RewriteInstance.cpp ) diff --git a/bolt/DataAggregator.cpp 
b/bolt/DataAggregator.cpp index a964c73069e4..1b039c44225a 100644 --- a/bolt/DataAggregator.cpp +++ b/bolt/DataAggregator.cpp @@ -746,8 +746,7 @@ std::error_code DataAggregator::parseBranchEvents() { ++NumSamples; NumEntries += Sample.LBR.size(); - // Parser semantic actions - // LBRs are stored in reverse execution order. NextLBR refers to next + // LBRs are stored in reverse execution order. NextLBR refers to the next // executed branch record. const LBREntry *NextLBR{nullptr}; for (const auto &LBR : Sample.LBR) { diff --git a/bolt/Passes/BinaryFunctionCallGraph.cpp b/bolt/Passes/BinaryFunctionCallGraph.cpp index 24dc378e1e4c..2b49f323cfab 100644 --- a/bolt/Passes/BinaryFunctionCallGraph.cpp +++ b/bolt/Passes/BinaryFunctionCallGraph.cpp @@ -134,7 +134,6 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC, continue; } - const auto *BranchData = Function->getBranchData(); const auto SrcId = lookupNode(Function); // Offset of the current basic block from the beginning of the function uint64_t Offset = 0; @@ -166,25 +165,6 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC, return false; }; - auto getCallInfoFromBranchData = [&](const BranchInfo &BI, bool IsStale) { - MCSymbol *DstSym = nullptr; - uint64_t Count; - if (BI.To.IsSymbol && (DstSym = BC.getGlobalSymbolByName(BI.To.Name))) { - Count = BI.Branches; - } else { - Count = COUNT_NO_PROFILE; - } - // If we are using the perf data for a stale function we need to filter - // out data which comes from branches. We'll assume that the To offset - // is non-zero for branches. - if (IsStale && BI.To.Offset != 0 && - (!DstSym || Function == BC.getFunctionForSymbol(DstSym))) { - DstSym = nullptr; - Count = COUNT_NO_PROFILE; - } - return std::make_pair(DstSym, Count); - }; - // Get pairs of (symbol, count) for each target at this callsite. // If the call is to an unknown function the symbol will be nullptr. // If there is no profiling data the count will be COUNT_NO_PROFILE. 
@@ -193,12 +173,15 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC, const auto *DstSym = BC.MIA->getTargetSymbol(Inst); // If this is an indirect call use perf data directly. - if (!DstSym && BranchData && - BC.MIA->hasAnnotation(Inst, "Offset")) { - const auto InstrOffset = - BC.MIA->getAnnotationAs(Inst, "Offset"); - for (const auto &BI : BranchData->getBranchRange(InstrOffset)) { - Counts.push_back(getCallInfoFromBranchData(BI, false)); + if (!DstSym && BC.MIA->hasAnnotation(Inst, "CallProfile")) { + const auto &ICSP = + BC.MIA->getAnnotationAs(Inst, "CallProfile"); + for (const auto &CSI : ICSP) { + if (!CSI.IsFunction) + continue; + if (auto DstSym = BC.getGlobalSymbolByName(CSI.Name)) { + Counts.push_back(std::make_pair(DstSym, CSI.Count)); + } } } else { const auto Count = BB->getExecutionCount(); @@ -211,23 +194,29 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC, // If the function has an invalid profile, try to use the perf data // directly (if requested). If there is no perf data for this function, // fall back to the CFG walker which attempts to handle missing data. - if (!Function->hasValidProfile() && CgFromPerfData && BranchData) { + if (!Function->hasValidProfile() && CgFromPerfData && + !Function->getAllCallSites().empty()) { DEBUG(dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data" << " for " << *Function << "\n"); ++NumFallbacks; const auto Size = functionSize(Function); - for (const auto &BI : BranchData->Data) { - Offset = BI.From.Offset; + for (const auto &CSI : Function->getAllCallSites()) { + ++TotalCallsites; + + if (!CSI.IsFunction) + continue; + + auto *DstSym = BC.getGlobalSymbolByName(CSI.Name); + if (!DstSym) + continue; + // The computed offset may exceed the hot part of the function; hence, - // bound it the size + // bound it by the size. 
+ Offset = CSI.Offset; if (Offset > Size) Offset = Size; - const auto CI = getCallInfoFromBranchData(BI, true); - if (!CI.first && CI.second == COUNT_NO_PROFILE) // probably a branch - continue; - ++TotalCallsites; - if (!recordCall(CI.first, CI.second)) { + if (!recordCall(DstSym, CSI.Count)) { ++NotProcessed; } } diff --git a/bolt/Passes/IndirectCallPromotion.cpp b/bolt/Passes/IndirectCallPromotion.cpp index f76564355c7b..299a339a69ad 100644 --- a/bolt/Passes/IndirectCallPromotion.cpp +++ b/bolt/Passes/IndirectCallPromotion.cpp @@ -142,21 +142,13 @@ namespace llvm { namespace bolt { IndirectCallPromotion::Callsite::Callsite(BinaryFunction &BF, - const BranchInfo &BI) -: From(BF.getSymbol()), - To(uint64_t(BI.To.Offset)), - Mispreds{uint64_t(BI.Mispreds)}, - Branches{uint64_t(BI.Branches)}, - Histories{BI.Histories} { - if (BI.To.IsSymbol) { - auto &BC = BF.getBinaryContext(); - auto Itr = BC.GlobalSymbols.find(BI.To.Name); - if (Itr != BC.GlobalSymbols.end()) { - To.IsSymbol = true; - To.Sym = BC.getOrCreateGlobalSymbol(Itr->second, "FUNCat"); - To.Addr = 0; - assert(To.Sym); - } + const IndirectCallProfile &ICP) + : From(BF.getSymbol()), + To(ICP.Offset), + Mispreds(ICP.Mispreds), + Branches(ICP.Count) { + if (ICP.IsFunction) { + To.Sym = BF.getBinaryContext().getGlobalSymbolByName(ICP.Name); } } @@ -192,20 +184,18 @@ IndirectCallPromotion::getCallTargets( Entry == BF.getFunctionColdEndLabel()) continue; const Location To(Entry); - Callsite CS{ - From, To, JI->Mispreds, JI->Count, BranchHistories(), - I - Range.first}; - Targets.emplace_back(CS); + Targets.emplace_back( + From, To, JI->Mispreds, JI->Count, I - Range.first); } // Sort by symbol then addr. 
std::sort(Targets.begin(), Targets.end(), [](const Callsite &A, const Callsite &B) { - if (A.To.IsSymbol && B.To.IsSymbol) + if (A.To.Sym && B.To.Sym) return A.To.Sym < B.To.Sym; - else if (A.To.IsSymbol && !B.To.IsSymbol) + else if (A.To.Sym && !B.To.Sym) return true; - else if (!A.To.IsSymbol && B.To.IsSymbol) + else if (!A.To.Sym && B.To.Sym) return false; else return A.To.Addr < B.To.Addr; @@ -221,7 +211,7 @@ IndirectCallPromotion::getCallTargets( while (++First != Last) { auto &A = *Result; const auto &B = *First; - if (A.To.IsSymbol && B.To.IsSymbol && A.To.Sym == B.To.Sym) { + if (A.To.Sym && B.To.Sym && A.To.Sym == B.To.Sym) { A.JTIndex.insert(A.JTIndex.end(), B.JTIndex.begin(), B.JTIndex.end()); } else { *(++Result) = *First; @@ -241,13 +231,13 @@ IndirectCallPromotion::getCallTargets( Inst.getOperand(0).getReg() == BC.MRI->getProgramCounter()) { return Targets; } - const auto *BranchData = BF.getBranchData(); - assert(BranchData && "expected initialized branch data"); - auto Offset = BC.MIA->getAnnotationAs(Inst, "Offset"); - for (const auto &BI : BranchData->getBranchRange(Offset)) { - Callsite Site(BF, BI); - if (Site.isValid()) { - Targets.emplace_back(std::move(Site)); + auto ICSP = + BC.MIA->tryGetAnnotationAs(Inst, "CallProfile"); + if (ICSP) { + for (const auto &CSP : ICSP.get()) { + Callsite Site(BF, CSP); + if (Site.isValid()) + Targets.emplace_back(std::move(Site)); } } } @@ -262,7 +252,7 @@ IndirectCallPromotion::getCallTargets( auto Last = std::remove_if(Targets.begin(), Targets.end(), [](const Callsite &CS) { - return !CS.To.IsSymbol; + return !CS.To.Sym; }); Targets.erase(Last, Targets.end()); @@ -540,7 +530,7 @@ IndirectCallPromotion::findCallTargetSymbols( for (size_t I = 0, TgtIdx = 0; I < N; ++TgtIdx) { auto &Target = Targets[TgtIdx]; - assert(Target.To.IsSymbol && "All ICP targets must be to known symbols"); + assert(Target.To.Sym && "All ICP targets must be to known symbols"); assert(!Target.JTIndex.empty() && "Jump tables must have 
indices"); for (auto Idx : Target.JTIndex) { SymTargets.push_back(std::make_pair(Target.To.Sym, Idx)); @@ -549,7 +539,7 @@ IndirectCallPromotion::findCallTargetSymbols( } } else { for (size_t I = 0; I < N; ++I) { - assert(Targets[I].To.IsSymbol && + assert(Targets[I].To.Sym && "All ICP targets must be to known symbols"); assert(Targets[I].JTIndex.empty() && "Can't have jump table indices for non-jump tables"); @@ -725,7 +715,7 @@ IndirectCallPromotion::rewriteCall( auto TBB = Function.createBasicBlock(0, Sym); for (auto &Inst : Insts) { // sanitize new instructions. if (BC.MIA->isCall(Inst)) - BC.MIA->removeAnnotation(Inst, "Offset"); + BC.MIA->removeAnnotation(Inst, "CallProfile"); } TBB->addInstructions(Insts.begin(), Insts.end()); NewBBs.emplace_back(std::move(TBB)); @@ -822,7 +812,7 @@ BinaryBasicBlock *IndirectCallPromotion::fixCFG( std::vector SymTargets; for (size_t I = 0; I < Targets.size(); ++I) { - assert(Targets[I].To.IsSymbol); + assert(Targets[I].To.Sym); if (Targets[I].JTIndex.empty()) SymTargets.push_back(Targets[I].To.Sym); else { @@ -1089,7 +1079,7 @@ IndirectCallPromotion::printCallsiteInfo(const BinaryBasicBlock *BB, const auto Frequency = 100.0 * Targets[I].Branches / NumCalls; const auto MisFrequency = 100.0 * Targets[I].Mispreds / NumCalls; outs() << "BOLT-INFO: "; - if (Targets[I].To.IsSymbol) + if (Targets[I].To.Sym) outs() << Targets[I].To.Sym->getName(); else outs() << Targets[I].To.Addr; @@ -1188,7 +1178,7 @@ void IndirectCallPromotion::runOnFunctions( if (!Function.isSimple() || !opts::shouldProcess(Function) || - !Function.getBranchData()) + !Function.hasProfile()) continue; const bool HasLayout = !Function.layout_empty(); @@ -1199,12 +1189,13 @@ void IndirectCallPromotion::runOnFunctions( for (auto &Inst : BB) { const bool IsJumpTable = Function.getJumpTable(Inst); - const bool HasBranchData = BC.MIA->hasAnnotation(Inst, "Offset"); + const bool HasIndirectCallProfile = + BC.MIA->hasAnnotation(Inst, "CallProfile"); const bool 
IsDirectCall = (BC.MIA->isCall(Inst) && BC.MIA->getTargetSymbol(Inst, 0)); if (!IsDirectCall && - ((HasBranchData && !IsJumpTable && OptimizeCalls) || + ((HasIndirectCallProfile && !IsJumpTable && OptimizeCalls) || (IsJumpTable && OptimizeJumpTables))) { uint64_t NumCalls = 0; for (const auto &BInfo : getCallTargets(Function, Inst)) { @@ -1233,8 +1224,8 @@ void IndirectCallPromotion::runOnFunctions( ++Num; } outs() << "BOLT-INFO: ICP Total indirect calls = " << TotalIndirectCalls - << ", " << Num << " callsites cover " << opts::ICPTopCallsites << "% " - << "of all indirect calls\n"; + << ", " << Num << " callsites cover " << opts::ICPTopCallsites + << "% of all indirect calls\n"; // Mark sites to optimize with "DoICP" annotation. for (size_t I = 0; I < Num; ++I) { @@ -1249,8 +1240,7 @@ void IndirectCallPromotion::runOnFunctions( if (!Function.isSimple() || !opts::shouldProcess(Function)) continue; - const auto *BranchData = Function.getBranchData(); - if (!BranchData) + if (!Function.hasProfile()) continue; const bool HasLayout = !Function.layout_empty(); @@ -1279,15 +1269,15 @@ void IndirectCallPromotion::runOnFunctions( auto &Inst = BB->getInstructionAtIndex(Idx); const auto InstIdx = &Inst - &(*BB->begin()); const bool IsTailCall = BC.MIA->isTailCall(Inst); - const bool HasBranchData = Function.getBranchData() && - BC.MIA->hasAnnotation(Inst, "Offset"); + const bool HasIndirectCallProfile = + BC.MIA->hasAnnotation(Inst, "CallProfile"); const bool IsJumpTable = Function.getJumpTable(Inst); if (BC.MIA->isCall(Inst)) { TotalCalls += BB->getKnownExecutionCount(); } - if (!((HasBranchData && !IsJumpTable && OptimizeCalls) || + if (!((HasIndirectCallProfile && !IsJumpTable && OptimizeCalls) || (IsJumpTable && OptimizeJumpTables))) continue; @@ -1458,7 +1448,7 @@ void IndirectCallPromotion::runOnFunctions( TotalIndirectJmps += FuncTotalIndirectJmps; } - outs() << "BOLT-INFO: ICP total indirect callsites = " + outs() << "BOLT-INFO: ICP total indirect callsites with 
profile = " << TotalIndirectCallsites << "\n" << "BOLT-INFO: ICP total jump table callsites = " @@ -1475,7 +1465,8 @@ void IndirectCallPromotion::runOnFunctions( << format("%.1f", (100.0 * TotalNumFrequentCalls) / std::max(TotalIndirectCalls, 1ul)) << "%\n" - << "BOLT-INFO: ICP percentage of indirect calls that are optimized = " + << "BOLT-INFO: ICP percentage of indirect callsites that are " + "optimized = " << format("%.1f", (100.0 * TotalOptimizedIndirectCallsites) / std::max(TotalIndirectCallsites, 1ul)) << "%\n" diff --git a/bolt/Passes/IndirectCallPromotion.h b/bolt/Passes/IndirectCallPromotion.h index e7b4cdc285e9..366fd1e23307 100644 --- a/bolt/Passes/IndirectCallPromotion.h +++ b/bolt/Passes/IndirectCallPromotion.h @@ -22,7 +22,7 @@ namespace bolt { /// Optimize indirect calls. /// The indirect call promotion pass visits each indirect call and -/// examines the BranchData for each. If the most frequent targets +/// examines a branch profile for each. If the most frequent targets /// from that callsite exceed the specified threshold (default 90%), /// the call is promoted. Otherwise, it is ignored. By default, /// only one target is considered at each callsite. 
@@ -103,14 +103,13 @@ class IndirectCallPromotion : public BinaryFunctionPass { using JumpTableInfoType = std::vector>; using SymTargetsType = std::vector>; struct Location { - bool IsSymbol{false}; MCSymbol *Sym{nullptr}; uint64_t Addr{0}; bool isValid() const { - return (IsSymbol && Sym) || (!IsSymbol && Addr != 0); + return Sym || (!Sym && Addr != 0); } Location() { } - explicit Location(MCSymbol *Sym) : IsSymbol(true), Sym(Sym) { } + explicit Location(MCSymbol *Sym) : Sym(Sym) { } explicit Location(uint64_t Addr) : Addr(Addr) { } }; @@ -119,18 +118,17 @@ class IndirectCallPromotion : public BinaryFunctionPass { Location To; uint64_t Mispreds{0}; uint64_t Branches{0}; - BranchHistories Histories; // Indices in the jmp table (jt only) std::vector JTIndex; bool isValid() const { return From.isValid() && To.isValid(); } - Callsite(BinaryFunction &BF, const BranchInfo &BI); + Callsite(BinaryFunction &BF, const IndirectCallProfile &ICP); Callsite(const Location &From, const Location &To, uint64_t Mispreds, uint64_t Branches, - const BranchHistories &Histories, uint64_t JTIndex) + uint64_t JTIndex) : From(From), To(To), Mispreds(Mispreds), Branches(Branches), - Histories(Histories), JTIndex(1, JTIndex) { } + JTIndex(1, JTIndex) { } }; std::unordered_set Modified; diff --git a/bolt/Passes/PLTCall.cpp b/bolt/Passes/PLTCall.cpp index e530dba77137..78eba87dc1e3 100644 --- a/bolt/Passes/PLTCall.cpp +++ b/bolt/Passes/PLTCall.cpp @@ -85,7 +85,7 @@ void PLTCall::runOnFunctions( if (NumCallsOptimized) { BC.RequiresZNow = true; outs() << "BOLT-INFO: " << NumCallsOptimized - << " PLT calls in the binary were opitmized.\n"; + << " PLT calls in the binary were optimized.\n"; } } diff --git a/bolt/ProfileReader.cpp b/bolt/ProfileReader.cpp new file mode 100644 index 000000000000..4f09ab900cb7 --- /dev/null +++ b/bolt/ProfileReader.cpp @@ -0,0 +1,265 @@ +//===-- ProfileReader.cpp - BOLT profile de-serializer ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// 
This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "BinaryBasicBlock.h" +#include "BinaryFunction.h" +#include "ProfileReader.h" +#include "ProfileYAMLMapping.h" +#include "llvm/Support/CommandLine.h" + +namespace opts { +extern llvm::cl::opt Verbosity; +} + +namespace llvm { +namespace bolt { + +void +ProfileReader::buildNameMaps(std::map &Functions) { + for (auto &YamlBF : YamlBFs) { + StringRef Name = YamlBF.Name; + const auto Pos = Name.find("(*"); + if (Pos != StringRef::npos) + Name = Name.substr(0, Pos); + ProfileNameToProfile[Name] = &YamlBF; + if (const auto CommonName = getLTOCommonName(Name)) { + LTOCommonNameMap[*CommonName].push_back(&YamlBF); + } + } + for (auto &BFI : Functions) { + const auto &Function = BFI.second; + for (auto &Name : Function.getNames()) { + if (const auto CommonName = getLTOCommonName(Name)) { + LTOCommonNameFunctionMap[*CommonName].insert(&Function); + } + } + } +} + +bool +ProfileReader::parseFunctionProfile(BinaryFunction &BF, + const yaml::bolt::BinaryFunctionProfile &YamlBF) { + auto &BC = BF.getBinaryContext(); + + bool ProfileMatched = true; + uint64_t MismatchedBlocks = 0; + uint64_t MismatchedCalls = 0; + uint64_t MismatchedEdges = 0; + + BF.setExecutionCount(YamlBF.ExecCount); + + if (YamlBF.Hash != BF.hash(true, true)) { + if (opts::Verbosity >= 1) + errs() << "BOLT-WARNING: hash mismatch\n"; + ProfileMatched = false; + } + + if (YamlBF.NumBasicBlocks != BF.size()) { + if (opts::Verbosity >= 1) + errs() << "BOLT-WARNING: number of basic blocks mismatch\n"; + ProfileMatched = false; + } + + auto DFSOrder = BF.dfs(); + + for (const auto &YamlBB : YamlBF.Blocks) { + if (YamlBB.Index >= DFSOrder.size()) { + if (opts::Verbosity >= 2) + errs() << "BOLT-WARNING: index " << YamlBB.Index 
+ << " is out of bounds\n"; + ++MismatchedBlocks; + continue; + } + + auto &BB = *DFSOrder[YamlBB.Index]; + BB.setExecutionCount(YamlBB.ExecCount); + + for (const auto &YamlCSI: YamlBB.CallSites) { + auto *Callee = YamlCSI.DestId < YamlProfileToFunction.size() ? + YamlProfileToFunction[YamlCSI.DestId] : nullptr; + bool IsFunction = Callee ? true : false; + const MCSymbol *CalleeSymbol = nullptr; + if (IsFunction) { + CalleeSymbol = Callee->getSymbolForEntry(YamlCSI.EntryDiscriminator); + } + StringRef Name = CalleeSymbol ? CalleeSymbol->getName() : ""; + BF.getAllCallSites().emplace_back( + IsFunction, Name, YamlCSI.Count, YamlCSI.Mispreds, YamlCSI.Offset); + + if (YamlCSI.Offset >= BB.getOriginalSize()) { + if (opts::Verbosity >= 2) + errs() << "BOLT-WARNING: offset " << YamlCSI.Offset + << " out of bounds in block " << BB.getName() << '\n'; + ++MismatchedCalls; + continue; + } + + auto *Instr = + BF.getInstructionAtOffset(BB.getInputOffset() + YamlCSI.Offset); + if (!Instr) { + if (opts::Verbosity >= 2) + errs() << "BOLT-WARNING: no instruction at offset " << YamlCSI.Offset + << " in block " << BB.getName() << '\n'; + ++MismatchedCalls; + continue; + } + if (!BC.MIA->isCall(*Instr) && !BC.MIA->isIndirectBranch(*Instr)) { + if (opts::Verbosity >= 2) + errs() << "BOLT-WARNING: expected call at offset " << YamlCSI.Offset + << " in block " << BB.getName() << '\n'; + ++MismatchedCalls; + continue; + } + + auto setAnnotation = [&](StringRef Name, uint64_t Count) { + if (BC.MIA->hasAnnotation(*Instr, Name)) { + if (opts::Verbosity >= 1) + errs() << "BOLT-WARNING: ignoring duplicate " << Name + << " info for offset 0x" << Twine::utohexstr(YamlCSI.Offset) + << " in function " << BF << '\n'; + return; + } + BC.MIA->addAnnotation(BC.Ctx.get(), *Instr, Name, Count); + }; + + if (BC.MIA->isIndirectCall(*Instr) || BC.MIA->isIndirectBranch(*Instr)) { + IndirectCallSiteProfile &CSP = + BC.MIA->getOrCreateAnnotationAs(BC.Ctx.get(), + *Instr, "CallProfile"); + 
CSP.emplace_back(IsFunction, Name, YamlCSI.Count, YamlCSI.Mispreds); + } else if (BC.MIA->getConditionalTailCall(*Instr)) { + setAnnotation("CTCTakenCount", YamlCSI.Count); + setAnnotation("CTCMispredCount", YamlCSI.Mispreds); + } else { + setAnnotation("Count", YamlCSI.Count); + } + } + + for (const auto &YamlSI : YamlBB.Successors) { + if (YamlSI.Index >= DFSOrder.size()) { + if (opts::Verbosity >= 1) + errs() << "BOLT-WARNING: index out of bounds for profiled block\n"; + ++MismatchedEdges; + continue; + } + + auto &SuccessorBB = *DFSOrder[YamlSI.Index]; + if (!BB.getSuccessor(SuccessorBB.getLabel())) { + if (opts::Verbosity >= 1) + errs() << "BOLT-WARNING: no successor for block " << BB.getName() + << " that matches index " << YamlSI.Index << " or block " + << SuccessorBB.getName() << '\n'; + ++MismatchedEdges; + continue; + } + + BB.setSuccessorBranchInfo(SuccessorBB, YamlSI.Count, YamlSI.Mispreds); + } + } + + ProfileMatched &= !MismatchedBlocks && !MismatchedCalls && !MismatchedEdges; + + if (ProfileMatched) + BF.markProfiled(); + + if (!ProfileMatched && opts::Verbosity >= 1) { + errs() << "BOLT-WARNING: " << MismatchedBlocks << " blocks, " + << MismatchedCalls << " calls, and " << MismatchedEdges + << " edges in profile did not match function " << BF << '\n'; + } + + return ProfileMatched; +} + +std::error_code +ProfileReader::readProfile(const std::string &FileName, + std::map &Functions) { + auto MB = MemoryBuffer::getFileOrSTDIN(FileName); + if (std::error_code EC = MB.getError()) { + errs() << "ERROR: cannot open " << FileName << ": " << EC.message() << "\n"; + return EC; + } + + yaml::Input YamlInput(MB.get()->getBuffer()); + YamlInput >> YamlBFs; + if (YamlInput.error()) { + errs() << "BOLT-ERROR: syntax error parsing " << FileName << " : " + << YamlInput.error().message() << '\n'; + return YamlInput.error(); + } + + buildNameMaps(Functions); + + YamlProfileToFunction.resize(YamlBFs.size() + 1); + for (auto &BFI : Functions) { + auto &Function = 
BFI.second; + auto Hash = Function.hash(true, true); + for (auto &FunctionName : Function.getNames()) { + const auto CommonName = getLTOCommonName(FunctionName); + if (CommonName) { + auto I = LTOCommonNameMap.find(*CommonName); + if (I == LTOCommonNameMap.end()) + continue; + + bool ProfileMatched{false}; + auto <OProfiles = I->getValue(); + for (auto *YamlBF : LTOProfiles) { + if (YamlBF->Used) + continue; + if (YamlBF->Hash == Hash) { + matchProfileToFunction(*YamlBF, Function); + break; + } + } + if (ProfileMatched) + break; + + // If there's only one function with a given name, try to + // match it partially. + if (LTOProfiles.size() == 1 && + LTOCommonNameFunctionMap[*CommonName].size() == 1 && + !LTOProfiles.front()->Used) { + matchProfileToFunction(*LTOProfiles.front(), Function); + break; + } + } else { + auto PI = ProfileNameToProfile.find(FunctionName); + if (PI == ProfileNameToProfile.end()) + continue; + + auto &YamlBF = *PI->getValue(); + matchProfileToFunction(YamlBF, Function); + break; + } + } + } + for (auto &YamlBF : YamlBFs) { + if (!YamlBF.Used) { + errs() << "BOLT-WARNING: profile ignored for function " + << YamlBF.Name << '\n'; + } + } + + for (auto &YamlBF : YamlBFs) { + if (YamlBF.Id >= YamlProfileToFunction.size()) { + // Such profile was ignored. + continue; + } + if (auto *BF = YamlProfileToFunction[YamlBF.Id]) { + parseFunctionProfile(*BF, YamlBF); + } + } + + return YamlInput.error(); +} + +} // end namespace bolt +} // end namespace llvm diff --git a/bolt/ProfileReader.h b/bolt/ProfileReader.h new file mode 100644 index 000000000000..1312ab6f3473 --- /dev/null +++ b/bolt/ProfileReader.h @@ -0,0 +1,68 @@ +//===-- ProfileReader.h - BOLT profile deserializer -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_PROFILEREADER_H +#define LLVM_TOOLS_LLVM_BOLT_PROFILEREADER_H + +#include "BinaryFunction.h" +#include "ProfileYAMLMapping.h" +#include + +namespace llvm { +namespace bolt { + +class ProfileReader { + /// Number of function profiles that were unused by the reader. + uint64_t NumUnusedProfiles{0}; + + /// Map a function ID from a profile to a BinaryFunction object. + std::vector YamlProfileToFunction; + + void reportError(StringRef Message); + + bool parseFunctionProfile(BinaryFunction &Function, + const yaml::bolt::BinaryFunctionProfile &YamlBF); + + /// Profile for binary functions. + std::vector YamlBFs; + + /// For LTO symbol resolution. + /// Map a common LTO prefix to a list of profiles matching the prefix. + StringMap> LTOCommonNameMap; + + /// Map a common LTO prefix to a set of binary functions. + StringMap> + LTOCommonNameFunctionMap; + + StringMap ProfileNameToProfile; + + void buildNameMaps(std::map &Functions); + + /// Update matched YAML -> BinaryFunction pair. + void matchProfileToFunction(yaml::bolt::BinaryFunctionProfile &YamlBF, + BinaryFunction &BF) { + if (YamlBF.Id >= YamlProfileToFunction.size()) + YamlProfileToFunction.resize(YamlBF.Id + 1); + YamlProfileToFunction[YamlBF.Id] = &BF; + YamlBF.Used = true; + } + +public: + /// Read profile from a file and associate with a set of functions. 
+ std::error_code readProfile(const std::string &FileName, + std::map &Functions); + +}; + +} +} + +#endif diff --git a/bolt/ProfileWriter.cpp b/bolt/ProfileWriter.cpp new file mode 100644 index 000000000000..21883e7074fc --- /dev/null +++ b/bolt/ProfileWriter.cpp @@ -0,0 +1,174 @@ +//===-- ProfileWriter.cpp - Serialize profiling data ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "BinaryBasicBlock.h" +#include "BinaryFunction.h" +#include "ProfileWriter.h" +#include "ProfileYAMLMapping.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" + +#undef DEBUG_TYPE +#define DEBUG_TYPE "bolt-prof" + +namespace llvm { +namespace bolt { + +std::error_code +ProfileWriter::writeProfile(std::map &Functions) { + std::error_code EC; + OS = make_unique(FileName, EC, sys::fs::F_None); + if (EC) { + errs() << "BOLT-WARNING: " << EC.message() << " : unable to open " + << FileName << " for output.\n"; + return EC; + } + + printBinaryFunctionsProfile(Functions); + + return std::error_code(); +} + +namespace { +void +convert(const BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF) { + auto &BC = BF.getBinaryContext(); + + YamlBF.Name = BF.getPrintName(); + YamlBF.Id = BF.getFunctionNumber(); + YamlBF.Hash = BF.hash(true, true); + YamlBF.ExecCount = BF.getKnownExecutionCount(); + YamlBF.NumBasicBlocks = BF.size(); + + for (const auto *BB : BF.dfs()) { + yaml::bolt::BinaryBasicBlockProfile YamlBB; + YamlBB.Index = BB->getLayoutIndex(); + YamlBB.NumInstructions = BB->getNumNonPseudos(); + YamlBB.ExecCount = 
BB->getKnownExecutionCount(); + + for (const auto &Instr : *BB) { + if (!BC.MIA->isCall(Instr) && !BC.MIA->isIndirectBranch(Instr)) + continue; + + yaml::bolt::CallSiteInfo CSI; + auto Offset = BC.MIA->tryGetAnnotationAs(Instr, "Offset"); + if (!Offset || Offset.get() < BB->getInputOffset()) + continue; + CSI.Offset = Offset.get() - BB->getInputOffset(); + + if (BC.MIA->isIndirectCall(Instr) || BC.MIA->isIndirectBranch(Instr)) { + auto ICSP = + BC.MIA->tryGetAnnotationAs(Instr, + "CallProfile"); + if (!ICSP) + continue; + for (auto &CSP : ICSP.get()) { + CSI.DestId = 0; // designated for unknown functions + CSI.EntryDiscriminator = 0; + if (CSP.IsFunction) { + const auto *CalleeSymbol = BC.getGlobalSymbolByName(CSP.Name); + if (CalleeSymbol) { + const auto *Callee = BC.getFunctionForSymbol(CalleeSymbol); + if (Callee) { + CSI.DestId = Callee->getFunctionNumber(); + } + } + } + CSI.Count = CSP.Count; + CSI.Mispreds = CSP.Mispreds; + YamlBB.CallSites.push_back(CSI); + } + } else { // direct call or a tail call + const auto *CalleeSymbol = BC.MIA->getTargetSymbol(Instr); + const auto Callee = BC.getFunctionForSymbol(CalleeSymbol); + if (Callee) { + CSI.DestId = Callee->getFunctionNumber();; + CSI.EntryDiscriminator = Callee->getEntryForSymbol(CalleeSymbol); + } + + if (BC.MIA->getConditionalTailCall(Instr)) { + auto CTCCount = + BC.MIA->tryGetAnnotationAs(Instr, "CTCTakenCount"); + if (CTCCount) { + CSI.Count = *CTCCount; + auto CTCMispreds = + BC.MIA->tryGetAnnotationAs(Instr, "CTCMispredCount"); + if (CTCMispreds) + CSI.Mispreds = *CTCMispreds; + } + } else { + auto Count = BC.MIA->tryGetAnnotationAs(Instr, "Count"); + if (Count) + CSI.Count = *Count; + } + + if (CSI.Count) + YamlBB.CallSites.emplace_back(CSI); + } + } + + // Skip printing if there's no profile data for non-entry basic block. 
+ if (YamlBB.CallSites.empty() && !BB->isEntryPoint()) { + uint64_t SuccessorExecCount = 0; + for (auto &BranchInfo : BB->branch_info()) { + SuccessorExecCount += BranchInfo.Count; + } + if (!SuccessorExecCount) + continue; + } + + auto BranchInfo = BB->branch_info_begin(); + for (const auto *Successor : BB->successors()) { + yaml::bolt::SuccessorInfo YamlSI; + YamlSI.Index = Successor->getLayoutIndex(); + YamlSI.Count = BranchInfo->Count; + YamlSI.Mispreds = BranchInfo->MispredictedCount; + + YamlBB.Successors.emplace_back(YamlSI); + + ++BranchInfo; + } + + YamlBF.Blocks.emplace_back(YamlBB); + } +} +} // end anonymous namespace + +void ProfileWriter::printBinaryFunctionProfile(const BinaryFunction &BF) { + yaml::bolt::BinaryFunctionProfile YamlBF; + convert(BF, YamlBF); + + yaml::Output Out(*OS); + Out << YamlBF; +} + +void ProfileWriter::printBinaryFunctionsProfile( + std::map &BFs) { + std::vector YamlBFs; + for (auto &BFI : BFs) { + const auto &BF = BFI.second; + if (BF.hasProfile()) { + yaml::bolt::BinaryFunctionProfile YamlBF; + convert(BF, YamlBF); + YamlBFs.emplace_back(YamlBF); + } + } + + yaml::Output Out(*OS); + Out << YamlBFs; +} + +} // namespace bolt +} // namespace llvm diff --git a/bolt/ProfileWriter.h b/bolt/ProfileWriter.h new file mode 100644 index 000000000000..dfbbc9ad7e30 --- /dev/null +++ b/bolt/ProfileWriter.h @@ -0,0 +1,53 @@ +//===-- ProfileWriter.cpp - serialize profiling data ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_TOOLS_LLVM_BOLT_PROFILE_WRITER_H +#define LLVM_TOOLS_LLVM_BOLT_PROFILE_WRITER_H + +#include "BinaryBasicBlock.h" +#include "BinaryContext.h" +#include "BinaryFunction.h" +#include "ProfileYAMLMapping.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { +namespace bolt { + +class ProfileWriter { + ProfileWriter() = delete; + + std::string FileName; + + std::error_code write(BinaryFunction &BF); + + std::unique_ptr OS; + + void printBinaryFunctionProfile(const BinaryFunction &BF); + + void printBinaryFunctionsProfile(std::map &BFs); + +public: + explicit ProfileWriter(const std::string &FileName) + : FileName(FileName) { + } + + /// Write profile for functions. + std::error_code writeProfile(std::map &Functions); +}; + +} // namespace bolt +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_BOLT_PROFILE_WRITER_H diff --git a/bolt/ProfileYAMLMapping.h b/bolt/ProfileYAMLMapping.h new file mode 100644 index 000000000000..85845ebc012e --- /dev/null +++ b/bolt/ProfileYAMLMapping.h @@ -0,0 +1,147 @@ +//===-- ProfileYAMLMapping.h - mappings for BOLT profile --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implement mapping between binary function profile and YAML representation. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_PROFILEYAMLMAPPING_H +#define LLVM_TOOLS_LLVM_BOLT_PROFILEYAMLMAPPING_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/YAMLTraits.h" +#include + +namespace llvm { +namespace yaml { + +namespace bolt { +struct CallSiteInfo { + llvm::yaml::Hex32 Offset{0}; + uint32_t DestId{0}; + uint32_t EntryDiscriminator{0}; // multiple entry discriminator + uint64_t Count{0}; + uint64_t Mispreds{0}; + + bool operator==(const CallSiteInfo &Other) const { + return Offset == Other.Offset && + DestId == Other.DestId && + EntryDiscriminator == Other.EntryDiscriminator; + } +}; +} + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, bolt::CallSiteInfo &CSI) { + YamlIO.mapRequired("off", CSI.Offset); + YamlIO.mapRequired("fid", CSI.DestId); + YamlIO.mapOptional("disc", CSI.EntryDiscriminator, (uint32_t)0); + YamlIO.mapRequired("cnt", CSI.Count); + YamlIO.mapOptional("mis", CSI.Mispreds, (uint64_t)0); + } + + static const bool flow = true; +}; + +namespace bolt { +struct SuccessorInfo { + uint32_t Index{0}; + uint64_t Count{0}; + uint64_t Mispreds{0}; + + bool operator==(const SuccessorInfo &Other) const { + return Index == Other.Index; + } +}; +} + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, bolt::SuccessorInfo &SI) { + YamlIO.mapRequired("bid", SI.Index); + YamlIO.mapRequired("cnt", SI.Count); + YamlIO.mapOptional("mis", SI.Mispreds, (uint64_t)0); + } + + static const bool flow = true; +}; + +} // end namespace yaml +} // end namespace llvm + +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::CallSiteInfo) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::SuccessorInfo) + +namespace llvm { +namespace yaml { + +namespace bolt { +struct BinaryBasicBlockProfile { + uint32_t Index{0}; + uint32_t NumInstructions{0}; + llvm::yaml::Hex64 Hash{0}; + uint64_t ExecCount{0}; + std::vector CallSites; + std::vector 
Successors; + + bool operator==(const BinaryBasicBlockProfile &Other) const { + return Index == Other.Index; + } +}; +} // namespace bolt + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, bolt::BinaryBasicBlockProfile &BBP) { + YamlIO.mapRequired("bid", BBP.Index); + YamlIO.mapRequired("insns", BBP.NumInstructions); + YamlIO.mapOptional("exec", BBP.ExecCount, (uint64_t)0); + YamlIO.mapOptional("calls", BBP.CallSites, + std::vector()); + YamlIO.mapOptional("succ", BBP.Successors, + std::vector()); + } +}; + +} // end namespace yaml +} // end namespace llvm + +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryBasicBlockProfile) + +namespace llvm { +namespace yaml { + +namespace bolt { +struct BinaryFunctionProfile { + std::string Name; + uint32_t NumBasicBlocks; + uint32_t Id; + llvm::yaml::Hex64 Hash; + uint64_t ExecCount; + std::vector Blocks; + bool Used{false}; +}; +} + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, bolt::BinaryFunctionProfile &BFP) { + YamlIO.mapRequired("name", BFP.Name); + YamlIO.mapRequired("fid", BFP.Id); + YamlIO.mapRequired("hash", BFP.Hash); + YamlIO.mapRequired("exec", BFP.ExecCount); + YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks); + YamlIO.mapOptional("blocks", BFP.Blocks, + std::vector()); + } +}; + +} // end namespace yaml +} // end namespace llvm + +LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(llvm::yaml::bolt::BinaryFunctionProfile) + +#endif diff --git a/bolt/RewriteInstance.cpp b/bolt/RewriteInstance.cpp index 518b9e9df5c7..ad3cc0253460 100644 --- a/bolt/RewriteInstance.cpp +++ b/bolt/RewriteInstance.cpp @@ -18,6 +18,8 @@ #include "DataAggregator.h" #include "DataReader.h" #include "Exceptions.h" +#include "ProfileReader.h" +#include "ProfileWriter.h" #include "RewriteInstance.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -97,6 +99,11 @@ AllowStripped("allow-stripped", cl::Hidden, cl::cat(BoltCategory)); +static cl::opt +BoltProfile("b", + cl::desc(""), + 
cl::cat(BoltCategory)); + cl::opt BoostMacroops("boost-macroops", cl::desc("try to boost macro-op fusions by avoiding the cache-line boundary"), @@ -217,6 +224,11 @@ RelocationMode("relocs", cl::ZeroOrMore, cl::cat(BoltCategory)); +static cl::opt +SaveProfile("w", + cl::desc("save recorded profile to a file"), + cl::cat(BoltOutputCategory)); + static cl::list SkipFunctionNames("skip-funcs", cl::CommaSeparated, @@ -873,7 +885,7 @@ void RewriteInstance::run() { discoverFileObjects(); readDebugInfo(); disassembleFunctions(); - readProfileData(); + processProfileData(); if (opts::AggregateOnly) return; postProcessFunctions(); @@ -1901,39 +1913,56 @@ void RewriteInstance::readDebugInfo() { BC->preprocessDebugInfo(BinaryFunctions); } -void RewriteInstance::readProfileData() { +void RewriteInstance::processProfileData() { if (DA.started()) { NamedRegionTimer T("aggregate data", TimerGroupName, opts::TimeRewrite); DA.aggregate(*BC.get(), BinaryFunctions); + for (auto &BFI : BinaryFunctions) { + auto &Function = BFI.second; + Function.convertBranchData(); + } + if (opts::AggregateOnly) { if (std::error_code EC = DA.writeAggregatedFile()) { check_error(EC, "cannot create output data file"); } } - return; - } + } else { + NamedRegionTimer T("read profile data", TimerGroupName, opts::TimeRewrite); + + if (!opts::BoltProfile.empty()) { + ProfileReader PR; + PR.readProfile(opts::BoltProfile, BinaryFunctions); + + return; + } + + // Preliminary match profile data to functions. 
+ if (!BC->DR.getAllFuncsData().empty()) { + for (auto &BFI : BinaryFunctions) { + auto &Function = BFI.second; + if (auto *MemData = BC->DR.getFuncMemData(Function.getNames())) { + Function.MemData = MemData; + MemData->Used = true; + } + if (auto *FuncData = BC->DR.getFuncBranchData(Function.getNames())) { + Function.BranchData = FuncData; + Function.ExecutionCount = FuncData->ExecutionCount; + FuncData->Used = true; + } + } + } - NamedRegionTimer T("read profile data", TimerGroupName, opts::TimeRewrite); - // Preliminary match profile data to functions. - if (!BC->DR.getAllFuncsData().empty()) { for (auto &BFI : BinaryFunctions) { auto &Function = BFI.second; - if (auto *MemData = BC->DR.getFuncMemData(Function.getNames())) { - Function.MemData = MemData; - MemData->Used = true; - } - if (auto *FuncData = BC->DR.getFuncBranchData(Function.getNames())) { - Function.BranchData = FuncData; - Function.ExecutionCount = FuncData->ExecutionCount; - FuncData->Used = true; - } + Function.readProfile(); } } - for (auto &BFI : BinaryFunctions) { - auto &Function = BFI.second; - Function.readProfile(); + if (!opts::SaveProfile.empty()) { + ProfileWriter PW(opts::SaveProfile); + PW.writeProfile(BinaryFunctions); } } diff --git a/bolt/RewriteInstance.h b/bolt/RewriteInstance.h index 74c801a27d33..368ae2e6d61e 100644 --- a/bolt/RewriteInstance.h +++ b/bolt/RewriteInstance.h @@ -178,8 +178,8 @@ public: /// Read information from debug sections. void readDebugInfo(); - /// Associate profile data with functions. - void readProfileData(); + /// Associate profile data with binary objects. + void processProfileData(); /// Disassemble each function in the binary and associate it with a /// BinaryFunction object, preparing all information necessary for binary