diff --git a/bolt/BinaryFunction.cpp b/bolt/BinaryFunction.cpp index 0c1d61918530..8cdb17ea4c3d 100644 --- a/bolt/BinaryFunction.cpp +++ b/bolt/BinaryFunction.cpp @@ -48,6 +48,21 @@ BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) { return &(*--I); } +unsigned BinaryFunction::eraseDeadBBs( + std::map &ToPreserve) { + BasicBlockOrderType NewLayout; + unsigned Count = 0; + for (auto I = BasicBlocksLayout.begin(), E = BasicBlocksLayout.end(); I != E; + ++I) { + if (ToPreserve[*I]) + NewLayout.push_back(*I); + else + ++Count; + } + BasicBlocksLayout = std::move(NewLayout); + return Count; +} + void BinaryFunction::print(raw_ostream &OS, bool PrintInstructions) const { StringRef SectionName; Section.getName(SectionName); @@ -537,12 +552,13 @@ void BinaryFunction::inferFallThroughCounts() { void BinaryFunction::optimizeLayout(bool DumpLayout) { // Bail if no profiling information or if empty - if (getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE || empty()) { + if (getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE || + BasicBlocksLayout.empty()) { return; } // Work on optimal solution if problem is small enough - if (BasicBlocks.size() <= FUNC_SIZE_THRESHOLD) + if (BasicBlocksLayout.size() <= FUNC_SIZE_THRESHOLD) return solveOptimalLayout(DumpLayout); if (DumpLayout) { @@ -567,19 +583,19 @@ void BinaryFunction::optimizeLayout(bool DumpLayout) { BBToClusterMapTy BBToClusterMap; // Populating priority queue with all edges - for (auto &BB : BasicBlocks) { - BBToClusterMap[&BB] = -1; // Mark as unmapped - auto BI = BB.BranchInfo.begin(); - for (auto &I : BB.successors()) { + for (auto BB : BasicBlocksLayout) { + BBToClusterMap[BB] = -1; // Mark as unmapped + auto BI = BB->BranchInfo.begin(); + for (auto &I : BB->successors()) { if (BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) - Weight[std::make_pair(&BB, I)] = BI->Count; - Queue.push(std::make_pair(&BB, I)); + Weight[std::make_pair(BB, I)] = BI->Count; + Queue.push(std::make_pair(BB, I)); ++BI; } } // Start a cluster with the entry point - BinaryBasicBlock *Entry = &*BasicBlocks.begin(); + BinaryBasicBlock *Entry = *BasicBlocksLayout.begin(); Clusters.emplace_back(); auto &EntryCluster = Clusters.back(); EntryCluster.push_back(Entry); @@ -661,6 +677,14 @@ void BinaryFunction::optimizeLayout(bool DumpLayout) { BBToClusterMap[BBDst] = I; } + // Create an extra cluster for unvisited basic blocks + std::vector Unvisited; + for (auto BB : BasicBlocksLayout) { + if (BBToClusterMap[BB] == -1) { + Unvisited.push_back(BB); + } + } + // Define final function layout based on clusters BasicBlocksLayout.clear(); for (auto &Cluster : Clusters) { @@ -670,11 +694,8 @@ void BinaryFunction::optimizeLayout(bool DumpLayout) { // Finalize layout with BBs that weren't assigned to any cluster, preserving // their relative order - for (auto &BB : BasicBlocks) { - if (BBToClusterMap[&BB] == -1) { - BasicBlocksLayout.push_back(&BB); - } - } + BasicBlocksLayout.insert(BasicBlocksLayout.end(), Unvisited.begin(), + Unvisited.end()); fixBranches(); @@ -705,19 +726,19 @@ void BinaryFunction::solveOptimalLayout(bool DumpLayout) { dbgs() << "finding optimal block layout for " << getName() << "\n"; } - unsigned N = BasicBlocks.size(); + unsigned N = BasicBlocksLayout.size(); // Populating weight map and index map - for (auto &BB : BasicBlocks) { - BBToIndex[&BB] = IndexToBB.size(); - IndexToBB.push_back(&BB); + for (auto BB : BasicBlocksLayout) { + BBToIndex[BB] = IndexToBB.size(); + IndexToBB.push_back(BB); } Weight.resize(N); - for (auto &BB : BasicBlocks) { - auto BI = BB.BranchInfo.begin(); - Weight[BBToIndex[&BB]].resize(N); - for (auto &I : BB.successors()) { + for (auto BB : BasicBlocksLayout) { + auto BI = BB->BranchInfo.begin(); + Weight[BBToIndex[BB]].resize(N); + for (auto I : BB->successors()) { if (BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) - Weight[BBToIndex[&BB]][BBToIndex[I]] = BI->Count; + Weight[BBToIndex[BB]][BBToIndex[I]] = BI->Count; ++BI; } } @@ -765,6 +786,8 @@ void BinaryFunction::solveOptimalLayout(bool DumpLayout) { } } + std::vector PastLayout = BasicBlocksLayout; + // Define final function layout based on layout that maximizes weight BasicBlocksLayout.clear(); unsigned Last = BestLast; @@ -791,9 +814,9 @@ void BinaryFunction::solveOptimalLayout(bool DumpLayout) { std::reverse(BasicBlocksLayout.begin(), BasicBlocksLayout.end()); // Finalize layout with BBs that weren't assigned to the layout - for (auto &BB : BasicBlocks) { - if (Visited[BBToIndex[&BB]] == false) - BasicBlocksLayout.push_back(&BB); + for (auto BB : PastLayout) { + if (Visited[BBToIndex[BB]] == false) + BasicBlocksLayout.push_back(BB); } fixBranches(); diff --git a/bolt/BinaryFunction.h b/bolt/BinaryFunction.h index a1de19509109..829de7b8c1a6 100644 --- a/bolt/BinaryFunction.h +++ b/bolt/BinaryFunction.h @@ -32,6 +32,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include +#include using namespace llvm::object; @@ -161,6 +162,7 @@ public: typedef std::reverse_iterator const_reverse_iterator; typedef std::reverse_iterator reverse_iterator; typedef BasicBlockOrderType::iterator order_iterator; + typedef BasicBlockOrderType::const_iterator const_order_iterator; // CFG iterators. iterator begin() { return BasicBlocks.begin(); } @@ -180,6 +182,14 @@ public: const BinaryBasicBlock & back() const { return BasicBlocks.back(); } BinaryBasicBlock & back() { return BasicBlocks.back(); } + unsigned layout_size() const { + return (unsigned)BasicBlocksLayout.size(); + } + const_order_iterator layout_begin() const { + return BasicBlocksLayout.begin(); + } + order_iterator layout_begin() { return BasicBlocksLayout.begin(); } + inline iterator_range layout() { return iterator_range(BasicBlocksLayout.begin(), BasicBlocksLayout.end()); @@ -281,6 +291,10 @@ public: return BB; } + /// Rebuilds BBs layout, ignoring dead BBs. Returns the number of removed + /// BBs. + unsigned eraseDeadBBs(std::map &ToPreserve); + /// Return basic block that started at offset \p Offset. BinaryBasicBlock *getBasicBlockAtOffset(uint64_t Offset) { BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset); diff --git a/bolt/llvm-flo.cpp b/bolt/llvm-flo.cpp index d7f607ba0137..20502e3e2865 100644 --- a/bolt/llvm-flo.cpp +++ b/bolt/llvm-flo.cpp @@ -51,6 +51,7 @@ #include "llvm/Target/TargetMachine.h" #include #include +#include #include #undef DEBUG_TYPE @@ -465,6 +466,7 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { // Run optimization passes. // // FIXME: use real optimization passes. + bool NagUser = true; for (auto &BFI : BinaryFunctions) { auto &Function = BFI.second; // Detect and eliminate unreachable basic blocks. We could have those @@ -473,19 +475,41 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) { // FIXME: this wouldn't work with C++ exceptions until we implement // support for those as there will be "invisible" edges // in the graph. - if (opts::EliminateUnreachable) { - bool IsFirst = true; - for (auto &BB : Function) { - if (!IsFirst && BB.pred_empty()) { - outs() << "FLO: basic block " << BB.getName() << " in function " - << Function.getName() << " is dead\n"; - // TODO: currently lacking interface to eliminate basic block. - } - IsFirst = false; + if (opts::EliminateUnreachable && Function.layout_size() > 0) { + if (NagUser) { + outs() + << "FLO-WARNING: Using -eliminate-unreachable is experimental and " + "unsafe for exceptions\n"; + NagUser = false; } + + std::stack Stack; + std::map Reachable; + BinaryBasicBlock *Entry = *Function.layout_begin(); + Stack.push(Entry); + Reachable[Entry] = true; + // Determine reachable BBs from the entry point + while (!Stack.empty()) { + auto BB = Stack.top(); + Stack.pop(); + for (auto Succ : BB->successors()) { + if (Reachable[Succ]) + continue; + Reachable[Succ] = true; + Stack.push(Succ); + } + } + + if (unsigned Count = Function.eraseDeadBBs(Reachable)) { + outs() << "FLO: Removed " << Count + << " dead basic block(s) in function " << Function.getName() + << "\n"; + } + DEBUG(dbgs() << "*** After unreachable block elimination ***\n"); DEBUG(Function.print(dbgs(), /* PrintInstructions = */ true)); } + if (opts::ReorderBlocks) { BFI.second.optimizeLayout(opts::DumpLayout); }