Make the code of blockEndsInUnreachable to match the function blockEndsInUnreachable in CodeGen/BranchFolding.cpp. I also have added a note to make sure the code of this function will not be modified unless the back-end version is also modified. An early return before outlining has been added to avoid outlining the full function body when the first block in the function is marked cold. The static analysis of cold code has been amended to avoid marking the whole function as cold by back-propagation because the back-propagation would mark blocks with return statements as cold. The patch adds debug statements to help discover these problems. Differential Revision: https://reviews.llvm.org/D52904 llvm-svn: 344558
503 lines
16 KiB
C++
503 lines
16 KiB
C++
//===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Outline cold regions to a separate function.
|
|
// TODO: Update BFI and BPI
|
|
// TODO: Add all the outlined functions to a separate section.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/Analysis/AliasAnalysis.h"
|
|
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
|
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
|
#include "llvm/Analysis/CFG.h"
|
|
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
|
#include "llvm/Analysis/PostDominators.h"
|
|
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
#include "llvm/IR/BasicBlock.h"
|
|
#include "llvm/IR/CFG.h"
|
|
#include "llvm/IR/DataLayout.h"
|
|
#include "llvm/IR/DiagnosticInfo.h"
|
|
#include "llvm/IR/Dominators.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/IR/Instruction.h"
|
|
#include "llvm/IR/Instructions.h"
|
|
#include "llvm/IR/Metadata.h"
|
|
#include "llvm/IR/Module.h"
|
|
#include "llvm/IR/PassManager.h"
|
|
#include "llvm/IR/Type.h"
|
|
#include "llvm/IR/Use.h"
|
|
#include "llvm/IR/User.h"
|
|
#include "llvm/IR/Value.h"
|
|
#include "llvm/Pass.h"
|
|
#include "llvm/Support/BlockFrequency.h"
|
|
#include "llvm/Support/BranchProbability.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include "llvm/Transforms/IPO.h"
|
|
#include "llvm/Transforms/IPO/HotColdSplitting.h"
|
|
#include "llvm/Transforms/Scalar.h"
|
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
|
#include "llvm/Transforms/Utils/Cloning.h"
|
|
#include "llvm/Transforms/Utils/CodeExtractor.h"
|
|
#include "llvm/Transforms/Utils/Local.h"
|
|
#include "llvm/Transforms/Utils/SSAUpdater.h"
|
|
#include "llvm/Transforms/Utils/ValueMapper.h"
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
|
|
#define DEBUG_TYPE "hotcoldsplit"
|
|
|
|
STATISTIC(NumColdSESEFound,
|
|
"Number of cold single entry single exit (SESE) regions found.");
|
|
STATISTIC(NumColdSESEOutlined,
|
|
"Number of cold single entry single exit (SESE) regions outlined.");
|
|
|
|
using namespace llvm;
|
|
|
|
static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
|
|
cl::init(true), cl::Hidden);
|
|
|
|
|
|
namespace {
|
|
|
|
struct PostDomTree : PostDomTreeBase<BasicBlock> {
|
|
PostDomTree(Function &F) { recalculate(F); }
|
|
};
|
|
|
|
typedef DenseSet<const BasicBlock *> DenseSetBB;
|
|
typedef DenseMap<const BasicBlock *, uint64_t> DenseMapBBInt;
|
|
|
|
// From: https://reviews.llvm.org/D22558
|
|
// Exit is not part of the region.
|
|
static bool isSingleEntrySingleExit(BasicBlock *Entry, const BasicBlock *Exit,
|
|
DominatorTree *DT, PostDomTree *PDT,
|
|
SmallVectorImpl<BasicBlock *> &Region) {
|
|
if (!DT->dominates(Entry, Exit))
|
|
return false;
|
|
|
|
if (!PDT->dominates(Exit, Entry))
|
|
return false;
|
|
|
|
for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) {
|
|
if (*I == Exit) {
|
|
I.skipChildren();
|
|
continue;
|
|
}
|
|
if (!DT->dominates(Entry, *I))
|
|
return false;
|
|
Region.push_back(*I);
|
|
++I;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
|
|
// this function unless you modify the MBB version as well.
|
|
//
|
|
/// A no successor, non-return block probably ends in unreachable and is cold.
|
|
/// Also consider a block that ends in an indirect branch to be a return block,
|
|
/// since many targets use plain indirect branches to return.
|
|
bool blockEndsInUnreachable(const BasicBlock &BB) {
|
|
if (!succ_empty(&BB))
|
|
return false;
|
|
if (BB.empty())
|
|
return true;
|
|
const Instruction *I = BB.getTerminator();
|
|
return !(isa<ReturnInst>(I) || isa<IndirectBrInst>(I));
|
|
}
|
|
|
|
static bool exceptionHandlingFunctions(const CallInst *CI) {
|
|
auto F = CI->getCalledFunction();
|
|
if (!F)
|
|
return false;
|
|
auto FName = F->getName();
|
|
return FName == "__cxa_begin_catch" ||
|
|
FName == "__cxa_free_exception" ||
|
|
FName == "__cxa_allocate_exception" ||
|
|
FName == "__cxa_begin_catch" ||
|
|
FName == "__cxa_end_catch";
|
|
}
|
|
|
|
static bool unlikelyExecuted(const BasicBlock &BB) {
|
|
if (blockEndsInUnreachable(BB))
|
|
return true;
|
|
// Exception handling blocks are unlikely executed.
|
|
if (BB.isEHPad())
|
|
return true;
|
|
for (const Instruction &I : BB)
|
|
if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
|
|
// The block is cold if it calls functions tagged as cold or noreturn.
|
|
if (CI->hasFnAttr(Attribute::Cold) ||
|
|
CI->hasFnAttr(Attribute::NoReturn) ||
|
|
exceptionHandlingFunctions(CI))
|
|
return true;
|
|
|
|
// Assume that inline assembly is hot code.
|
|
if (isa<InlineAsm>(CI->getCalledValue()))
|
|
return false;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static bool returnsOrHasSideEffects(const BasicBlock &BB) {
|
|
const TerminatorInst *I = BB.getTerminator();
|
|
if (isa<ReturnInst>(I) || isa<IndirectBrInst>(I) || isa<InvokeInst>(I))
|
|
return true;
|
|
|
|
for (const Instruction &I : BB)
|
|
if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
|
|
if (CI->hasFnAttr(Attribute::NoReturn))
|
|
return true;
|
|
|
|
if (isa<InlineAsm>(CI->getCalledValue()))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static DenseSetBB getHotBlocks(Function &F) {
|
|
|
|
// Mark all cold basic blocks.
|
|
DenseSetBB ColdBlocks;
|
|
for (BasicBlock &BB : F)
|
|
if (unlikelyExecuted(BB)) {
|
|
LLVM_DEBUG(llvm::dbgs() << "\nForward propagation marks cold: " << BB);
|
|
ColdBlocks.insert((const BasicBlock *)&BB);
|
|
}
|
|
|
|
// Forward propagation: basic blocks are hot when they are reachable from the
|
|
// beginning of the function through a path that does not contain cold blocks.
|
|
SmallVector<const BasicBlock *, 8> WL;
|
|
DenseSetBB HotBlocks;
|
|
|
|
const BasicBlock *It = &F.front();
|
|
if (!ColdBlocks.count(It)) {
|
|
HotBlocks.insert(It);
|
|
// Breadth First Search to mark edges reachable from hot.
|
|
WL.push_back(It);
|
|
while (WL.size() > 0) {
|
|
It = WL.pop_back_val();
|
|
|
|
for (const BasicBlock *Succ : successors(It)) {
|
|
// Do not visit blocks that are cold.
|
|
if (!ColdBlocks.count(Succ) && !HotBlocks.count(Succ)) {
|
|
HotBlocks.insert(Succ);
|
|
WL.push_back(Succ);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
assert(WL.empty() && "work list should be empty");
|
|
|
|
DenseMapBBInt NumHotSuccessors;
|
|
// Back propagation: when all successors of a basic block are cold, the
|
|
// basic block is cold as well.
|
|
for (BasicBlock &BBRef : F) {
|
|
const BasicBlock *BB = &BBRef;
|
|
if (HotBlocks.count(BB)) {
|
|
// Keep a count of hot successors for every hot block.
|
|
NumHotSuccessors[BB] = 0;
|
|
for (const BasicBlock *Succ : successors(BB))
|
|
if (!ColdBlocks.count(Succ))
|
|
NumHotSuccessors[BB] += 1;
|
|
|
|
// Add to work list the blocks with all successors cold. Those are the
|
|
// root nodes in the next loop, where we will move those blocks from
|
|
// HotBlocks to ColdBlocks and iterate over their predecessors.
|
|
if (NumHotSuccessors[BB] == 0)
|
|
WL.push_back(BB);
|
|
}
|
|
}
|
|
|
|
while (WL.size() > 0) {
|
|
It = WL.pop_back_val();
|
|
if (ColdBlocks.count(It))
|
|
continue;
|
|
|
|
// Do not back-propagate to blocks that return or have side effects.
|
|
if (returnsOrHasSideEffects(*It))
|
|
continue;
|
|
|
|
// Move the block from HotBlocks to ColdBlocks.
|
|
LLVM_DEBUG(llvm::dbgs() << "\nBack propagation marks cold: " << *It);
|
|
HotBlocks.erase(It);
|
|
ColdBlocks.insert(It);
|
|
|
|
// Iterate over the predecessors.
|
|
for (const BasicBlock *Pred : predecessors(It)) {
|
|
if (HotBlocks.count(Pred)) {
|
|
NumHotSuccessors[Pred] -= 1;
|
|
|
|
// If Pred has no more hot successors, add it to the work list.
|
|
if (NumHotSuccessors[Pred] == 0)
|
|
WL.push_back(Pred);
|
|
}
|
|
}
|
|
}
|
|
|
|
return HotBlocks;
|
|
}
|
|
|
|
class HotColdSplitting {
|
|
public:
|
|
HotColdSplitting(ProfileSummaryInfo *ProfSI,
|
|
function_ref<BlockFrequencyInfo *(Function &)> GBFI,
|
|
function_ref<TargetTransformInfo &(Function &)> GTTI,
|
|
std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
|
|
: PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
|
|
bool run(Module &M);
|
|
|
|
private:
|
|
bool shouldOutlineFrom(const Function &F) const;
|
|
const Function *outlineColdBlocks(Function &F, const DenseSetBB &ColdBlock,
|
|
DominatorTree *DT, PostDomTree *PDT);
|
|
Function *extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
|
|
DominatorTree *DT, BlockFrequencyInfo *BFI,
|
|
OptimizationRemarkEmitter &ORE);
|
|
bool isOutlineCandidate(const SmallVectorImpl<BasicBlock *> &Region,
|
|
const BasicBlock *Exit) const {
|
|
if (!Exit)
|
|
return false;
|
|
|
|
// Regions with landing pads etc.
|
|
for (const BasicBlock *BB : Region) {
|
|
if (BB->isEHPad() || BB->hasAddressTaken())
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
SmallPtrSet<const Function *, 2> OutlinedFunctions;
|
|
ProfileSummaryInfo *PSI;
|
|
function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
|
|
function_ref<TargetTransformInfo &(Function &)> GetTTI;
|
|
std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
|
|
};
|
|
|
|
class HotColdSplittingLegacyPass : public ModulePass {
|
|
public:
|
|
static char ID;
|
|
HotColdSplittingLegacyPass() : ModulePass(ID) {
|
|
initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
AU.addRequired<AssumptionCacheTracker>();
|
|
AU.addRequired<BlockFrequencyInfoWrapperPass>();
|
|
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
|
AU.addRequired<TargetTransformInfoWrapperPass>();
|
|
}
|
|
|
|
bool runOnModule(Module &M) override;
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
// Returns false if the function should not be considered for hot-cold split
|
|
// optimization.
|
|
bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
|
|
// Do not try to outline again from an already outlined cold function.
|
|
if (OutlinedFunctions.count(&F))
|
|
return false;
|
|
|
|
if (F.size() <= 2)
|
|
return false;
|
|
|
|
if (F.hasAddressTaken())
|
|
return false;
|
|
|
|
if (F.hasFnAttribute(Attribute::AlwaysInline))
|
|
return false;
|
|
|
|
if (F.hasFnAttribute(Attribute::NoInline))
|
|
return false;
|
|
|
|
if (F.getCallingConv() == CallingConv::Cold)
|
|
return false;
|
|
|
|
if (PSI->isFunctionEntryCold(&F))
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
Function *
|
|
HotColdSplitting::extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
|
|
DominatorTree *DT, BlockFrequencyInfo *BFI,
|
|
OptimizationRemarkEmitter &ORE) {
|
|
LLVM_DEBUG(for (auto *BB : Region)
|
|
llvm::dbgs() << "\nExtracting: " << *BB;);
|
|
|
|
// TODO: Pass BFI and BPI to update profile information.
|
|
CodeExtractor CE(Region, DT);
|
|
|
|
SetVector<Value *> Inputs, Outputs, Sinks;
|
|
CE.findInputsOutputs(Inputs, Outputs, Sinks);
|
|
|
|
// Do not extract regions that have live exit variables.
|
|
if (Outputs.size() > 0)
|
|
return nullptr;
|
|
|
|
if (Function *OutF = CE.extractCodeRegion()) {
|
|
User *U = *OutF->user_begin();
|
|
CallInst *CI = cast<CallInst>(U);
|
|
CallSite CS(CI);
|
|
NumColdSESEOutlined++;
|
|
if (GetTTI(*OutF).useColdCCForColdCall(*OutF)) {
|
|
OutF->setCallingConv(CallingConv::Cold);
|
|
CS.setCallingConv(CallingConv::Cold);
|
|
}
|
|
CI->setIsNoInline();
|
|
LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
|
|
return OutF;
|
|
}
|
|
|
|
ORE.emit([&]() {
|
|
return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
|
|
&*Region[0]->begin())
|
|
<< "Failed to extract region at block "
|
|
<< ore::NV("Block", Region.front());
|
|
});
|
|
return nullptr;
|
|
}
|
|
|
|
// Return the function created after outlining, nullptr otherwise.
|
|
const Function *HotColdSplitting::outlineColdBlocks(Function &F,
|
|
const DenseSetBB &HotBlocks,
|
|
DominatorTree *DT,
|
|
PostDomTree *PDT) {
|
|
auto BFI = GetBFI(F);
|
|
auto &ORE = (*GetORE)(F);
|
|
// Walking the dominator tree allows us to find the largest
|
|
// cold region.
|
|
BasicBlock *Begin = DT->getRootNode()->getBlock();
|
|
|
|
// Early return if the beginning of the function has been marked cold,
|
|
// otherwise all the function gets outlined.
|
|
if (PSI->isColdBB(Begin, BFI) || !HotBlocks.count(Begin))
|
|
return nullptr;
|
|
|
|
for (auto I = df_begin(Begin), E = df_end(Begin); I != E; ++I) {
|
|
BasicBlock *BB = *I;
|
|
if (PSI->isColdBB(BB, BFI) || !HotBlocks.count(BB)) {
|
|
SmallVector<BasicBlock *, 4> ValidColdRegion, Region;
|
|
BasicBlock *Exit = (*PDT)[BB]->getIDom()->getBlock();
|
|
BasicBlock *ExitColdRegion = nullptr;
|
|
|
|
// Estimated cold region between a BB and its dom-frontier.
|
|
while (Exit && isSingleEntrySingleExit(BB, Exit, DT, PDT, Region) &&
|
|
isOutlineCandidate(Region, Exit)) {
|
|
ExitColdRegion = Exit;
|
|
ValidColdRegion = Region;
|
|
Region.clear();
|
|
// Update Exit recursively to its dom-frontier.
|
|
Exit = (*PDT)[Exit]->getIDom()->getBlock();
|
|
}
|
|
if (ExitColdRegion) {
|
|
// Do not outline a region with only one block.
|
|
if (ValidColdRegion.size() == 1)
|
|
continue;
|
|
|
|
++NumColdSESEFound;
|
|
ValidColdRegion.push_back(ExitColdRegion);
|
|
// Candidate for outlining. FIXME: Continue outlining.
|
|
return extractColdRegion(ValidColdRegion, DT, BFI, ORE);
|
|
}
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
bool HotColdSplitting::run(Module &M) {
|
|
for (auto &F : M) {
|
|
if (!shouldOutlineFrom(F))
|
|
continue;
|
|
DominatorTree DT(F);
|
|
PostDomTree PDT(F);
|
|
PDT.recalculate(F);
|
|
DenseSetBB HotBlocks;
|
|
if (EnableStaticAnalyis) // Static analysis of cold blocks.
|
|
HotBlocks = getHotBlocks(F);
|
|
|
|
const Function *Outlined = outlineColdBlocks(F, HotBlocks, &DT, &PDT);
|
|
if (Outlined)
|
|
OutlinedFunctions.insert(Outlined);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
|
|
if (skipModule(M))
|
|
return false;
|
|
ProfileSummaryInfo *PSI =
|
|
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
|
auto GTTI = [this](Function &F) -> TargetTransformInfo & {
|
|
return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
|
};
|
|
auto GBFI = [this](Function &F) {
|
|
return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
|
|
};
|
|
std::unique_ptr<OptimizationRemarkEmitter> ORE;
|
|
std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
|
|
[&ORE](Function &F) -> OptimizationRemarkEmitter & {
|
|
ORE.reset(new OptimizationRemarkEmitter(&F));
|
|
return *ORE.get();
|
|
};
|
|
|
|
return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
|
|
}
|
|
|
|
PreservedAnalyses
|
|
HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
|
|
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
|
|
|
|
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
|
|
[&FAM](Function &F) -> AssumptionCache & {
|
|
return FAM.getResult<AssumptionAnalysis>(F);
|
|
};
|
|
|
|
auto GBFI = [&FAM](Function &F) {
|
|
return &FAM.getResult<BlockFrequencyAnalysis>(F);
|
|
};
|
|
|
|
std::function<TargetTransformInfo &(Function &)> GTTI =
|
|
[&FAM](Function &F) -> TargetTransformInfo & {
|
|
return FAM.getResult<TargetIRAnalysis>(F);
|
|
};
|
|
|
|
std::unique_ptr<OptimizationRemarkEmitter> ORE;
|
|
std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
|
|
[&ORE](Function &F) -> OptimizationRemarkEmitter & {
|
|
ORE.reset(new OptimizationRemarkEmitter(&F));
|
|
return *ORE.get();
|
|
};
|
|
|
|
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
|
|
|
|
if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M))
|
|
return PreservedAnalyses::none();
|
|
return PreservedAnalyses::all();
|
|
}
|
|
|
|
char HotColdSplittingLegacyPass::ID = 0;
|
|
INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit",
|
|
"Hot Cold Splitting", false, false)
|
|
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
|
|
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
|
|
INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit",
|
|
"Hot Cold Splitting", false, false)
|
|
|
|
ModulePass *llvm::createHotColdSplittingPass() {
|
|
return new HotColdSplittingLegacyPass();
|
|
}
|