Files
clang-p2996/bolt/BinaryFunction.cpp
Maksim Panchenko 85b99eb7b7 Eliminate nop instruction in input and derive alignment.
Summary:
Nop instructions are primarily used for alignment purposes on the input.
We remove all nops when we build CFG and derive alignment of basic blocks
based on existing alignment and a presence of nops before it. This
will not always work as some basic blocks will be naturally aligned
without necessity for nops. However, it's better than random alignment.
We would also add heuristics for BB alignment based on execution profile.

(cherry picked from FBD2561740)
2015-10-20 10:51:17 -07:00

926 lines
30 KiB
C++

//===--- BinaryFunction.cpp - Interface for machine-level function --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#include "DataReader.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <limits>
#include <queue>
#include <string>
#undef DEBUG_TYPE
#define DEBUG_TYPE "flo"
namespace llvm {
namespace flo {
BinaryBasicBlock *
BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
if (Offset > Size)
return nullptr;
if (BasicBlocks.empty())
return nullptr;
auto I = std::upper_bound(BasicBlocks.begin(),
BasicBlocks.end(),
BinaryBasicBlock(Offset));
assert(I != BasicBlocks.begin() && "first basic block not at offset 0");
return &(*--I);
}
void BinaryFunction::print(raw_ostream &OS, bool PrintInstructions) const {
StringRef SectionName;
Section.getName(SectionName);
OS << "Binary Function \"" << getName() << "\" {"
<< "\n State : " << CurrentState
<< "\n Address : 0x" << Twine::utohexstr(Address)
<< "\n Size : 0x" << Twine::utohexstr(Size)
<< "\n MaxSize : 0x" << Twine::utohexstr(MaxSize)
<< "\n Offset : 0x" << Twine::utohexstr(FileOffset)
<< "\n Section : " << SectionName
<< "\n Orc Section : " << getCodeSectionName()
<< "\n IsSimple : " << IsSimple
<< "\n BB count : " << BasicBlocks.size()
<< "\n Image : 0x" << Twine::utohexstr(ImageAddress);
if (ExecutionCount != COUNT_NO_PROFILE)
OS << "\n Exec Count : " << ExecutionCount;
OS << "\n}\n";
if (!PrintInstructions || !BC.InstPrinter)
return;
// Offset of the instruction in function.
uint64_t Offset{0};
if (BasicBlocks.empty() && !Instructions.empty()) {
// Print before CFG was built.
for (const auto &II : Instructions) {
auto Offset = II.first;
// Print label if exists at this offset.
auto LI = Labels.find(Offset);
if (LI != Labels.end())
OS << LI->second->getName() << ":\n";
auto &Instruction = II.second;
OS << format(" %08" PRIx64 ": ", Offset);
BC.InstPrinter->printInst(&Instruction, OS, "", *BC.STI);
OS << "\n";
}
}
for (auto BB : BasicBlocksLayout) {
OS << BB->getName() << " ("
<< BB->Instructions.size() << " instructions, align : "
<< BB->getAlignment() << ")\n";
uint64_t BBExecCount = BB->getExecutionCount();
if (BBExecCount != BinaryBasicBlock::COUNT_NO_PROFILE) {
OS << " Exec Count : " << BBExecCount << "\n";
}
if (!BB->Predecessors.empty()) {
OS << " Predecessors: ";
auto Sep = "";
for (auto Pred : BB->Predecessors) {
OS << Sep << Pred->getName();
Sep = ", ";
}
OS << '\n';
}
Offset = RoundUpToAlignment(Offset, BB->getAlignment());
for (auto &Instr : *BB) {
OS << format(" %08" PRIx64 ": ", Offset);
BC.InstPrinter->printInst(&Instr, OS, "", *BC.STI);
OS << "\n";
// In case we need MCInst printer:
// Instr.dump_pretty(OS, InstructionPrinter.get());
// Calculate the size of the instruction.
// Note: this is imprecise since happening prior to relaxation.
SmallString<256> Code;
SmallVector<MCFixup, 4> Fixups;
raw_svector_ostream VecOS(Code);
BC.MCE->encodeInstruction(Instr, VecOS, Fixups, *BC.STI);
Offset += Code.size();
}
if (!BB->Successors.empty()) {
OS << " Successors: ";
auto BI = BB->BranchInfo.begin();
auto Sep = "";
for (auto Succ : BB->Successors) {
assert(BI != BB->BranchInfo.end() && "missing BranchInfo entry");
OS << Sep << Succ->getName();
if (ExecutionCount != COUNT_NO_PROFILE &&
BI->MispredictedCount != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
OS << " (mispreds: " << BI->MispredictedCount
<< ", count: " << BI->Count << ")";
} else if (ExecutionCount != COUNT_NO_PROFILE &&
BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
OS << " (inferred count: " << BI->Count << ")";
}
Sep = ", ";
++BI;
}
OS << '\n';
}
OS << '\n';
}
OS << "End of Function \"" << getName() << "\"\n";
}
bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
assert(FunctionData.size() == getSize() &&
"function size does not match raw data size");
auto &Ctx = BC.Ctx;
auto &MIA = BC.MIA;
// Insert a label at the beginning of the function. This will be our first
// basic block.
Labels[0] = Ctx->createTempSymbol("BB0", false);
bool IsSimple = true;
for (uint64_t Offset = 0; IsSimple && (Offset < getSize()); ) {
MCInst Instruction;
uint64_t Size;
if (!BC.DisAsm->getInstruction(Instruction,
Size,
FunctionData.slice(Offset),
getAddress() + Offset,
nulls(),
nulls())) {
// Ignore this function. Skip to the next one.
IsSimple = false;
break;
}
if (MIA->isUnsupported(Instruction)) {
DEBUG(dbgs() << "FLO: unsupported instruction seen. Skipping function "
<< getName() << ".\n");
IsSimple = false;
break;
}
if (MIA->isIndirectBranch(Instruction)) {
DEBUG(dbgs() << "FLO: indirect branch seen. Skipping function "
<< getName() << ".\n");
IsSimple = false;
break;
}
uint64_t AbsoluteInstrAddr = getAddress() + Offset;
if (MIA->isBranch(Instruction) || MIA->isCall(Instruction)) {
uint64_t InstructionTarget = 0;
if (MIA->evaluateBranch(Instruction,
AbsoluteInstrAddr,
Size,
InstructionTarget)) {
// Check if the target is within the same function. Otherwise it's
// a call, possibly a tail call.
//
// If the target *is* the function address it could be either a branch
// or a recursive call.
bool IsCall = MIA->isCall(Instruction);
MCSymbol *TargetSymbol{nullptr};
uint64_t TargetOffset{0};
if (IsCall && containsAddress(InstructionTarget)) {
if (InstructionTarget == getAddress()) {
// Recursive call.
TargetSymbol = Ctx->getOrCreateSymbol(getName());
} else {
// Possibly an old-style PIC code
DEBUG(dbgs() << "FLO: internal call detected at 0x"
<< Twine::utohexstr(AbsoluteInstrAddr)
<< " in function " << getName() << "\n");
IsSimple = false;
break;
}
}
if (!TargetSymbol) {
// Create either local label or external symbol.
if (containsAddress(InstructionTarget)) {
// Check if there's already a registered label.
TargetOffset = InstructionTarget - getAddress();
auto LI = Labels.find(TargetOffset);
if (LI == Labels.end()) {
TargetSymbol = Ctx->createTempSymbol();
Labels[TargetOffset] = TargetSymbol;
} else {
TargetSymbol = LI->second;
}
} else {
// This is a call regardless of the opcode (e.g. tail call).
IsCall = true;
TargetSymbol = BC.getOrCreateGlobalSymbol(InstructionTarget,
"FUNCat");
}
}
Instruction.clear();
Instruction.addOperand(
MCOperand::createExpr(
MCSymbolRefExpr::create(TargetSymbol,
MCSymbolRefExpr::VK_None,
*Ctx)));
if (!IsCall) {
// Add local branch info.
LocalBranches.push_back({Offset, TargetOffset});
}
} else {
// Indirect call
DEBUG(dbgs() << "FLO: indirect call detected (not yet supported)\n");
IsSimple = false;
break;
}
} else {
if (MIA->hasRIPOperand(Instruction)) {
uint64_t TargetAddress{0};
MCSymbol *TargetSymbol{nullptr};
if (!MIA->evaluateRIPOperand(Instruction, AbsoluteInstrAddr,
Size, TargetAddress)) {
DEBUG(
dbgs() << "FLO: rip-relative operand could not be evaluated:\n";
BC.InstPrinter->printInst(&Instruction, dbgs(), "", *BC.STI);
dbgs() << '\n';
Instruction.dump_pretty(dbgs(), BC.InstPrinter.get());
dbgs() << '\n';
);
IsSimple = false;
break;
}
// FIXME: check that the address is in data, not in code.
TargetSymbol = BC.getOrCreateGlobalSymbol(TargetAddress, "DATAat");
MIA->replaceRIPOperandDisp(
Instruction,
MCOperand::createExpr(
MCSymbolRefExpr::create(TargetSymbol,
MCSymbolRefExpr::VK_None,
*Ctx)));
}
}
addInstruction(Offset, std::move(Instruction));
Offset += Size;
}
setSimple(IsSimple);
// TODO: clear memory if not simple function?
// Update state.
updateState(State::Disassembled);
// Print the function in the new state.
DEBUG(print(dbgs(), /* PrintInstructions = */ true));
return true;
}
bool BinaryFunction::buildCFG() {
auto &MIA = BC.MIA;
auto BranchDataOrErr = BC.DR.getFuncBranchData(getName());
if (std::error_code EC = BranchDataOrErr.getError()) {
DEBUG(dbgs() << "no branch data found for \"" << getName() << "\"\n");
} else {
ExecutionCount = BC.DR.countBranchesTo(getName());
}
if (!isSimple())
return false;
if (!(CurrentState == State::Disassembled))
return false;
assert(BasicBlocks.empty() && "basic block list should be empty");
assert((Labels.find(0) != Labels.end()) &&
"first instruction should always have a label");
// Create basic blocks in the original layout order:
//
// * Every instruction with associated label marks
// the beginning of a basic block.
// * Conditional instruction marks the end of a basic block,
// except when the following instruction is an
// unconditional branch, and the unconditional branch is not
// a destination of another branch. In the latter case, the
// basic block will consist of a single unconditional branch
// (missed optimization opportunity?).
//
// Created basic blocks are sorted in layout order since they are
// created in the same order as instructions, and instructions are
// sorted by offsets.
BinaryBasicBlock *InsertBB{nullptr};
BinaryBasicBlock *PrevBB{nullptr};
bool IsLastInstrNop = false;
for (auto &InstrInfo : Instructions) {
auto LI = Labels.find(InstrInfo.first);
if (LI != Labels.end()) {
// Always create new BB at branch destination.
PrevBB = InsertBB;
InsertBB = addBasicBlock(LI->first, LI->second,
/* DeriveAlignment = */ IsLastInstrNop);
}
if (!InsertBB) {
// It must be a fallthrough. Create a new block unless we see an
// unconditional branch.
assert(PrevBB && "no previous basic block for a fall through");
if (MIA->isUnconditionalBranch(InstrInfo.second)) {
// Temporarily restore inserter basic block.
InsertBB = PrevBB;
} else {
InsertBB = addBasicBlock(InstrInfo.first,
BC.Ctx->createTempSymbol("FT", true),
/* DeriveAlignment = */ IsLastInstrNop);
}
}
// Ignore nops. We use nops to derive alignment of the next basic block.
// It will not always work, as some blocks are naturally aligned, but
// it's just part of heuristic for block alignment.
if (MIA->isNoop(InstrInfo.second)) {
IsLastInstrNop = true;
continue;
}
IsLastInstrNop = false;
InsertBB->addInstruction(InstrInfo.second);
// How well do we detect tail calls here?
if (MIA->isTerminator(InstrInfo.second)) {
PrevBB = InsertBB;
InsertBB = nullptr;
}
}
// Set the basic block layout to the original order
for (auto &BB : BasicBlocks) {
BasicBlocksLayout.emplace_back(&BB);
}
// Intermediate dump.
DEBUG(print(dbgs(), /* PrintInstructions = */ true));
// TODO: handle properly calls to no-return functions,
// e.g. exit(3), etc. Otherwise we'll see a false fall-through
// blocks.
for (auto &Branch : LocalBranches) {
DEBUG(dbgs() << "registering branch [0x" << Twine::utohexstr(Branch.first)
<< "] -> [0x" << Twine::utohexstr(Branch.second) << "]\n");
BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first);
assert(FromBB && "cannot find BB containing FROM branch");
BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
assert(ToBB && "cannot find BB containing TO branch");
if (std::error_code EC = BranchDataOrErr.getError()) {
FromBB->addSuccessor(ToBB);
} else {
const FuncBranchData &BranchData = BranchDataOrErr.get();
auto BranchInfoOrErr = BranchData.getBranch(Branch.first, Branch.second);
if (std::error_code EC = BranchInfoOrErr.getError()) {
FromBB->addSuccessor(ToBB);
} else {
const BranchInfo &BInfo = BranchInfoOrErr.get();
FromBB->addSuccessor(ToBB, BInfo.Branches, BInfo.Mispreds);
}
}
}
// Add fall-through branches.
PrevBB = nullptr;
bool IsPrevFT = false; // Is previous block a fall-through.
for (auto &BB : BasicBlocks) {
if (IsPrevFT) {
PrevBB->addSuccessor(&BB, BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE,
BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE);
}
MCInst &LastInst = BB.back();
if (BB.empty()) {
IsPrevFT = true;
} else if (BB.succ_size() == 0) {
IsPrevFT = MIA->isTerminator(LastInst) ? false : true;
} else if (BB.succ_size() == 1) {
IsPrevFT = MIA->isConditionalBranch(LastInst) ? true : false;
} else {
// Either ends with 2 branches, or with an indirect jump.
IsPrevFT = false;
}
PrevBB = &BB;
}
if (!IsPrevFT) {
// Possibly a call that does not return.
DEBUG(dbgs() << "last block was marked as a fall-through\n");
}
// Infer frequency for non-taken branches
if (ExecutionCount != COUNT_NO_PROFILE && !BranchDataOrErr.getError()) {
inferFallThroughCounts();
}
// Clean-up memory taken by instructions and labels.
clearInstructions();
clearLabels();
clearLocalBranches();
// Update the state.
CurrentState = State::CFG;
// Print the function in the new state.
DEBUG(print(dbgs(), /* PrintInstructions = */ true));
return true;
}
void BinaryFunction::inferFallThroughCounts() {
assert(!BasicBlocks.empty() && "basic block list should not be empty");
// Compute preliminary execution time for each basic block
for (auto &CurBB : BasicBlocks) {
if (&CurBB == &*BasicBlocks.begin()) {
CurBB.ExecutionCount = ExecutionCount;
continue;
}
CurBB.ExecutionCount = 0;
}
for (auto &CurBB : BasicBlocks) {
auto SuccCount = CurBB.BranchInfo.begin();
for (auto Succ : CurBB.successors()) {
if (SuccCount->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE)
Succ->ExecutionCount += SuccCount->Count;
++SuccCount;
}
}
// Work on a basic block at a time, propagating frequency information forwards
// It is important to walk in the layour order
for (auto &CurBB : BasicBlocks) {
uint64_t BBExecCount = CurBB.getExecutionCount();
// Propagate this information to successors, filling in fall-through edges
// with frequency information
if (CurBB.succ_size() == 0)
continue;
// Calculate frequency of outgoing branches from this node according to
// LBR data
uint64_t ReportedBranches = 0;
for (auto &SuccCount : CurBB.BranchInfo) {
if (SuccCount.Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE)
ReportedBranches += SuccCount.Count;
}
// Infer the frequency of the fall-through edge, representing not taking the
// branch
uint64_t Inferred = 0;
if (BBExecCount > ReportedBranches)
Inferred = BBExecCount - ReportedBranches;
if (BBExecCount < ReportedBranches)
errs() << "FLO-WARNING: Fall-through inference is slightly inconsistent. "
"BB exec frequency is less than the outgoing edges frequency\n";
// Put this information into the fall-through edge
if (CurBB.succ_size() == 0)
continue;
// If there is a FT, the last successor will be it.
auto &SuccCount = CurBB.BranchInfo.back();
auto &Succ = CurBB.Successors.back();
if (SuccCount.Count == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
SuccCount.Count = Inferred;
Succ->ExecutionCount += Inferred;
}
} // end for (CurBB : BasicBlocks)
return;
}
void BinaryFunction::optimizeLayout(bool DumpLayout) {
// Bail if no profiling information or if empty
if (getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE || empty()) {
return;
}
// Work on optimal solution if problem is small enough
if (BasicBlocks.size() <= FUNC_SIZE_THRESHOLD)
return solveOptimalLayout(DumpLayout);
if (DumpLayout) {
dbgs() << "running block layout heuristics on " << getName() << "\n";
}
// Greedy heuristic implementation for the TSP, applied to BB layout. Try to
// maximize weight during a path traversing all BBs. In this way, we will
// convert the hottest branches into fall-throughs.
// Encode an edge between two basic blocks, source and destination
typedef std::pair<BinaryBasicBlock *, BinaryBasicBlock *> EdgeTy;
std::map<EdgeTy, uint64_t> Weight;
// Define a comparison function to establish SWO between edges
auto Comp = [&Weight](EdgeTy A, EdgeTy B) { return Weight[A] > Weight[B]; };
std::priority_queue<EdgeTy, std::vector<EdgeTy>, decltype(Comp)> Queue(Comp);
typedef std::vector<BinaryBasicBlock *> ClusterTy;
typedef std::map<BinaryBasicBlock *, int> BBToClusterMapTy;
std::vector<ClusterTy> Clusters;
BBToClusterMapTy BBToClusterMap;
// Populating priority queue with all edges
for (auto &BB : BasicBlocks) {
BBToClusterMap[&BB] = -1; // Mark as unmapped
auto BI = BB.BranchInfo.begin();
for (auto &I : BB.successors()) {
if (BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE)
Weight[std::make_pair(&BB, I)] = BI->Count;
Queue.push(std::make_pair(&BB, I));
++BI;
}
}
// Start a cluster with the entry point
BinaryBasicBlock *Entry = &*BasicBlocks.begin();
Clusters.emplace_back();
auto &EntryCluster = Clusters.back();
EntryCluster.push_back(Entry);
BBToClusterMap[Entry] = 0;
// Grow clusters in a greedy fashion
while (!Queue.empty()) {
auto elmt = Queue.top();
Queue.pop();
BinaryBasicBlock *BBSrc = elmt.first;
BinaryBasicBlock *BBDst = elmt.second;
int I = 0, J = 0;
// Case 1: BBSrc and BBDst are the same. Ignore this edge
if (BBSrc == BBDst)
continue;
// Case 2: Both BBSrc and BBDst are already allocated
if ((I = BBToClusterMap[BBSrc]) != -1 &&
(J = BBToClusterMap[BBDst]) != -1) {
// Case 2a: If they are already allocated at the same cluster, ignore
if (I == J)
continue;
auto &ClusterA = Clusters[I];
auto &ClusterB = Clusters[J];
if (ClusterA.back() == BBSrc && ClusterB.front() == BBDst) {
// Case 2b: BBSrc is at the end of a cluster and BBDst is at the start,
// allowing us to merge two clusters
for (auto BB : ClusterB)
BBToClusterMap[BB] = I;
ClusterA.insert(ClusterA.end(), ClusterB.begin(), ClusterB.end());
ClusterB.clear();
} else {
// Case 2c: Both BBSrc and BBDst are allocated in positions we cannot
// merge them, so we ignore this edge.
}
continue;
}
// Case 3: BBSrc is already allocated in a cluster
if ((I = BBToClusterMap[BBSrc]) != -1) {
auto &Cluster = Clusters[I];
if (Cluster.back() == BBSrc) {
// Case 3a: BBSrc is allocated at the end of this cluster. We put
// BBSrc and BBDst together.
Cluster.push_back(BBDst);
BBToClusterMap[BBDst] = I;
} else {
// Case 3b: We cannot put BBSrc and BBDst in consecutive positions,
// so we ignore this edge.
}
continue;
}
// Case 4: BBSrc is not in a cluster, but BBDst is
if ((I = BBToClusterMap[BBDst]) != -1) {
auto &Cluster = Clusters[I];
if (Cluster.front() == BBDst) {
// Case 4a: BBDst is allocated at the start of this cluster. We put
// BBSrc and BBDst together.
Cluster.insert(Cluster.begin(), BBSrc);
BBToClusterMap[BBSrc] = I;
} else {
// Case 4b: We cannot put BBSrc and BBDst in consecutive positions,
// so we ignore this edge.
}
continue;
}
// Case 5: Both BBSrc and BBDst are unallocated, so we create a new cluster
// with them
I = Clusters.size();
Clusters.emplace_back();
auto &Cluster = Clusters.back();
Cluster.push_back(BBSrc);
Cluster.push_back(BBDst);
BBToClusterMap[BBSrc] = I;
BBToClusterMap[BBDst] = I;
}
// Define final function layout based on clusters
BasicBlocksLayout.clear();
for (auto &Cluster : Clusters) {
BasicBlocksLayout.insert(BasicBlocksLayout.end(), Cluster.begin(),
Cluster.end());
}
// Finalize layout with BBs that weren't assigned to any cluster, preserving
// their relative order
for (auto &BB : BasicBlocks) {
if (BBToClusterMap[&BB] == -1) {
BasicBlocksLayout.push_back(&BB);
}
}
fixBranches();
if (DumpLayout) {
dbgs() << "original BB order is: ";
auto Sep = "";
for (auto &BB : BasicBlocks) {
dbgs() << Sep << BB.getName();
Sep = ",";
}
dbgs() << "\nnew order is: ";
Sep = "";
for (auto BB : BasicBlocksLayout) {
dbgs() << Sep << BB->getName();
Sep = ",";
}
dbgs() << "\n";
print(dbgs(), /* PrintInstructions = */ true);
}
}
void BinaryFunction::solveOptimalLayout(bool DumpLayout) {
std::vector<std::vector<uint64_t>> Weight;
std::map<BinaryBasicBlock *, int> BBToIndex;
std::vector<BinaryBasicBlock *> IndexToBB;
if (DumpLayout) {
dbgs() << "finding optimal block layout for " << getName() << "\n";
}
unsigned N = BasicBlocks.size();
// Populating weight map and index map
for (auto &BB : BasicBlocks) {
BBToIndex[&BB] = IndexToBB.size();
IndexToBB.push_back(&BB);
}
Weight.resize(N);
for (auto &BB : BasicBlocks) {
auto BI = BB.BranchInfo.begin();
Weight[BBToIndex[&BB]].resize(N);
for (auto &I : BB.successors()) {
if (BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE)
Weight[BBToIndex[&BB]][BBToIndex[I]] = BI->Count;
++BI;
}
}
std::vector<std::vector<int64_t>> DP;
DP.resize(1 << N);
for (auto &Elmt : DP) {
Elmt.resize(N, -1);
}
// Start with the entry basic block being allocated with cost zero
DP[1][0] = 0;
// Walk through TSP solutions using a bitmask to represent state (current set
// of BBs in the layout)
unsigned BestSet = 1;
unsigned BestLast = 0;
int64_t BestWeight = 0;
for (unsigned Set = 1; Set < (1U << N); ++Set) {
// Traverse each possibility of Last BB visited in this layout
for (unsigned Last = 0; Last < N; ++Last) {
// Case 1: There is no possible layout with this BB as Last
if (DP[Set][Last] == -1)
continue;
// Case 2: There is a layout with this Set and this Last, and we try
// to expand this set with New
for (unsigned New = 1; New < N; ++New) {
// Case 2a: BB "New" is already in this Set
if ((Set & (1 << New)) != 0)
continue;
// Case 2b: BB "New" is not in this set and we add it to this Set and
// record total weight of this layout with "New" as the last BB.
unsigned NewSet = (Set | (1 << New));
if (DP[NewSet][New] == -1)
DP[NewSet][New] = DP[Set][Last] + (int64_t)Weight[Last][New];
DP[NewSet][New] = std::max(DP[NewSet][New],
DP[Set][Last] + (int64_t)Weight[Last][New]);
if (DP[NewSet][New] > BestWeight) {
BestWeight = DP[NewSet][New];
BestSet = NewSet;
BestLast = New;
}
}
}
}
// Define final function layout based on layout that maximizes weight
BasicBlocksLayout.clear();
unsigned Last = BestLast;
unsigned Set = BestSet;
std::vector<bool> Visited;
Visited.resize(N);
Visited[Last] = true;
BasicBlocksLayout.push_back(IndexToBB[Last]);
Set = Set & ~(1U << Last);
while (Set != 0) {
int64_t Best = -1;
for (unsigned I = 0; I < N; ++I) {
if (DP[Set][I] == -1)
continue;
if (DP[Set][I] > Best) {
Last = I;
Best = DP[Set][I];
}
}
Visited[Last] = true;
BasicBlocksLayout.push_back(IndexToBB[Last]);
Set = Set & ~(1U << Last);
}
std::reverse(BasicBlocksLayout.begin(), BasicBlocksLayout.end());
// Finalize layout with BBs that weren't assigned to the layout
for (auto &BB : BasicBlocks) {
if (Visited[BBToIndex[&BB]] == false)
BasicBlocksLayout.push_back(&BB);
}
fixBranches();
if (DumpLayout) {
dbgs() << "original BB order is: ";
auto Sep = "";
for (auto &BB : BasicBlocks) {
dbgs() << Sep << BB.getName();
Sep = ",";
}
dbgs() << "\nnew order is: ";
Sep = "";
for (auto BB : BasicBlocksLayout) {
dbgs() << Sep << BB->getName();
Sep = ",";
}
dbgs() << "\n";
print(dbgs(), /* PrintInstructions = */ true);
}
}
const BinaryBasicBlock *
BinaryFunction::getOriginalLayoutSuccessor(const BinaryBasicBlock *BB) const {
auto I = std::upper_bound(BasicBlocks.begin(), BasicBlocks.end(), *BB);
assert(I != BasicBlocks.begin() && "first basic block not at offset 0");
if (I == BasicBlocks.end())
return nullptr;
return &*I;
}
void BinaryFunction::fixBranches() {
auto &MIA = BC.MIA;
for (unsigned I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
BinaryBasicBlock *BB = BasicBlocksLayout[I];
if (BB->begin() == BB->end())
continue;
const MCSymbol *TBB = nullptr;
const MCSymbol *FBB = nullptr;
MCInst *CondBranch = nullptr;
MCInst *UncondBranch = nullptr;
if (!MIA->analyzeBranch(BB->Instructions, TBB, FBB, CondBranch,
UncondBranch)) {
continue;
}
// Check if the original fall-through for this block has been moved
const MCSymbol *FT = nullptr;
if (I + 1 != BasicBlocksLayout.size())
FT = BasicBlocksLayout[I + 1]->getLabel();
const BinaryBasicBlock *OldFTBB = getOriginalLayoutSuccessor(BB);
const MCSymbol *OldFT = nullptr;
if (OldFTBB != nullptr)
OldFT = OldFTBB->getLabel();
// Case 1: There are no branches in this basic block and it just falls
// through
if (CondBranch == nullptr && UncondBranch == nullptr) {
// Case 1a: Last instruction is a return, so it does *not* fall through to
// the next block.
if (MIA->isReturn(BB->back()))
continue;
// Case 1b: Layout has changed and the fallthrough is not the same. Need
// to add a new unconditional branch to jump to the old fallthrough.
if (FT != OldFT && OldFT != nullptr) {
MCInst NewInst;
if (!MIA->createUncondBranch(NewInst, OldFT, BC.Ctx.get()))
llvm_unreachable("Target does not support creating new branches");
BB->Instructions.emplace_back(std::move(NewInst));
}
// Case 1c: Layout hasn't changed, nothing to do.
continue;
}
// Case 2: There is a single jump, unconditional, in this basic block
if (CondBranch == nullptr) {
// Case 2a: It jumps to the new fall-through, so we can delete it
if (TBB == FT) {
BB->eraseInstruction(UncondBranch);
}
// Case 2b: If 2a doesn't happen, there is nothing we can do
continue;
}
// Case 3: There is a single jump, conditional, in this basic block
if (UncondBranch == nullptr) {
// Case 3a: If the taken branch goes to the next block in the new layout,
// invert this conditional branch logic so we can make this a fallthrough.
if (TBB == FT) {
assert(OldFT != nullptr && "malformed CFG");
if (!MIA->reverseBranchCondition(*CondBranch, OldFT, BC.Ctx.get()))
llvm_unreachable("Target does not support reversing branches");
continue;
}
// Case 3b: Need to add a new unconditional branch because layout
// has changed
if (FT != OldFT && OldFT != nullptr) {
MCInst NewInst;
if (!MIA->createUncondBranch(NewInst, OldFT, BC.Ctx.get()))
llvm_unreachable("Target does not support creating new branches");
BB->Instructions.emplace_back(std::move(NewInst));
continue;
}
// Case 3c: Old fall-through is the same as the new one, no need to change
continue;
}
// Case 4: There are two jumps in this basic block, one conditional followed
// by another unconditional.
// Case 4a: If the unconditional jump target is the new fall through,
// delete it.
if (FBB == FT) {
BB->eraseInstruction(UncondBranch);
continue;
}
// Case 4b: If the taken branch goes to the next block in the new layout,
// invert this conditional branch logic so we can make this a fallthrough.
// Now we don't need the unconditional jump anymore, so we also delete it.
if (TBB == FT) {
if (!MIA->reverseBranchCondition(*CondBranch, FBB, BC.Ctx.get()))
llvm_unreachable("Target does not support reversing branches");
BB->eraseInstruction(UncondBranch);
continue;
}
// Case 4c: Nothing interesting happening.
}
}
} // namespace flo
} // namespace llvm