//===--- BinaryFunction.cpp - Interface for machine-level function --------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include "BinaryBasicBlock.h" #include "BinaryFunction.h" #include "DataReader.h" #undef DEBUG_TYPE #define DEBUG_TYPE "flo" namespace llvm { namespace flo { BinaryBasicBlock * BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) { if (Offset > Size) return nullptr; if (BasicBlocks.empty()) return nullptr; auto I = std::upper_bound(BasicBlocks.begin(), BasicBlocks.end(), BinaryBasicBlock(Offset)); assert(I != BasicBlocks.begin() && "first basic block not at offset 0"); return &(*--I); } void BinaryFunction::print(raw_ostream &OS, bool PrintInstructions) const { StringRef SectionName; Section.getName(SectionName); OS << "Binary Function \"" << getName() << "\" {" << "\n State : " << CurrentState << "\n Address : 0x" << Twine::utohexstr(Address) << "\n Size : 0x" << Twine::utohexstr(Size) << "\n MaxSize : 0x" << Twine::utohexstr(MaxSize) << "\n Offset : 0x" << Twine::utohexstr(FileOffset) << "\n Section : " << SectionName << "\n Orc Section : " << getCodeSectionName() << "\n IsSimple : " << IsSimple << "\n BB count : " << BasicBlocks.size() << "\n Image : 0x" << Twine::utohexstr(ImageAddress); if (ExecutionCount != COUNT_NO_PROFILE) OS << "\n Exec Count : " << ExecutionCount; OS << "\n}\n"; if (!PrintInstructions || !BC.InstPrinter) return; // Offset of the instruction in function. uint64_t Offset{0}; if (BasicBlocks.empty() && !Instructions.empty()) { // Print before CFG was built. for (const auto &II : Instructions) { auto Offset = II.first; // Print label if exists at this offset. auto LI = Labels.find(Offset); if (LI != Labels.end()) OS << LI->second->getName() << ":\n"; auto &Instruction = II.second; OS << format(" %08" PRIx64 ": ", Offset); BC.InstPrinter->printInst(&Instruction, OS, "", *BC.STI); OS << "\n"; } } for (const auto &BB : BasicBlocks) { OS << BB.getName() << " (" << BB.Instructions.size() << " instructions)\n"; uint64_t BBExecCount = BB.getExecutionCount(); if (BBExecCount != BinaryBasicBlock::COUNT_NO_PROFILE) { OS << " Exec Count : " << BBExecCount << "\n"; } if (!BB.Predecessors.empty()) { OS << " Predecessors: "; auto Sep = ""; for (auto Pred : BB.Predecessors) { OS << Sep << Pred->getName(); Sep = ", "; } OS << '\n'; } Offset = RoundUpToAlignment(Offset, BB.getAlignment()); for (auto &Instr : BB) { OS << format(" %08" PRIx64 ": ", Offset); BC.InstPrinter->printInst(&Instr, OS, "", *BC.STI); OS << "\n"; // In case we need MCInst printer: // Instr.dump_pretty(OS, InstructionPrinter.get()); // Calculate the size of the instruction. // Note: this is imprecise since happening prior to relaxation. SmallString<256> Code; SmallVector Fixups; raw_svector_ostream VecOS(Code); BC.MCE->encodeInstruction(Instr, VecOS, Fixups, *BC.STI); Offset += Code.size(); } if (!BB.Successors.empty()) { OS << " Successors: "; auto BI = BB.BranchInfo.begin(); auto Sep = ""; for (auto Succ : BB.Successors) { assert(BI != BB.BranchInfo.end() && "missing BranchInfo entry"); OS << Sep << Succ->getName(); if (ExecutionCount != COUNT_NO_PROFILE && BI->MispredictedCount != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) { OS << " (mispreds: " << BI->MispredictedCount << ", count: " << BI->Count << ")"; } else if (ExecutionCount != COUNT_NO_PROFILE && BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) { OS << " (inferred count: " << BI->Count << ")"; } Sep = ", "; ++BI; } OS << '\n'; } OS << '\n'; } OS << "End of Function \"" << getName() << "\"\n"; } bool BinaryFunction::disassemble(ArrayRef FunctionData) { assert(FunctionData.size() == getSize() && "function size does not match raw data size"); auto &Ctx = BC.Ctx; auto &MIA = BC.MIA; // Insert a label at the beginning of the function. This will be our first // basic block. Labels[0] = Ctx->createTempSymbol("BB0", false); bool IsSimple = true; for (uint64_t Offset = 0; IsSimple && (Offset < getSize()); ) { MCInst Instruction; uint64_t Size; if (!BC.DisAsm->getInstruction(Instruction, Size, FunctionData.slice(Offset), getAddress() + Offset, nulls(), nulls())) { // Ignore this function. Skip to the next one. IsSimple = false; break; } if (MIA->isIndirectBranch(Instruction)) { IsSimple = false; break; } uint64_t AbsoluteInstrAddr = getAddress() + Offset; if (MIA->isBranch(Instruction) || MIA->isCall(Instruction)) { uint64_t InstructionTarget = 0; if (MIA->evaluateBranch(Instruction, AbsoluteInstrAddr, Size, InstructionTarget)) { // Check if the target is within the same function. Otherwise it's // a call, possibly a tail call. // // If the target *is* the function address it could be either a branch // or a recursive call. bool IsCall = MIA->isCall(Instruction); MCSymbol *TargetSymbol{nullptr}; uint64_t TargetOffset{0}; if (IsCall && containsAddress(InstructionTarget)) { if (InstructionTarget == getAddress()) { // Recursive call. TargetSymbol = Ctx->getOrCreateSymbol(getName()); } else { // Possibly an old-style PIC code DEBUG(dbgs() << "FLO: internal call detected at 0x" << Twine::utohexstr(AbsoluteInstrAddr) << " in function " << getName() << "\n"); IsSimple = false; break; } } if (!TargetSymbol) { // Create either local label or external symbol. if (containsAddress(InstructionTarget)) { // Check if there's already a registered label. TargetOffset = InstructionTarget - getAddress(); auto LI = Labels.find(TargetOffset); if (LI == Labels.end()) { TargetSymbol = Ctx->createTempSymbol(); Labels[TargetOffset] = TargetSymbol; } else { TargetSymbol = LI->second; } } else { // This is a call regardless of the opcode (e.g. tail call). IsCall = true; // Check if we already have a symbol at this address. std::string Name; auto NI = BC.GlobalAddresses.find(InstructionTarget); if (NI != BC.GlobalAddresses.end()) { // Any registered name will do. Name = NI->second; } else { // Create a new symbol at the destination. Name = (Twine("FUNCat0x") + Twine::utohexstr(InstructionTarget)).str(); BC.GlobalAddresses.emplace(std::make_pair(InstructionTarget, Name)); } TargetSymbol = Ctx->getOrCreateSymbol(Name); BC.GlobalSymbols[Name] = InstructionTarget; } } Instruction.clear(); Instruction.addOperand( MCOperand::createExpr( MCSymbolRefExpr::create(TargetSymbol, MCSymbolRefExpr::VK_None, *Ctx))); if (!IsCall) { // Add local branch info. LocalBranches.push_back({Offset, TargetOffset}); } } else { // Indirect call DEBUG(dbgs() << "FLO: indirect call detected (not yet supported)\n"); IsSimple = false; break; } } else { if (MIA->hasRIPOperand(Instruction)) { uint64_t TargetAddress{0}; MCSymbol *TargetSymbol{nullptr}; if (!MIA->evaluateRIPOperand(Instruction, AbsoluteInstrAddr, Size, TargetAddress)) { DEBUG( dbgs() << "FLO: rip-relative operand could not be evaluated:\n"; BC.InstPrinter->printInst(&Instruction, dbgs(), "", *BC.STI); dbgs() << '\n'; Instruction.dump_pretty(dbgs(), BC.InstPrinter.get()); dbgs() << '\n'; ); IsSimple = false; break; } std::string Name; auto NI = BC.GlobalAddresses.find(TargetAddress); if (NI != BC.GlobalAddresses.end()) { Name = NI->second; } else { // Register new "data" symbol at the destination. Name = (Twine("DATAat0x") + Twine::utohexstr(TargetAddress)).str(); BC.GlobalAddresses.emplace(std::make_pair(TargetAddress, Name)); } TargetSymbol = Ctx->getOrCreateSymbol(Name); BC.GlobalSymbols[Name] = TargetAddress; MIA->replaceRIPOperandDisp( Instruction, MCOperand::createExpr( MCSymbolRefExpr::create(TargetSymbol, MCSymbolRefExpr::VK_None, *Ctx))); } } addInstruction(Offset, std::move(Instruction)); Offset += Size; } setSimple(IsSimple); // TODO: clear memory if not simple function? // Update state. updateState(State::Disassembled); // Print the function in the new state. DEBUG(print(dbgs(), /* PrintInstructions = */ true)); return true; } bool BinaryFunction::buildCFG() { auto &MIA = BC.MIA; auto BranchDataOrErr = BC.DR.getFuncBranchData(getName()); if (std::error_code EC = BranchDataOrErr.getError()) { DEBUG(dbgs() << "no branch data found for \"" << getName() << "\"\n"); } else { ExecutionCount = BC.DR.countBranchesTo(getName()); } if (!isSimple()) return false; if (!(CurrentState == State::Disassembled)) return false; assert(BasicBlocks.empty() && "basic block list should be empty"); assert((Labels.find(0) != Labels.end()) && "first instruction should always have a label"); // Create basic blocks in the original layout order: // // * Every instruction with associated label marks // the beginning of a basic block. // * Conditional instruction marks the end of a basic block, // except when the following instruction is an // unconditional branch, and the unconditional branch is not // a destination of another branch. In the latter case, the // basic block will consist of a single unconditional branch // (missed optimization opportunity?). // // Created basic blocks are sorted in layout order since they are // created in the same order as instructions, and instructions are // sorted by offsets. BinaryBasicBlock *InsertBB{nullptr}; BinaryBasicBlock *PrevBB{nullptr}; for (auto &InstrInfo : Instructions) { auto LI = Labels.find(InstrInfo.first); if (LI != Labels.end()) { // Always create new BB at branch destination. PrevBB = InsertBB; InsertBB = addBasicBlock(LI->first, LI->second); } if (!InsertBB) { // It must be a fallthrough. Create a new block unless we see an // unconditional branch. assert(PrevBB && "no previous basic block for a fall through"); if (MIA->isUnconditionalBranch(InstrInfo.second)) { // Temporarily restore inserter basic block. InsertBB = PrevBB; } else { InsertBB = addBasicBlock(InstrInfo.first, BC.Ctx->createTempSymbol("FT", true)); } } InsertBB->addInstruction(InstrInfo.second); // How well do we detect tail calls here? if (MIA->isTerminator(InstrInfo.second)) { PrevBB = InsertBB; InsertBB = nullptr; } } // Set the basic block layout to the original order for (auto &BB : BasicBlocks) { BasicBlocksLayout.emplace_back(&BB); } // Intermediate dump. DEBUG(print(dbgs(), /* PrintInstructions = */ true)); // TODO: handle properly calls to no-return functions, // e.g. exit(3), etc. Otherwise we'll see a false fall-through // blocks. for (auto &Branch : LocalBranches) { DEBUG(dbgs() << "registering branch [0x" << Twine::utohexstr(Branch.first) << "] -> [0x" << Twine::utohexstr(Branch.second) << "]\n"); BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first); assert(FromBB && "cannot find BB containing FROM branch"); BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second); assert(ToBB && "cannot find BB containing TO branch"); if (std::error_code EC = BranchDataOrErr.getError()) { FromBB->addSuccessor(ToBB); } else { const FuncBranchData &BranchData = BranchDataOrErr.get(); auto BranchInfoOrErr = BranchData.getBranch(Branch.first, Branch.second); if (std::error_code EC = BranchInfoOrErr.getError()) { FromBB->addSuccessor(ToBB); } else { const BranchInfo &BInfo = BranchInfoOrErr.get(); FromBB->addSuccessor(ToBB, BInfo.Branches, BInfo.Mispreds); } } } // Add fall-through branches. PrevBB = nullptr; bool IsPrevFT = false; // Is previous block a fall-through. for (auto &BB : BasicBlocks) { if (IsPrevFT) { PrevBB->addSuccessor(&BB, BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE, BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE); } MCInst &LastInst = BB.back(); if (BB.succ_size() == 0) { IsPrevFT = MIA->isTerminator(LastInst) ? false : true; } else if (BB.succ_size() == 1) { IsPrevFT = MIA->isConditionalBranch(LastInst) ? true : false; } else { // Either ends with 2 branches, or with an indirect jump. IsPrevFT = false; } PrevBB = &BB; } if (!IsPrevFT) { // Possibly a call that does not return. DEBUG(dbgs() << "last block was marked as a fall-through\n"); } // Infer frequency for non-taken branches if (ExecutionCount != COUNT_NO_PROFILE && !BranchDataOrErr.getError()) { inferFallThroughCounts(); } // Clean-up memory taken by instructions and labels. clearInstructions(); clearLabels(); clearLocalBranches(); // Update the state. CurrentState = State::CFG; // Print the function in the new state. DEBUG(print(dbgs(), /* PrintInstructions = */ true)); return true; } void BinaryFunction::inferFallThroughCounts() { assert(!BasicBlocks.empty() && "basic block list should not be empty"); // Compute preliminary execution time for each basic block for (auto &CurBB : BasicBlocks) { if (&CurBB == &*BasicBlocks.begin()) { CurBB.ExecutionCount = ExecutionCount; continue; } CurBB.ExecutionCount = 0; } for (auto &CurBB : BasicBlocks) { auto SuccCount = CurBB.BranchInfo.begin(); for (auto Succ : CurBB.successors()) { if (SuccCount->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) Succ->ExecutionCount += SuccCount->Count; ++SuccCount; } } // Work on a basic block at a time, propagating frequency information forwards // It is important to walk in the layour order for (auto &CurBB : BasicBlocks) { uint64_t BBExecCount = CurBB.getExecutionCount(); // Propagate this information to successors, filling in fall-through edges // with frequency information if (CurBB.succ_size() == 0) continue; // Calculate frequency of outgoing branches from this node according to // LBR data uint64_t ReportedBranches = 0; for (auto &SuccCount : CurBB.BranchInfo) { if (SuccCount.Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) ReportedBranches += SuccCount.Count; } // Infer the frequency of the fall-through edge, representing not taking the // branch uint64_t Inferred = 0; if (BBExecCount > ReportedBranches) Inferred = BBExecCount - ReportedBranches; if (BBExecCount < ReportedBranches) errs() << "FLO-WARNING: Fall-through inference is slightly inconsistent. " "BB exec frequency is less than the outgoing edges frequency\n"; // Put this information into the fall-through edge if (CurBB.succ_size() == 0) continue; // If there is a FT, the last successor will be it. auto &SuccCount = CurBB.BranchInfo.back(); auto &Succ = CurBB.Successors.back(); if (SuccCount.Count == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) { SuccCount.Count = Inferred; Succ->ExecutionCount += Inferred; } } // end for (CurBB : BasicBlocks) return; } void BinaryFunction::optimizeLayout(bool DumpLayout) { // Bail if no profiling information if (getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE) { return; } // Work on optimal solution if problem is small enough if (BasicBlocks.size() <= FUNC_SIZE_THRESHOLD) return solveOptimalLayout(DumpLayout); if (DumpLayout) { dbgs() << "running block layout heuristics on " << getName() << "\n"; } // Greedy heuristic implementation for the TSP, applied to BB layout. Try to // maximize weight during a path traversing all BBs. In this way, we will // convert the hottest branches into fall-throughs. // Encode an edge between two basic blocks, source and destination typedef std::pair EdgeTy; std::map Weight; // Define a comparison function to establish SWO between edges auto Comp = [&Weight](EdgeTy A, EdgeTy B) { return Weight[A] > Weight[B]; }; std::priority_queue, decltype(Comp)> Queue(Comp); typedef std::vector ClusterTy; typedef std::map BBToClusterMapTy; std::vector Clusters; BBToClusterMapTy BBToClusterMap; // Populating priority queue with all edges for (auto &BB : BasicBlocks) { BBToClusterMap[&BB] = -1; // Mark as unmapped auto BI = BB.BranchInfo.begin(); for (auto &I : BB.successors()) { if (BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) Weight[std::make_pair(&BB, I)] = BI->Count; Queue.push(std::make_pair(&BB, I)); ++BI; } } // Start a cluster with the entry point BinaryBasicBlock *Entry = &*BasicBlocks.begin(); Clusters.emplace_back(); auto &EntryCluster = Clusters.back(); EntryCluster.push_back(Entry); BBToClusterMap[Entry] = 0; // Grow clusters in a greedy fashion while (!Queue.empty()) { auto elmt = Queue.top(); Queue.pop(); BinaryBasicBlock *BBSrc = elmt.first; BinaryBasicBlock *BBDst = elmt.second; int I = 0, J = 0; // Case 1: BBSrc and BBDst are the same. Ignore this edge if (BBSrc == BBDst) continue; // Case 2: Both BBSrc and BBDst are already allocated if ((I = BBToClusterMap[BBSrc]) != -1 && (J = BBToClusterMap[BBDst]) != -1) { auto &ClusterA = Clusters[I]; auto &ClusterB = Clusters[J]; if (ClusterA.back() == BBSrc && ClusterB.front() == BBDst) { // Case 2a: BBSrc is at the end of a cluster and BBDst is at the start, // allowing us to merge two clusters for (auto BB : ClusterB) BBToClusterMap[BB] = I; ClusterA.insert(ClusterA.end(), ClusterB.begin(), ClusterB.end()); ClusterB.clear(); } else { // Case 2b: Both BBSrc and BBDst are allocated in positions we cannot // merge them, so we ignore this edge. } continue; } // Case 3: BBSrc is already allocated in a cluster if ((I = BBToClusterMap[BBSrc]) != -1) { auto &Cluster = Clusters[I]; if (Cluster.back() == BBSrc) { // Case 3a: BBSrc is allocated at the end of this cluster. We put // BBSrc and BBDst together. Cluster.push_back(BBDst); BBToClusterMap[BBDst] = I; } else { // Case 3b: We cannot put BBSrc and BBDst in consecutive positions, // so we ignore this edge. } continue; } // Case 4: BBSrc is not in a cluster, but BBDst is if ((I = BBToClusterMap[BBDst]) != -1) { auto &Cluster = Clusters[I]; if (Cluster.front() == BBDst) { // Case 4a: BBDst is allocated at the start of this cluster. We put // BBSrc and BBDst together. Cluster.insert(Cluster.begin(), BBSrc); BBToClusterMap[BBSrc] = I; } else { // Case 4b: We cannot put BBSrc and BBDst in consecutive positions, // so we ignore this edge. } continue; } // Case 5: Both BBSrc and BBDst are unallocated, so we create a new cluster // with them I = Clusters.size(); Clusters.emplace_back(); auto &Cluster = Clusters.back(); Cluster.push_back(BBSrc); Cluster.push_back(BBDst); BBToClusterMap[BBSrc] = I; BBToClusterMap[BBDst] = I; } // Define final function layout based on clusters BasicBlocksLayout.clear(); for (auto &Cluster : Clusters) { BasicBlocksLayout.insert(BasicBlocksLayout.end(), Cluster.begin(), Cluster.end()); } // Finalize layout with BBs that weren't assigned to any cluster, preserving // their relative order for (auto &BB : BasicBlocks) { if (BBToClusterMap[&BB] == -1) BasicBlocksLayout.push_back(&BB); } if (DumpLayout) { dbgs() << "original BB order is: "; auto Sep = ""; for (auto &BB : BasicBlocks) { dbgs() << Sep << BB.getName(); Sep = ","; } dbgs() << "\nnew order is: "; Sep = ""; for (auto BB : BasicBlocksLayout) { dbgs() << Sep << BB->getName(); Sep = ","; } dbgs() << "\n"; } } void BinaryFunction::solveOptimalLayout(bool DumpLayout) { std::vector> Weight; std::map BBToIndex; std::vector IndexToBB; if (DumpLayout) { dbgs() << "finding optimal block layout for " << getName() << "\n"; } unsigned N = BasicBlocks.size(); // Populating weight map and index map for (auto &BB : BasicBlocks) { BBToIndex[&BB] = IndexToBB.size(); IndexToBB.push_back(&BB); } Weight.resize(N); for (auto &BB : BasicBlocks) { auto BI = BB.BranchInfo.begin(); Weight[BBToIndex[&BB]].resize(N); for (auto &I : BB.successors()) { if (BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) Weight[BBToIndex[&BB]][BBToIndex[I]] = BI->Count; ++BI; } } std::vector> DP; DP.resize(1 << N); for (auto &Elmt : DP) { Elmt.resize(N, -1); } // Start with the entry basic block being allocated with cost zero DP[1][0] = 0; // Walk through TSP solutions using a bitmask to represent state (current set // of BBs in the layout) unsigned BestSet = 1; unsigned BestLast = 0; int64_t BestWeight = 0; for (unsigned Set = 1; Set < (1U << N); ++Set) { // Traverse each possibility of Last BB visited in this layout for (unsigned Last = 0; Last < N; ++Last) { // Case 1: There is no possible layout with this BB as Last if (DP[Set][Last] == -1) continue; // Case 2: There is a layout with this Set and this Last, and we try // to expand this set with New for (unsigned New = 1; New < N; ++New) { // Case 2a: BB "New" is already in this Set if ((Set & (1 << New)) != 0) continue; // Case 2b: BB "New" is not in this set and we add it to this Set and // record total weight of this layout with "New" as the last BB. unsigned NewSet = (Set | (1 << New)); if (DP[NewSet][New] == -1) DP[NewSet][New] = DP[Set][Last] + (int64_t)Weight[Last][New]; DP[NewSet][New] = std::max(DP[NewSet][New], DP[Set][Last] + (int64_t)Weight[Last][New]); if (DP[NewSet][New] > BestWeight) { BestWeight = DP[NewSet][New]; BestSet = NewSet; BestLast = New; } } } } // Define final function layout based on layout that maximizes weight BasicBlocksLayout.clear(); unsigned Last = BestLast; unsigned Set = BestSet; std::vector Visited; Visited.resize(N); Visited[Last] = true; BasicBlocksLayout.push_back(IndexToBB[Last]); Set = Set & ~(1U << Last); while (Set != 0) { int64_t Best = -1; for (unsigned I = 0; I < N; ++I) { if (DP[Set][I] == -1) continue; if (DP[Set][I] > Best) { Last = I; Best = DP[Set][I]; } } Visited[Last] = true; BasicBlocksLayout.push_back(IndexToBB[Last]); Set = Set & ~(1U << Last); } std::reverse(BasicBlocksLayout.begin(), BasicBlocksLayout.end()); // Finalize layout with BBs that weren't assigned to the layout for (auto &BB : BasicBlocks) { if (Visited[BBToIndex[&BB]] == false) BasicBlocksLayout.push_back(&BB); } if (DumpLayout) { dbgs() << "original BB order is: "; auto Sep = ""; for (auto &BB : BasicBlocks) { dbgs() << Sep << BB.getName(); Sep = ","; } dbgs() << "\nnew order is: "; Sep = ""; for (auto BB : BasicBlocksLayout) { dbgs() << Sep << BB->getName(); Sep = ","; } dbgs() << "\n"; DEBUG(print(dbgs(), /* PrintInstructions = */ true)); } } } // namespace flo } // namespace llvm