Files
clang-p2996/bolt/lib/Passes/CMOVConversion.cpp
Amir Ayupov 687e4af1c0 [BOLT] CMOVConversion pass
Convert simple hammocks into cmov based on misprediction rate.

Test Plan:
- Assembly test: `cmov-conversion.s`
- Testing on a binary:
  # Bootstrap clang with `-x86-cmov-converter-force-all` and `-Wl,--emit-relocs`
  (Release build)
  # Collect perf.data:

    - `clang++ <opts> bolt/lib/Core/BinaryFunction.cpp -E > bf.cpp`
    - `perf record -e cycles:u -j any,u -- clang-15 bf.cpp -O2 -std=c++14 -c -o bf.o`
  # Optimize clang-15 with and w/o -cmov-conversion:
    - `llvm-bolt clang-15 -p perf.data -o clang-15.bolt`
    - `llvm-bolt clang-15 -p perf.data -cmov-conversion -o clang-15.bolt.cmovconv`
  # Run perf experiment:
    - test: `clang-15.bolt.cmovconv`,
    - control: `clang-15.bolt`,
    - workload (clang options): `bf.cpp -O2 -std=c++14 -c -o bf.o`
Results:
```
  task-clock [delta: -360.21 ± 356.75, delta(%): -1.7760 ± 1.7589, p-value: 0.047951, balance: -6]
  instructions  [delta: 44061118 ± 13246382, delta(%): 0.0690 ± 0.0207, p-value: 0.000001, balance: 50]
  icache-misses [delta: -5534468 ± 2779620, delta(%): -0.4331 ± 0.2175, p-value: 0.028014, balance: -28]
  branch-misses [delta: -1624270 ± 1113244, delta(%): -0.3456 ± 0.2368, p-value: 0.030300, balance: -22]
```

Reviewed By: rafauler

Differential Revision: https://reviews.llvm.org/D120177
2022-03-08 10:44:31 -08:00

288 lines
10 KiB
C++

//===- bolt/Passes/CMOVConversion.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the CMOV conversion pass.
//
//===----------------------------------------------------------------------===//
#include "bolt/Passes/CMOVConversion.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <numeric>
#define DEBUG_TYPE "cmov"
using namespace llvm;
namespace opts {
extern cl::OptionCategory BoltOptCategory;
static cl::opt<int> BiasThreshold(
"cmov-conversion-bias-threshold",
cl::desc("minimum condition bias (pct) to perform a CMOV conversion, "
"-1 to not account bias"),
cl::ReallyHidden, cl::init(1), cl::cat(BoltOptCategory));
static cl::opt<int> MispredictionThreshold(
"cmov-conversion-misprediction-threshold",
cl::desc("minimum misprediction rate (pct) to perform a CMOV conversion, "
"-1 to not account misprediction rate"),
cl::ReallyHidden, cl::init(5), cl::cat(BoltOptCategory));
static cl::opt<bool> ConvertStackMemOperand(
"cmov-conversion-convert-stack-mem-operand",
cl::desc("convert moves with stack memory operand (potentially unsafe)"),
cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory));
static cl::opt<bool> ConvertBasePtrStackMemOperand(
"cmov-conversion-convert-rbp-stack-mem-operand",
cl::desc("convert moves with rbp stack memory operand (unsafe, must be off "
"for binaries compiled with -fomit-frame-pointer)"),
cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
namespace bolt {
// Return true if the CFG conforms to the following subgraph:
// Predecessor
// / \
// | RHS
// \ /
// LHS
// Caller guarantees that LHS and RHS share the same predecessor.
bool isIfThenSubgraph(const BinaryBasicBlock &LHS,
const BinaryBasicBlock &RHS) {
if (LHS.pred_size() != 2 || RHS.pred_size() != 1)
return false;
// Sanity check
BinaryBasicBlock *Predecessor = *RHS.pred_begin();
assert(Predecessor && LHS.isPredecessor(Predecessor) && "invalid subgraph");
(void)Predecessor;
if (!LHS.isPredecessor(&RHS))
return false;
if (RHS.succ_size() != 1)
return false;
return true;
}
bool matchCFGSubgraph(BinaryBasicBlock &BB, BinaryBasicBlock *&ConditionalSucc,
BinaryBasicBlock *&UnconditionalSucc,
bool &IsConditionalTaken) {
BinaryBasicBlock *TakenSucc = BB.getConditionalSuccessor(true);
BinaryBasicBlock *FallthroughSucc = BB.getConditionalSuccessor(false);
bool IsIfThenTaken = isIfThenSubgraph(*FallthroughSucc, *TakenSucc);
bool IsIfThenFallthrough = isIfThenSubgraph(*TakenSucc, *FallthroughSucc);
if (!IsIfThenFallthrough && !IsIfThenTaken)
return false;
assert((!IsIfThenFallthrough || !IsIfThenTaken) && "Invalid subgraph");
// Output parameters
ConditionalSucc = IsIfThenTaken ? TakenSucc : FallthroughSucc;
UnconditionalSucc = IsIfThenTaken ? FallthroughSucc : TakenSucc;
IsConditionalTaken = IsIfThenTaken;
return true;
}
// Return true if basic block instructions can be converted into cmov(s).
bool canConvertInstructions(const BinaryContext &BC, const BinaryBasicBlock &BB,
unsigned CC) {
if (BB.empty())
return false;
const MCInst *LastInst = BB.getLastNonPseudoInstr();
// Only pseudo instructions, can't be converted into CMOV
if (LastInst == nullptr)
return false;
for (const MCInst &Inst : BB) {
if (BC.MIB->isPseudo(Inst))
continue;
// Unconditional branch as a last instruction is OK
if (&Inst == LastInst && BC.MIB->isUnconditionalBranch(Inst))
continue;
MCInst Cmov(Inst);
// GPR move is OK
if (!BC.MIB->convertMoveToConditionalMove(
Cmov, CC, opts::ConvertStackMemOperand,
opts::ConvertBasePtrStackMemOperand)) {
LLVM_DEBUG({
dbgs() << BB.getName() << ": can't convert instruction ";
BC.printInstruction(dbgs(), Cmov);
});
return false;
}
}
return true;
}
void convertMoves(const BinaryContext &BC, BinaryBasicBlock &BB, unsigned CC) {
for (auto II = BB.begin(), IE = BB.end(); II != IE; ++II) {
if (BC.MIB->isPseudo(*II))
continue;
if (BC.MIB->isUnconditionalBranch(*II)) {
// XXX: this invalidates II but we return immediately
BB.eraseInstruction(II);
return;
}
bool Result = BC.MIB->convertMoveToConditionalMove(
*II, CC, opts::ConvertStackMemOperand,
opts::ConvertBasePtrStackMemOperand);
assert(Result && "unexpected instruction");
(void)Result;
}
}
// Returns misprediction rate if the profile data is available, -1 otherwise.
std::pair<int, uint64_t>
calculateMispredictionRate(const BinaryBasicBlock &BB) {
uint64_t TotalExecCount = 0;
uint64_t TotalMispredictionCount = 0;
for (auto BI : BB.branch_info()) {
TotalExecCount += BI.Count;
if (BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED)
TotalMispredictionCount += BI.MispredictedCount;
}
if (!TotalExecCount)
return {-1, TotalMispredictionCount};
return {100.0f * TotalMispredictionCount / TotalExecCount,
TotalMispredictionCount};
}
// Returns conditional succ bias if the profile is available, -1 otherwise.
int calculateConditionBias(const BinaryBasicBlock &BB,
const BinaryBasicBlock &ConditionalSucc) {
if (auto BranchStats = BB.getBranchStats(&ConditionalSucc))
return BranchStats->first;
return -1;
}
void CMOVConversion::Stats::dump() {
outs() << "converted static " << StaticPerformed << "/" << StaticPossible
<< formatv(" ({0:P}) ", getStaticRatio())
<< "hammock(s) into CMOV sequences, with dynamic execution count "
<< DynamicPerformed << "/" << DynamicPossible
<< formatv(" ({0:P}), ", getDynamicRatio()) << "saving " << RemovedMP
<< "/" << PossibleMP << formatv(" ({0:P}) ", getMPRatio())
<< "mispredictions\n";
}
void CMOVConversion::runOnFunction(BinaryFunction &Function) {
BinaryContext &BC = Function.getBinaryContext();
bool Modified = false;
// Function-local stats
Stats Local;
// Traverse blocks in RPO, merging block with a converted cmov with its
// successor.
for (BinaryBasicBlock *BB : post_order(&Function)) {
uint64_t BBExecCount = BB->getKnownExecutionCount();
if (BB->empty() || // The block must have instructions
BBExecCount == 0 || // must be hot
BB->succ_size() != 2 || // with two successors
BB->hasJumpTable()) // no jump table
continue;
assert(BB->isValid() && "traversal internal error");
// Check branch instruction
auto BranchInstrIter = BB->getLastNonPseudo();
if (BranchInstrIter == BB->rend() ||
!BC.MIB->isConditionalBranch(*BranchInstrIter))
continue;
// Check successors
BinaryBasicBlock *ConditionalSucc, *UnconditionalSucc;
bool IsConditionalTaken;
if (!matchCFGSubgraph(*BB, ConditionalSucc, UnconditionalSucc,
IsConditionalTaken)) {
LLVM_DEBUG(dbgs() << BB->getName() << ": couldn't match hammock\n");
continue;
}
unsigned CC = BC.MIB->getCondCode(*BranchInstrIter);
if (!IsConditionalTaken)
CC = BC.MIB->getInvertedCondCode(CC);
// Check contents of the conditional block
if (!canConvertInstructions(BC, *ConditionalSucc, CC))
continue;
int ConditionBias = calculateConditionBias(*BB, *ConditionalSucc);
int MispredictionRate = 0;
uint64_t MispredictionCount = 0;
std::tie(MispredictionRate, MispredictionCount) =
calculateMispredictionRate(*BB);
Local.StaticPossible++;
Local.DynamicPossible += BBExecCount;
Local.PossibleMP += MispredictionCount;
// If the conditional successor is never executed, don't convert it
if (ConditionBias < opts::BiasThreshold) {
LLVM_DEBUG(dbgs() << BB->getName() << "->" << ConditionalSucc->getName()
<< " bias = " << ConditionBias
<< ", less than threshold " << opts::BiasThreshold
<< '\n');
continue;
}
// Check the misprediction rate of a branch
if (MispredictionRate < opts::MispredictionThreshold) {
LLVM_DEBUG(dbgs() << BB->getName() << " misprediction rate = "
<< MispredictionRate << ", less than threshold "
<< opts::MispredictionThreshold << '\n');
continue;
}
// remove conditional branch
BB->eraseInstruction(std::prev(BranchInstrIter.base()));
BB->removeAllSuccessors();
// Convert instructions from the conditional successor into cmov's in BB.
convertMoves(BC, *ConditionalSucc, CC);
BB->addInstructions(ConditionalSucc->begin(), ConditionalSucc->end());
ConditionalSucc->markValid(false);
// RPO traversal guarantees that the successor is visited and merged if
// necessary. Merge the unconditional successor into the current block.
BB->addInstructions(UnconditionalSucc->begin(), UnconditionalSucc->end());
UnconditionalSucc->moveAllSuccessorsTo(BB);
UnconditionalSucc->markValid(false);
Local.StaticPerformed++;
Local.DynamicPerformed += BBExecCount;
Local.RemovedMP += MispredictionCount;
Modified = true;
}
if (Modified)
Function.eraseInvalidBBs();
if (opts::Verbosity > 1) {
outs() << "BOLT-INFO: CMOVConversion: " << Function << ", ";
Local.dump();
}
Global = Global + Local;
}
void CMOVConversion::runOnFunctions(BinaryContext &BC) {
for (auto &It : BC.getBinaryFunctions()) {
BinaryFunction &Function = It.second;
if (!shouldOptimize(Function))
continue;
runOnFunction(Function);
}
outs() << "BOLT-INFO: CMOVConversion total: ";
Global.dump();
}
} // end namespace bolt
} // end namespace llvm