Files
clang-p2996/bolt/lib/Core/ParallelUtilities.cpp
Rafael Auler a34c753fe7 Rebase: [NFC] Refactor sources to be buildable in shared mode
Summary:
Moves source files into separate components, and make explicit
component dependency on each other, so LLVM build system knows how to
build BOLT in BUILD_SHARED_LIBS=ON.

Please use the -c merge.renamelimit=230 git option when rebasing your
work on top of this change.

To achieve this, we create a new library to hold core IR files (most
classes beginning with Binary in their names), a new library to hold
Utils, some command line options shared across both RewriteInstance
and core IR files, a new library called Rewrite to hold most classes
concerned with running top-level functions coordinating the binary
rewriting process, and a new library called Profile to hold classes
dealing with profile reading and writing.

To remove the dependency from BinaryContext into X86-specific classes,
we do some refactoring on the BinaryContext constructor to receive a
reference to the specific backend directly from RewriteInstance. Then,
the dependency on X86 or AArch64-specific classes is transfered to the
Rewrite library. We can't have the Core library depend on targets
because targets depend on Core (which would create a cycle).

Files implementing the entry point of a tool are transferred to the
tools/ folder. All header files are transferred to the include/
folder. The src/ folder was renamed to lib/.

(cherry picked from FBD32746834)
2021-10-08 11:47:10 -07:00

240 lines
7.5 KiB
C++

//===--- ParallelUtilities.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Timer.h"
#include <mutex>
#include <shared_mutex>
#define DEBUG_TYPE "par-utils"
namespace opts {
extern cl::OptionCategory BoltCategory;
cl::opt<unsigned>
ThreadCount("thread-count",
cl::desc("number of threads"),
cl::init(hardware_concurrency().compute_thread_count()),
cl::cat(BoltCategory));
cl::opt<bool>
NoThreads("no-threads",
cl::desc("disable multithreading"),
cl::init(false),
cl::cat(BoltCategory));
cl::opt<unsigned>
TaskCount("tasks-per-thread",
cl::desc("number of tasks to be created per thread"),
cl::init(20),
cl::cat(BoltCategory));
} // namespace opts
namespace llvm {
namespace bolt {
namespace ParallelUtilities {
namespace {
/// A single thread pool that is used to run parallel tasks
std::unique_ptr<ThreadPool> ThreadPoolPtr;
unsigned computeCostFor(const BinaryFunction &BF,
const PredicateTy &SkipPredicate,
const SchedulingPolicy &SchedPolicy) {
if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
return 1;
if (SkipPredicate && SkipPredicate(BF))
return 0;
switch (SchedPolicy) {
case SchedulingPolicy::SP_CONSTANT:
return 1;
case SchedulingPolicy::SP_INST_LINEAR:
return BF.getSize();
case SchedulingPolicy::SP_INST_QUADRATIC:
return BF.getSize() * BF.getSize();
case SchedulingPolicy::SP_BB_LINEAR:
return BF.size();
case SchedulingPolicy::SP_BB_QUADRATIC:
return BF.size() * BF.size();
default:
llvm_unreachable("unsupported scheduling policy");
}
}
inline unsigned estimateTotalCost(const BinaryContext &BC,
const PredicateTy &SkipPredicate,
SchedulingPolicy &SchedPolicy) {
if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
return BC.getBinaryFunctions().size();
unsigned TotalCost = 0;
for (auto &BFI : BC.getBinaryFunctions()) {
const BinaryFunction &BF = BFI.second;
TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
}
// Switch to trivial scheduling if total estimated work is zero
if (TotalCost == 0) {
outs() << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
"switch to trivial scheduling.\n";
SchedPolicy = SP_TRIVIAL;
TotalCost = BC.getBinaryFunctions().size();
}
return TotalCost;
}
} // namespace
ThreadPool &getThreadPool() {
if (ThreadPoolPtr.get())
return *ThreadPoolPtr;
ThreadPoolPtr = std::make_unique<ThreadPool>(
llvm::hardware_concurrency(opts::ThreadCount));
return *ThreadPoolPtr;
}
void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
std::string LogName, bool ForceSequential,
unsigned TasksPerThread) {
if (BC.getBinaryFunctions().size() == 0)
return;
auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
Timer T(LogName, LogName);
LLVM_DEBUG(T.startTimer());
for (auto It = BlockBegin; It != BlockEnd; ++It) {
BinaryFunction &BF = It->second;
if (SkipPredicate && SkipPredicate(BF))
continue;
WorkFunction(BF);
}
LLVM_DEBUG(T.stopTimer());
};
if (opts::NoThreads || ForceSequential) {
runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
return;
}
// Estimate the overall runtime cost using the scheduling policy
const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
const unsigned BlockCost =
TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
// Divide work into blocks of equal cost
ThreadPool &Pool = getThreadPool();
auto BlockBegin = BC.getBinaryFunctions().begin();
unsigned CurrentCost = 0;
for (auto It = BC.getBinaryFunctions().begin();
It != BC.getBinaryFunctions().end(); ++It) {
BinaryFunction &BF = It->second;
CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
if (CurrentCost >= BlockCost) {
Pool.async(runBlock, BlockBegin, std::next(It));
BlockBegin = std::next(It);
CurrentCost = 0;
}
}
Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
Pool.wait();
}
void runOnEachFunctionWithUniqueAllocId(
BinaryContext &BC, SchedulingPolicy SchedPolicy,
WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
if (BC.getBinaryFunctions().size() == 0)
return;
std::shared_timed_mutex MainLock;
auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
MCPlusBuilder::AllocatorIdTy AllocId) {
Timer T(LogName, LogName);
LLVM_DEBUG(T.startTimer());
std::shared_lock<std::shared_timed_mutex> Lock(MainLock);
for (auto It = BlockBegin; It != BlockEnd; ++It) {
BinaryFunction &BF = It->second;
if (SkipPredicate && SkipPredicate(BF))
continue;
WorkFunction(BF, AllocId);
}
LLVM_DEBUG(T.stopTimer());
};
if (opts::NoThreads || ForceSequential) {
runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 0);
return;
}
// This lock is used to postpone task execution
std::unique_lock<std::shared_timed_mutex> Lock(MainLock);
// Estimate the overall runtime cost using the scheduling policy
const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
const unsigned BlockCost =
TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
// Divide work into blocks of equal cost
ThreadPool &Pool = getThreadPool();
auto BlockBegin = BC.getBinaryFunctions().begin();
unsigned CurrentCost = 0;
unsigned AllocId = 1;
for (auto It = BC.getBinaryFunctions().begin();
It != BC.getBinaryFunctions().end(); ++It) {
BinaryFunction &BF = It->second;
CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
if (CurrentCost >= BlockCost) {
if (!BC.MIB->checkAllocatorExists(AllocId)) {
MCPlusBuilder::AllocatorIdTy Id =
BC.MIB->initializeNewAnnotationAllocator();
(void)Id;
assert(AllocId == Id && "unexpected allocator id created");
}
Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
AllocId++;
BlockBegin = std::next(It);
CurrentCost = 0;
}
}
if (!BC.MIB->checkAllocatorExists(AllocId)) {
MCPlusBuilder::AllocatorIdTy Id =
BC.MIB->initializeNewAnnotationAllocator();
(void)Id;
assert(AllocId == Id && "unexpected allocator id created");
}
Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
Lock.unlock();
Pool.wait();
}
} // namespace ParallelUtilities
} // namespace bolt
} // namespace llvm