[BOLT] Introduce binary analysis tool based on BOLT (#115330)
This initial commit does not add any specific binary analyses yet; it merely contains the boilerplate to introduce a new BOLT-based tool. It basically combines the first 4 patches from the prototype pac-ret and stack-clash binary analyzer discussed in RFC https://discourse.llvm.org/t/rfc-bolt-based-binary-analysis-tool-to-verify-correctness-of-security-hardening/78148 and published at https://github.com/llvm/llvm-project/compare/main...kbeyls:llvm-project:bolt-gadget-scanner-prototype The introduction of such a BOLT-based binary analysis tool was proposed and discussed in at least the following places: - The RFC pointed to above - EuroLLVM 2024 round table https://discourse.llvm.org/t/summary-of-bolt-as-a-binary-analysis-tool-round-table-at-eurollvm/78441 The round table showed quite a few people interested in being able to build a custom binary analysis quickly with a tool like this. - Also at the US LLVM dev meeting a few weeks ago, I heard interest from a few people, asking when the tool would be available upstream. - The presentation "Adding Pointer Authentication ABI support for your ELF platform" (https://llvm.swoogo.com/2024devmtg/session/2512720/adding-pointer-authentication-abi-support-for-your-elf-platform) explicitly mentioned interest in extending the prototype tool to verify correct implementation of pauthabi.
This commit is contained in:
20
bolt/docs/BinaryAnalysis.md
Normal file
20
bolt/docs/BinaryAnalysis.md
Normal file
@@ -0,0 +1,20 @@
|
||||
# BOLT-based binary analysis
|
||||
|
||||
As part of post-link-time optimizing, BOLT needs to perform a range of analyses
|
||||
on binaries such as reconstructing control flow graphs, and more.
|
||||
|
||||
The `llvm-bolt-binary-analysis` tool enables running requested binary analyses
|
||||
on binaries, and generating reports. It does this by building on top of the
|
||||
analyses implemented in the BOLT libraries.
|
||||
|
||||
## Which binary analyses are implemented?
|
||||
|
||||
At the moment, no binary analyses are implemented.
|
||||
|
||||
The goal is to make it easy to add your own analyses through a plug-in framework.
|
||||
|
||||
## How to add your own binary analysis
|
||||
|
||||
_TODO: this section needs to be written. Ideally, we should have a simple
|
||||
"example" or "template" analysis that can be the starting point for implementing
|
||||
custom analyses._
|
||||
@@ -164,6 +164,9 @@ private:
|
||||
|
||||
void preregisterSections();
|
||||
|
||||
/// Run analyses requested in binary analysis mode.
|
||||
void runBinaryAnalyses();
|
||||
|
||||
/// Run optimizations that operate at the binary, or post-linker, level.
|
||||
void runOptimizationPasses();
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
namespace opts {
|
||||
|
||||
extern bool HeatmapMode;
|
||||
extern bool BinaryAnalysisMode;
|
||||
|
||||
extern llvm::cl::OptionCategory BoltCategory;
|
||||
extern llvm::cl::OptionCategory BoltDiffCategory;
|
||||
@@ -27,6 +28,7 @@ extern llvm::cl::OptionCategory BoltOutputCategory;
|
||||
extern llvm::cl::OptionCategory AggregatorCategory;
|
||||
extern llvm::cl::OptionCategory BoltInstrCategory;
|
||||
extern llvm::cl::OptionCategory HeatmapCategory;
|
||||
extern llvm::cl::OptionCategory BinaryAnalysisCategory;
|
||||
|
||||
extern llvm::cl::opt<unsigned> AlignText;
|
||||
extern llvm::cl::opt<unsigned> AlignFunctions;
|
||||
|
||||
@@ -699,6 +699,11 @@ Error RewriteInstance::run() {
|
||||
if (opts::DiffOnly)
|
||||
return Error::success();
|
||||
|
||||
if (opts::BinaryAnalysisMode) {
|
||||
runBinaryAnalyses();
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
preregisterSections();
|
||||
|
||||
runOptimizationPasses();
|
||||
@@ -3475,6 +3480,8 @@ void RewriteInstance::runOptimizationPasses() {
|
||||
BC->logBOLTErrorsAndQuitOnFatal(BinaryFunctionPassManager::runAllPasses(*BC));
|
||||
}
|
||||
|
||||
// Entry point for binary analysis mode, called from RewriteInstance::run()
// when opts::BinaryAnalysisMode is set. Currently a no-op placeholder: the
// body is intentionally empty because no analyses are implemented yet.
void RewriteInstance::runBinaryAnalyses() {}
|
||||
|
||||
void RewriteInstance::preregisterSections() {
|
||||
// Preregister sections before emission to set their order in the output.
|
||||
const unsigned ROFlags = BinarySection::getFlags(/*IsReadOnly*/ true,
|
||||
|
||||
@@ -29,6 +29,7 @@ const char *BoltRevision =
|
||||
namespace opts {
|
||||
|
||||
bool HeatmapMode = false;
|
||||
bool BinaryAnalysisMode = false;
|
||||
|
||||
cl::OptionCategory BoltCategory("BOLT generic options");
|
||||
cl::OptionCategory BoltDiffCategory("BOLTDIFF generic options");
|
||||
@@ -38,6 +39,7 @@ cl::OptionCategory BoltOutputCategory("Output options");
|
||||
cl::OptionCategory AggregatorCategory("Data aggregation options");
|
||||
cl::OptionCategory BoltInstrCategory("BOLT instrumentation options");
|
||||
cl::OptionCategory HeatmapCategory("Heatmap options");
|
||||
cl::OptionCategory BinaryAnalysisCategory("BinaryAnalysis options");
|
||||
|
||||
cl::opt<unsigned> AlignText("align-text",
|
||||
cl::desc("alignment of .text section"), cl::Hidden,
|
||||
|
||||
@@ -37,6 +37,7 @@ list(APPEND BOLT_TEST_DEPS
|
||||
lld
|
||||
llvm-config
|
||||
llvm-bolt
|
||||
llvm-bolt-binary-analysis
|
||||
llvm-bolt-heatmap
|
||||
llvm-bat-dump
|
||||
llvm-dwarfdump
|
||||
|
||||
1
bolt/test/binary-analysis/AArch64/Inputs/dummy.txt
Normal file
1
bolt/test/binary-analysis/AArch64/Inputs/dummy.txt
Normal file
@@ -0,0 +1 @@
|
||||
dummy
|
||||
33
bolt/test/binary-analysis/AArch64/cmdline-args.test
Normal file
33
bolt/test/binary-analysis/AArch64/cmdline-args.test
Normal file
@@ -0,0 +1,33 @@
|
||||
# This file tests error messages produced on invalid command line arguments.
|
||||
# It also checks that help messages are generated as expected.
|
||||
|
||||
# Verify that an error message is provided if an input file is missing or incorrect
|
||||
|
||||
RUN: not llvm-bolt-binary-analysis 2>&1 | FileCheck -check-prefix=NOFILEARG %s
|
||||
NOFILEARG: llvm-bolt-binary-analysis: Not enough positional command line arguments specified!
|
||||
NOFILEARG-NEXT: Must specify at least 1 positional argument: See: {{.*}}llvm-bolt-binary-analysis --help
|
||||
|
||||
RUN: not llvm-bolt-binary-analysis non-existing-file 2>&1 | FileCheck -check-prefix=NONEXISTINGFILEARG %s
|
||||
NONEXISTINGFILEARG: llvm-bolt-binary-analysis: 'non-existing-file': No such file or directory.
|
||||
|
||||
RUN: not llvm-bolt-binary-analysis %p/Inputs/dummy.txt 2>&1 | FileCheck -check-prefix=NOELFFILEARG %s
|
||||
NOELFFILEARG: llvm-bolt-binary-analysis: '{{.*}}/Inputs/dummy.txt': The file was not recognized as a valid object file.
|
||||
|
||||
RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe
|
||||
RUN: llvm-bolt-binary-analysis %t.exe 2>&1 | FileCheck -check-prefix=VALIDELFFILEARG --allow-empty %s
|
||||
# Check that there are no BOLT-WARNING or BOLT-ERROR output lines
|
||||
VALIDELFFILEARG: BOLT-INFO:
|
||||
VALIDELFFILEARG-NOT: BOLT-WARNING:
|
||||
VALIDELFFILEARG-NOT: BOLT-ERROR:
|
||||
|
||||
# Check --help output
|
||||
|
||||
RUN: llvm-bolt-binary-analysis --help 2>&1 | FileCheck -check-prefix=HELP %s
|
||||
|
||||
HELP: OVERVIEW: BinaryAnalysis
|
||||
HELP-EMPTY:
|
||||
HELP-NEXT: USAGE: llvm-bolt-binary-analysis [options] <executable>
|
||||
HELP-EMPTY:
|
||||
HELP-NEXT: OPTIONS:
|
||||
HELP-EMPTY:
|
||||
HELP-NEXT: Generic Options:
|
||||
7
bolt/test/binary-analysis/AArch64/lit.local.cfg
Normal file
7
bolt/test/binary-analysis/AArch64/lit.local.cfg
Normal file
@@ -0,0 +1,7 @@
|
||||
# Mark every test in this directory as unsupported unless "AArch64" is among
# the configured targets.
if "AArch64" not in config.root.targets:
    config.unsupported = True

# Extra compiler/linker flags appended to %cflags and %cxxflags for the tests
# in this directory.
aarch64_flags = " ".join(
    [
        "--target=aarch64-linux-gnu",
        "-nostartfiles",
        "-nostdlib",
        "-ffreestanding",
        "-Wl,--emit-relocs",
    ]
)

# Prepend the directory-local substitutions (same insertion order as before:
# %cflags first, then %cxxflags, each at index 0).
for subst_name in ("%cflags", "%cxxflags"):
    config.substitutions.insert(0, (subst_name, f"{subst_name} {aarch64_flags}"))
|
||||
@@ -110,6 +110,7 @@ tools = [
|
||||
),
|
||||
ToolSubst("llvm-boltdiff", unresolved="fatal"),
|
||||
ToolSubst("llvm-bolt-heatmap", unresolved="fatal"),
|
||||
ToolSubst("llvm-bolt-binary-analysis", unresolved="fatal"),
|
||||
ToolSubst("llvm-bat-dump", unresolved="fatal"),
|
||||
ToolSubst("perf2bolt", unresolved="fatal"),
|
||||
ToolSubst("yaml2obj", unresolved="fatal"),
|
||||
|
||||
@@ -7,3 +7,4 @@ add_subdirectory(llvm-bolt-fuzzer)
|
||||
add_subdirectory(bat-dump)
|
||||
add_subdirectory(merge-fdata)
|
||||
add_subdirectory(heatmap)
|
||||
add_subdirectory(binary-analysis)
|
||||
|
||||
19
bolt/tools/binary-analysis/CMakeLists.txt
Normal file
19
bolt/tools/binary-analysis/CMakeLists.txt
Normal file
@@ -0,0 +1,19 @@
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
${LLVM_TARGETS_TO_BUILD}
|
||||
MC
|
||||
Object
|
||||
Support
|
||||
)
|
||||
|
||||
add_bolt_tool(llvm-bolt-binary-analysis
|
||||
binary-analysis.cpp
|
||||
DISABLE_LLVM_LINK_LLVM_DYLIB
|
||||
)
|
||||
|
||||
target_link_libraries(llvm-bolt-binary-analysis
|
||||
PRIVATE
|
||||
LLVMBOLTRewrite
|
||||
LLVMBOLTUtils
|
||||
)
|
||||
|
||||
add_dependencies(bolt llvm-bolt-binary-analysis)
|
||||
122
bolt/tools/binary-analysis/binary-analysis.cpp
Normal file
122
bolt/tools/binary-analysis/binary-analysis.cpp
Normal file
@@ -0,0 +1,122 @@
|
||||
//===- bolt/tools/binary-analysis/binary-analysis.cpp ---------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This is a generic binary analysis tool, where multiple different specific
|
||||
// binary analyses can be plugged in. The binary analyses are mostly built
|
||||
// on top of BOLT components.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "bolt/Rewrite/RewriteInstance.h"
|
||||
#include "bolt/Utils/CommandLineOpts.h"
|
||||
#include "llvm/MC/TargetRegistry.h"
|
||||
#include "llvm/Object/Binary.h"
|
||||
#include "llvm/Object/ELFObjectFile.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Errc.h"
|
||||
#include "llvm/Support/ManagedStatic.h"
|
||||
#include "llvm/Support/PrettyStackTrace.h"
|
||||
#include "llvm/Support/Program.h"
|
||||
#include "llvm/Support/Signals.h"
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
#include "llvm/Support/VirtualFileSystem.h"
|
||||
|
||||
#define DEBUG_TYPE "bolt"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace object;
|
||||
using namespace bolt;
|
||||
|
||||
namespace opts {
|
||||
|
||||
static cl::OptionCategory *BinaryAnalysisCategories[] = {
|
||||
&BinaryAnalysisCategory};
|
||||
|
||||
static cl::opt<std::string> InputFilename(cl::Positional,
|
||||
cl::desc("<executable>"),
|
||||
cl::Required,
|
||||
cl::cat(BinaryAnalysisCategory),
|
||||
cl::sub(cl::SubCommand::getAll()));
|
||||
|
||||
} // namespace opts
|
||||
|
||||
// Name of this tool; used by report_error() as the prefix of every diagnostic.
static StringRef ToolName = "llvm-bolt-binary-analysis";
|
||||
|
||||
/// Print a diagnostic of the form "<tool>: '<Message>': <EC description>." to
/// stderr and terminate the process with exit status 1.
///
/// \param Message text placed between the quotes (callers pass the offending
///                file name).
/// \param EC      the error being reported; must be a non-success code.
static void report_error(StringRef Message, std::error_code EC) {
  assert(EC);
  const std::string Reason = EC.message();
  errs() << ToolName << ": '" << Message << "': " << Reason << ".\n";
  exit(1);
}
|
||||
|
||||
/// Print a diagnostic of the form "<tool>: '<Message>': <E as text>." to
/// stderr and terminate the process with exit status 1.
///
/// \param Message text placed between the quotes (callers pass the offending
///                file name).
/// \param E       the error being reported; must be in a failure state. It is
///                consumed by converting it to a string.
static void report_error(StringRef Message, Error E) {
  assert(E);
  const std::string Reason = toString(std::move(E));
  errs() << ToolName << ": '" << Message << "': " << Reason << ".\n";
  exit(1);
}
|
||||
|
||||
/// Configure the LLVM command-line library for this tool and parse \p argv.
///
/// \param argc number of entries in \p argv.
/// \param argv the raw command-line arguments as received by main().
void ParseCommandLine(int argc, char **argv) {
  // Hide options unrelated to the categories this tool owns.
  const auto ToolCategories = ArrayRef(opts::BinaryAnalysisCategories);
  cl::HideUnrelatedOptions(ToolCategories);

  // Register the target printer for --version.
  cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);

  // The last argument is the overview text shown by --help.
  cl::ParseCommandLineOptions(argc, argv, "BinaryAnalysis\n");
}
|
||||
|
||||
static std::string GetExecutablePath(const char *Argv0) {
|
||||
SmallString<256> ExecutablePath(Argv0);
|
||||
// Do a PATH lookup if Argv0 isn't a valid path.
|
||||
if (!llvm::sys::fs::exists(ExecutablePath))
|
||||
if (llvm::ErrorOr<std::string> P =
|
||||
llvm::sys::findProgramByName(ExecutablePath))
|
||||
ExecutablePath = *P;
|
||||
return std::string(ExecutablePath.str());
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
// Print a stack trace if we signal out.
|
||||
sys::PrintStackTraceOnErrorSignal(argv[0]);
|
||||
PrettyStackTraceProgram X(argc, argv);
|
||||
|
||||
std::string ToolPath = GetExecutablePath(argv[0]);
|
||||
|
||||
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
|
||||
|
||||
// Initialize targets and assembly printers/parsers.
|
||||
llvm::InitializeAllTargetInfos();
|
||||
llvm::InitializeAllTargetMCs();
|
||||
llvm::InitializeAllAsmParsers();
|
||||
llvm::InitializeAllDisassemblers();
|
||||
|
||||
llvm::InitializeAllTargets();
|
||||
llvm::InitializeAllAsmPrinters();
|
||||
|
||||
ParseCommandLine(argc, argv);
|
||||
|
||||
opts::BinaryAnalysisMode = true;
|
||||
|
||||
if (!sys::fs::exists(opts::InputFilename))
|
||||
report_error(opts::InputFilename, errc::no_such_file_or_directory);
|
||||
|
||||
Expected<OwningBinary<Binary>> BinaryOrErr =
|
||||
createBinary(opts::InputFilename);
|
||||
if (Error E = BinaryOrErr.takeError())
|
||||
report_error(opts::InputFilename, std::move(E));
|
||||
Binary &Binary = *BinaryOrErr.get().getBinary();
|
||||
|
||||
if (auto *e = dyn_cast<ELFObjectFileBase>(&Binary)) {
|
||||
auto RIOrErr = RewriteInstance::create(e, argc, argv, ToolPath);
|
||||
if (Error E = RIOrErr.takeError())
|
||||
report_error(opts::InputFilename, std::move(E));
|
||||
RewriteInstance &RI = *RIOrErr.get();
|
||||
if (Error E = RI.run())
|
||||
report_error(opts::InputFilename, std::move(E));
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
Reference in New Issue
Block a user