From ceb7214be0287f536b292a41f8a7dc2e1467d72d Mon Sep 17 00:00:00 2001 From: Kristof Beyls Date: Thu, 12 Dec 2024 11:06:27 +0100 Subject: [PATCH] [BOLT] Introduce binary analysis tool based on BOLT (#115330) This initial commit does not add any specific binary analyses yet, it merely contains the boilerplate to introduce a new BOLT-based tool. This basically combines the 4 first patches from the prototype pac-ret and stack-clash binary analyzer discussed in RFC https://discourse.llvm.org/t/rfc-bolt-based-binary-analysis-tool-to-verify-correctness-of-security-hardening/78148 and published at https://github.com/llvm/llvm-project/compare/main...kbeyls:llvm-project:bolt-gadget-scanner-prototype The introduction of such a BOLT-based binary analysis tool was proposed and discussed in at least the following places: - The RFC pointed to above - EuroLLVM 2024 round table https://discourse.llvm.org/t/summary-of-bolt-as-a-binary-analysis-tool-round-table-at-eurollvm/78441 The round table showed quite a few people interested in being able to build a custom binary analysis quickly with a tool like this. - Also at the US LLVM dev meeting a few weeks ago, I heard interest from a few people, asking when the tool would be available upstream. - The presentation "Adding Pointer Authentication ABI support for your ELF platform" (https://llvm.swoogo.com/2024devmtg/session/2512720/adding-pointer-authentication-abi-support-for-your-elf-platform) explicitly mentioned interest to extend the prototype tool to verify correct implementation of pauthabi. 
--- bolt/docs/BinaryAnalysis.md | 20 +++ bolt/include/bolt/Rewrite/RewriteInstance.h | 3 + bolt/include/bolt/Utils/CommandLineOpts.h | 2 + bolt/lib/Rewrite/RewriteInstance.cpp | 7 + bolt/lib/Utils/CommandLineOpts.cpp | 2 + bolt/test/CMakeLists.txt | 1 + .../binary-analysis/AArch64/Inputs/dummy.txt | 1 + .../binary-analysis/AArch64/cmdline-args.test | 33 +++++ .../binary-analysis/AArch64/lit.local.cfg | 7 + bolt/test/lit.cfg.py | 1 + bolt/tools/CMakeLists.txt | 1 + bolt/tools/binary-analysis/CMakeLists.txt | 19 +++ .../tools/binary-analysis/binary-analysis.cpp | 122 ++++++++++++++++++ 13 files changed, 219 insertions(+) create mode 100644 bolt/docs/BinaryAnalysis.md create mode 100644 bolt/test/binary-analysis/AArch64/Inputs/dummy.txt create mode 100644 bolt/test/binary-analysis/AArch64/cmdline-args.test create mode 100644 bolt/test/binary-analysis/AArch64/lit.local.cfg create mode 100644 bolt/tools/binary-analysis/CMakeLists.txt create mode 100644 bolt/tools/binary-analysis/binary-analysis.cpp diff --git a/bolt/docs/BinaryAnalysis.md b/bolt/docs/BinaryAnalysis.md new file mode 100644 index 000000000000..f91b77d046de --- /dev/null +++ b/bolt/docs/BinaryAnalysis.md @@ -0,0 +1,20 @@ +# BOLT-based binary analysis + +As part of post-link-time optimizing, BOLT needs to perform a range of analyses +on binaries such as reconstructing control flow graphs, and more. + +The `llvm-bolt-binary-analysis` tool enables running requested binary analyses +on binaries, and generating reports. It does this by building on top of the +analyses implemented in the BOLT libraries. + +## Which binary analyses are implemented? + +At the moment, no binary analyses are implemented. + +The goal is to make it easy using a plug-in framework to add your own analyses. + +## How to add your own binary analysis + +_TODO: this section needs to be written. 
Ideally, we should have a simple +"example" or "template" analysis that can be the starting point for implementing +custom analyses_ diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h index 73d2857f946c..42094cb73210 100644 --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -164,6 +164,9 @@ private: void preregisterSections(); + /// run analyses requested in binary analysis mode. + void runBinaryAnalyses(); + /// Run optimizations that operate at the binary, or post-linker, level. void runOptimizationPasses(); diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index 04bf7db5de95..111eb650c374 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -18,6 +18,7 @@ namespace opts { extern bool HeatmapMode; +extern bool BinaryAnalysisMode; extern llvm::cl::OptionCategory BoltCategory; extern llvm::cl::OptionCategory BoltDiffCategory; @@ -27,6 +28,7 @@ extern llvm::cl::OptionCategory BoltOutputCategory; extern llvm::cl::OptionCategory AggregatorCategory; extern llvm::cl::OptionCategory BoltInstrCategory; extern llvm::cl::OptionCategory HeatmapCategory; +extern llvm::cl::OptionCategory BinaryAnalysisCategory; extern llvm::cl::opt AlignText; extern llvm::cl::opt AlignFunctions; diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 76e1f0156f82..dfac662aebb6 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -699,6 +699,11 @@ Error RewriteInstance::run() { if (opts::DiffOnly) return Error::success(); + if (opts::BinaryAnalysisMode) { + runBinaryAnalyses(); + return Error::success(); + } + preregisterSections(); runOptimizationPasses(); @@ -3475,6 +3480,8 @@ void RewriteInstance::runOptimizationPasses() { BC->logBOLTErrorsAndQuitOnFatal(BinaryFunctionPassManager::runAllPasses(*BC)); } +void 
RewriteInstance::runBinaryAnalyses() {} + void RewriteInstance::preregisterSections() { // Preregister sections before emission to set their order in the output. const unsigned ROFlags = BinarySection::getFlags(/*IsReadOnly*/ true, diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index de82420a1671..17f090aa61ee 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -29,6 +29,7 @@ const char *BoltRevision = namespace opts { bool HeatmapMode = false; +bool BinaryAnalysisMode = false; cl::OptionCategory BoltCategory("BOLT generic options"); cl::OptionCategory BoltDiffCategory("BOLTDIFF generic options"); @@ -38,6 +39,7 @@ cl::OptionCategory BoltOutputCategory("Output options"); cl::OptionCategory AggregatorCategory("Data aggregation options"); cl::OptionCategory BoltInstrCategory("BOLT instrumentation options"); cl::OptionCategory HeatmapCategory("Heatmap options"); +cl::OptionCategory BinaryAnalysisCategory("BinaryAnalysis options"); cl::opt AlignText("align-text", cl::desc("alignment of .text section"), cl::Hidden, diff --git a/bolt/test/CMakeLists.txt b/bolt/test/CMakeLists.txt index d468ff984840..6e18b028bddf 100644 --- a/bolt/test/CMakeLists.txt +++ b/bolt/test/CMakeLists.txt @@ -37,6 +37,7 @@ list(APPEND BOLT_TEST_DEPS lld llvm-config llvm-bolt + llvm-bolt-binary-analysis llvm-bolt-heatmap llvm-bat-dump llvm-dwarfdump diff --git a/bolt/test/binary-analysis/AArch64/Inputs/dummy.txt b/bolt/test/binary-analysis/AArch64/Inputs/dummy.txt new file mode 100644 index 000000000000..2995a4d0e749 --- /dev/null +++ b/bolt/test/binary-analysis/AArch64/Inputs/dummy.txt @@ -0,0 +1 @@ +dummy \ No newline at end of file diff --git a/bolt/test/binary-analysis/AArch64/cmdline-args.test b/bolt/test/binary-analysis/AArch64/cmdline-args.test new file mode 100644 index 000000000000..e414818644a3 --- /dev/null +++ b/bolt/test/binary-analysis/AArch64/cmdline-args.test @@ -0,0 +1,33 @@ +# This file tests error 
messages produced on invalid command line arguments. +# It also checks that help messages are generated as expected. + +# Verify that an error message is provided if an input file is missing or incorrect + +RUN: not llvm-bolt-binary-analysis 2>&1 | FileCheck -check-prefix=NOFILEARG %s +NOFILEARG: llvm-bolt-binary-analysis: Not enough positional command line arguments specified! +NOFILEARG-NEXT: Must specify at least 1 positional argument: See: {{.*}}llvm-bolt-binary-analysis --help + +RUN: not llvm-bolt-binary-analysis non-existing-file 2>&1 | FileCheck -check-prefix=NONEXISTINGFILEARG %s +NONEXISTINGFILEARG: llvm-bolt-binary-analysis: 'non-existing-file': No such file or directory. + +RUN: not llvm-bolt-binary-analysis %p/Inputs/dummy.txt 2>&1 | FileCheck -check-prefix=NOELFFILEARG %s +NOELFFILEARG: llvm-bolt-binary-analysis: '{{.*}}/Inputs/dummy.txt': The file was not recognized as a valid object file. + +RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe +RUN: llvm-bolt-binary-analysis %t.exe 2>&1 | FileCheck -check-prefix=VALIDELFFILEARG --allow-empty %s +# Check that there are no BOLT-WARNING or BOLT-ERROR output lines +VALIDELFFILEARG: BOLT-INFO: +VALIDELFFILEARG-NOT: BOLT-WARNING: +VALIDELFFILEARG-NOT: BOLT-ERROR: + +# Check --help output + +RUN: llvm-bolt-binary-analysis --help 2>&1 | FileCheck -check-prefix=HELP %s + +HELP: OVERVIEW: BinaryAnalysis +HELP-EMPTY: +HELP-NEXT: USAGE: llvm-bolt-binary-analysis [options] +HELP-EMPTY: +HELP-NEXT: OPTIONS: +HELP-EMPTY: +HELP-NEXT: Generic Options: diff --git a/bolt/test/binary-analysis/AArch64/lit.local.cfg b/bolt/test/binary-analysis/AArch64/lit.local.cfg new file mode 100644 index 000000000000..6f247dd52e82 --- /dev/null +++ b/bolt/test/binary-analysis/AArch64/lit.local.cfg @@ -0,0 +1,7 @@ +if "AArch64" not in config.root.targets: + config.unsupported = True + +flags = "--target=aarch64-linux-gnu -nostartfiles -nostdlib -ffreestanding -Wl,--emit-relocs" + 
+config.substitutions.insert(0, ("%cflags", f"%cflags {flags}")) +config.substitutions.insert(0, ("%cxxflags", f"%cxxflags {flags}")) diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py index da3ae34ba3bd..0d05229be2bf 100644 --- a/bolt/test/lit.cfg.py +++ b/bolt/test/lit.cfg.py @@ -110,6 +110,7 @@ tools = [ ), ToolSubst("llvm-boltdiff", unresolved="fatal"), ToolSubst("llvm-bolt-heatmap", unresolved="fatal"), + ToolSubst("llvm-bolt-binary-analysis", unresolved="fatal"), ToolSubst("llvm-bat-dump", unresolved="fatal"), ToolSubst("perf2bolt", unresolved="fatal"), ToolSubst("yaml2obj", unresolved="fatal"), diff --git a/bolt/tools/CMakeLists.txt b/bolt/tools/CMakeLists.txt index 22ea3b9bd805..3383902cffc4 100644 --- a/bolt/tools/CMakeLists.txt +++ b/bolt/tools/CMakeLists.txt @@ -7,3 +7,4 @@ add_subdirectory(llvm-bolt-fuzzer) add_subdirectory(bat-dump) add_subdirectory(merge-fdata) add_subdirectory(heatmap) +add_subdirectory(binary-analysis) diff --git a/bolt/tools/binary-analysis/CMakeLists.txt b/bolt/tools/binary-analysis/CMakeLists.txt new file mode 100644 index 000000000000..841fc5b37118 --- /dev/null +++ b/bolt/tools/binary-analysis/CMakeLists.txt @@ -0,0 +1,19 @@ +set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + MC + Object + Support + ) + +add_bolt_tool(llvm-bolt-binary-analysis + binary-analysis.cpp + DISABLE_LLVM_LINK_LLVM_DYLIB + ) + +target_link_libraries(llvm-bolt-binary-analysis + PRIVATE + LLVMBOLTRewrite + LLVMBOLTUtils + ) + +add_dependencies(bolt llvm-bolt-binary-analysis) diff --git a/bolt/tools/binary-analysis/binary-analysis.cpp b/bolt/tools/binary-analysis/binary-analysis.cpp new file mode 100644 index 000000000000..b03fee3e025a --- /dev/null +++ b/bolt/tools/binary-analysis/binary-analysis.cpp @@ -0,0 +1,122 @@ +//===- bolt/tools/binary-analysis/binary-analysis.cpp ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a generic binary analysis tool, where multiple different specific
+// binary analyses can be plugged in to. The binary analyses are mostly built
+// on top of BOLT components.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Rewrite/RewriteInstance.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/VirtualFileSystem.h"
+
+#define DEBUG_TYPE "bolt"
+
+using namespace llvm;
+using namespace object;
+using namespace bolt;
+
+namespace opts {
+
+static cl::OptionCategory *BinaryAnalysisCategories[] = {
+    &BinaryAnalysisCategory};
+
+static cl::opt<std::string> InputFilename(cl::Positional,
+                                          cl::desc("<executable>"),
+                                          cl::Required,
+                                          cl::cat(BinaryAnalysisCategory),
+                                          cl::sub(cl::SubCommand::getAll()));
+
+} // namespace opts
+
+static StringRef ToolName = "llvm-bolt-binary-analysis";
+/// Print \p Message and the text of \p EC to stderr, then exit with status 1.
+static void report_error(StringRef Message, std::error_code EC) {
+  assert(EC);
+  errs() << ToolName << ": '" << Message << "': " << EC.message() << ".\n";
+  exit(1);
+}
+/// Overload for llvm::Error: consumes \p E via toString(), prints it, exits.
+static void report_error(StringRef Message, Error E) {
+  assert(E);
+  errs() << ToolName << ": '" << Message << "': " << toString(std::move(E))
+         << ".\n";
+  exit(1);
+}
+/// Hide options outside BinaryAnalysisCategory, then parse the command line.
+void ParseCommandLine(int argc, char **argv) {
+  cl::HideUnrelatedOptions(ArrayRef(opts::BinaryAnalysisCategories));
+  // Register the target printer for --version.
+  cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
+
+  cl::ParseCommandLineOptions(argc, argv, "BinaryAnalysis\n");
+}
+/// Return \p Argv0, replaced by a successful PATH lookup if it doesn't exist.
+static std::string GetExecutablePath(const char *Argv0) {
+  SmallString<256> ExecutablePath(Argv0);
+  // Do a PATH lookup if Argv0 isn't a valid path.
+  if (!llvm::sys::fs::exists(ExecutablePath))
+    if (llvm::ErrorOr<std::string> P =
+            llvm::sys::findProgramByName(ExecutablePath))
+      ExecutablePath = *P;
+  return std::string(ExecutablePath.str());
+}
+/// Entry point: load the input binary and run BOLT on it in analysis mode.
+int main(int argc, char **argv) {
+  // Print a stack trace if we signal out.
+  sys::PrintStackTraceOnErrorSignal(argv[0]);
+  PrettyStackTraceProgram X(argc, argv);
+
+  std::string ToolPath = GetExecutablePath(argv[0]);
+
+  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+
+  // Initialize targets and assembly printers/parsers.
+  llvm::InitializeAllTargetInfos();
+  llvm::InitializeAllTargetMCs();
+  llvm::InitializeAllAsmParsers();
+  llvm::InitializeAllDisassemblers();
+
+  llvm::InitializeAllTargets();
+  llvm::InitializeAllAsmPrinters();
+
+  ParseCommandLine(argc, argv);
+
+  opts::BinaryAnalysisMode = true; // run() returns after runBinaryAnalyses().
+
+  if (!sys::fs::exists(opts::InputFilename))
+    report_error(opts::InputFilename, errc::no_such_file_or_directory);
+
+  Expected<OwningBinary<Binary>> BinaryOrErr =
+      createBinary(opts::InputFilename);
+  if (Error E = BinaryOrErr.takeError())
+    report_error(opts::InputFilename, std::move(E));
+  Binary &Binary = *BinaryOrErr.get().getBinary();
+
+  if (auto *e = dyn_cast<ELFObjectFileBase>(&Binary)) {
+    auto RIOrErr = RewriteInstance::create(e, argc, argv, ToolPath);
+    if (Error E = RIOrErr.takeError())
+      report_error(opts::InputFilename, std::move(E));
+    RewriteInstance &RI = *RIOrErr.get();
+    if (Error E = RI.run())
+      report_error(opts::InputFilename, std::move(E));
+  }
+
+  return EXIT_SUCCESS;
+}