[DLCov] Origin-Tracking: Enable collecting and symbolizing stack traces (#143591)

This patch is part of a series that adds origin-tracking to the debugify
source location coverage checks, allowing us to report symbolized stack
traces of the point where missing source locations appear.

This patch adds a pair of new functions in `Signals.h` that can be used
to collect and symbolize stack traces respectively. This has major
implementation overlap with the existing stack trace
collection/symbolizing methods, but the existing functions are
specialized for dumping a stack trace to stderr when LLVM crashes, while
these new functions are meant to be called repeatedly during the
execution of the program, and therefore we need a separate set of
functions.
This commit is contained in:
Stephen Tozer
2025-07-02 12:01:17 +01:00
committed by GitHub
parent a2c9f7dbcc
commit 35626e97d8
4 changed files with 198 additions and 65 deletions

View File

@@ -14,10 +14,25 @@
#ifndef LLVM_SUPPORT_SIGNALS_H
#define LLVM_SUPPORT_SIGNALS_H
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Compiler.h"
#include <array>
#include <cstdint>
#include <string>
#include <utility>
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
namespace llvm {
// Typedefs that are convenient but only used by the stack-trace-collection code
// added if DebugLoc origin-tracking is enabled.
// AddressSet: the set of raw addresses handed to sys::symbolizeAddresses for
// symbolization.
using AddressSet = DenseSet<void *, DenseMapInfo<void *, void>>;
// SymbolizedAddressMap: maps each address to the list of symbolized frame
// strings produced for it. A single address may map to several strings, since
// one address can correspond to multiple (inlined) frames.
using SymbolizedAddressMap =
DenseMap<void *, SmallVector<std::string, 0>, DenseMapInfo<void *, void>,
detail::DenseMapPair<void *, SmallVector<std::string, 0>>>;
} // namespace llvm
#endif
namespace llvm {
class StringRef;
class raw_ostream;
@@ -57,6 +72,28 @@ LLVM_ABI void DisableSystemDialogsOnCrash();
/// specified, the entire frame is printed.
LLVM_ABI void PrintStackTrace(raw_ostream &OS, int Depth = 0);
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
#ifdef NDEBUG
#error DebugLoc origin-tracking should not be enabled in Release builds.
#endif
/// Populates the given array with a stack trace of the current program, up to
/// MaxDepth frames.
///
/// \tparam MaxDepth Capacity of \p StackTrace, i.e. the maximum number of
/// frames that can be recorded.
/// \param StackTrace Output array. Frames are written starting at index 0; all
/// entries after the returned depth are left unmodified.
/// \returns The number of frames written to \p StackTrace.
///
/// NB: This is only intended to be used for introspection of LLVM by Debugify,
/// will not be enabled in release builds, and should not be relied on for
/// other purposes.
/// NOTE(review): only the declaration lives here; the Unix implementation
/// explicitly instantiates MaxDepth == 16 only, so other depths presumably
/// fail to link -- confirm before using a different depth.
template <unsigned long MaxDepth>
int getStackTrace(std::array<void *, MaxDepth> &StackTrace);
/// Takes a set of \p Addresses, symbolizes them and stores the result in the
/// provided \p SymbolizedAddresses map.
///
/// \param Addresses The addresses to symbolize. The set is only read; if it is
/// empty the function returns without doing anything.
/// \param SymbolizedAddresses Output map. For every address, the symbolized
/// frame strings are appended to that address's entry; an address may receive
/// more than one string (e.g. when it corresponds to inlined frames).
///
/// Symbolization must be enabled (this is asserted), and a fatal error is
/// reported if llvm-symbolizer cannot be located. If collecting the symbols
/// otherwise fails, \p SymbolizedAddresses is left untouched.
///
/// NB: This is only intended to be used for introspection of LLVM by
/// Debugify, will not be enabled in release builds, and should not be relied
/// on for other purposes.
void symbolizeAddresses(AddressSet &Addresses,
SymbolizedAddressMap &SymbolizedAddresses);
#endif
// Run all registered signal handlers.
LLVM_ABI void RunSignalHandlers();

View File

@@ -31,7 +31,6 @@
#include "llvm/Support/raw_ostream.h"
#include <array>
#include <cmath>
#include <vector>
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only TRULY operating system
@@ -137,47 +136,28 @@ static FormattedNumber format_ptr(void *PC) {
return format_hex((uint64_t)PC, PtrWidth);
}
/// Helper that launches llvm-symbolizer and symbolizes a backtrace.
LLVM_ATTRIBUTE_USED
static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
int Depth, llvm::raw_ostream &OS) {
if (DisableSymbolicationFlag || getenv(DisableSymbolizationEnv))
return false;
// Don't recursively invoke the llvm-symbolizer binary.
if (Argv0.contains("llvm-symbolizer"))
return false;
// FIXME: Subtract necessary number from StackTrace entries to turn return addresses
// into actual instruction addresses.
// Use llvm-symbolizer tool to symbolize the stack traces. First look for it
// alongside our binary, then in $PATH.
ErrorOr<std::string> LLVMSymbolizerPathOrErr = std::error_code();
if (const char *Path = getenv(LLVMSymbolizerPathEnv)) {
LLVMSymbolizerPathOrErr = sys::findProgramByName(Path);
} else if (!Argv0.empty()) {
StringRef Parent = llvm::sys::path::parent_path(Argv0);
if (!Parent.empty())
LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer", Parent);
}
if (!LLVMSymbolizerPathOrErr)
LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer");
if (!LLVMSymbolizerPathOrErr)
return false;
const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
// If we don't know argv0 or the address of main() at this point, try
// to guess it anyway (it's possible on some platforms).
std::string MainExecutableName =
sys::fs::exists(Argv0) ? (std::string)std::string(Argv0)
: sys::fs::getMainExecutable(nullptr, nullptr);
/// Reads a file \p Filename written by llvm-symbolizer containing function
/// names and source locations for the addresses in \p AddressList and returns
/// the strings in a vector of pairs, where the first pair element is the index
/// of the corresponding entry in AddressList and the second is the symbolized
/// frame, in a format based on the sanitizer stack trace printer, with the
/// exception that it does not write out frame numbers (i.e. "#2 " for the
/// third address), as it is not assumed that \p AddressList corresponds to a
/// single stack trace.
/// There may be multiple returned entries for a single \p AddressList entry if
/// that frame address corresponds to one or more inlined frames; in this case,
/// all frames for an address will appear contiguously and in-order.
std::optional<SmallVector<std::pair<unsigned, std::string>, 0>>
collectAddressSymbols(void **AddressList, unsigned AddressCount,
const char *MainExecutableName,
const std::string &LLVMSymbolizerPath) {
BumpPtrAllocator Allocator;
StringSaver StrPool(Allocator);
std::vector<const char *> Modules(Depth, nullptr);
std::vector<intptr_t> Offsets(Depth, 0);
if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(),
MainExecutableName.c_str(), StrPool))
return false;
SmallVector<const char *, 0> Modules(AddressCount, nullptr);
SmallVector<intptr_t, 0> Offsets(AddressCount, 0);
if (!findModulesAndOffsets(AddressList, AddressCount, Modules.data(),
Offsets.data(), MainExecutableName, StrPool))
return {};
int InputFD;
SmallString<32> InputFile, OutputFile;
sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile);
@@ -187,9 +167,9 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
{
raw_fd_ostream Input(InputFD, true);
for (int i = 0; i < Depth; i++) {
if (Modules[i])
Input << Modules[i] << " " << (void*)Offsets[i] << "\n";
for (unsigned AddrIdx = 0; AddrIdx < AddressCount; AddrIdx++) {
if (Modules[AddrIdx])
Input << Modules[AddrIdx] << " " << (void *)Offsets[AddrIdx] << "\n";
}
}
@@ -206,53 +186,149 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
int RunResult =
sys::ExecuteAndWait(LLVMSymbolizerPath, Args, std::nullopt, Redirects);
if (RunResult != 0)
return false;
return {};
// This report format is based on the sanitizer stack trace printer. See
// sanitizer_stacktrace_printer.cc in compiler-rt.
SmallVector<std::pair<unsigned, std::string>, 0> Result;
auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str());
if (!OutputBuf)
return false;
return {};
StringRef Output = OutputBuf.get()->getBuffer();
SmallVector<StringRef, 32> Lines;
Output.split(Lines, "\n");
auto CurLine = Lines.begin();
int frame_no = 0;
for (int i = 0; i < Depth; i++) {
auto PrintLineHeader = [&]() {
OS << right_justify(formatv("#{0}", frame_no++).str(),
std::log10(Depth) + 2)
<< ' ' << format_ptr(StackTrace[i]) << ' ';
};
if (!Modules[i]) {
PrintLineHeader();
OS << '\n';
auto *CurLine = Lines.begin();
// Lines contains the output from llvm-symbolizer, which should contain for
// each address with a module in order of appearance, one or more lines
// containing the function name and line associated with that address,
// followed by an empty line.
// For each address, adds an output entry for every real or inlined frame at
// that address. For addresses without known modules, we have a single entry
// containing just the formatted address; for all other output entries, we
// output the function entry if it is known, and either the line number if it
// is known or the module+address offset otherwise.
for (unsigned AddrIdx = 0; AddrIdx < AddressCount; AddrIdx++) {
if (!Modules[AddrIdx]) {
auto &SymbolizedFrame = Result.emplace_back(std::make_pair(AddrIdx, ""));
raw_string_ostream OS(SymbolizedFrame.second);
OS << format_ptr(AddressList[AddrIdx]);
continue;
}
// Read pairs of lines (function name and file/line info) until we
// encounter empty line.
for (;;) {
if (CurLine == Lines.end())
return false;
return {};
StringRef FunctionName = *CurLine++;
if (FunctionName.empty())
break;
PrintLineHeader();
auto &SymbolizedFrame = Result.emplace_back(std::make_pair(AddrIdx, ""));
raw_string_ostream OS(SymbolizedFrame.second);
OS << format_ptr(AddressList[AddrIdx]) << ' ';
if (!FunctionName.starts_with("??"))
OS << FunctionName << ' ';
if (CurLine == Lines.end())
return false;
return {};
StringRef FileLineInfo = *CurLine++;
if (!FileLineInfo.starts_with("??"))
if (!FileLineInfo.starts_with("??")) {
OS << FileLineInfo;
else
OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")";
OS << "\n";
} else {
OS << "(" << Modules[AddrIdx] << '+' << format_hex(Offsets[AddrIdx], 0)
<< ")";
}
}
}
return Result;
}
/// Locates the llvm-symbolizer executable.
///
/// Lookup order:
///  1. If the environment variable named by LLVMSymbolizerPathEnv is set, its
///     value is resolved with sys::findProgramByName.
///  2. Otherwise, if \p Argv0 is non-empty and has a parent directory,
///     "llvm-symbolizer" is searched for in that directory (i.e. alongside our
///     binary).
///  3. If neither of the above produced a path, "llvm-symbolizer" is looked up
///     with the default search (in $PATH).
///
/// \param Argv0 Path of the running executable, or empty if unknown.
/// \returns The resolved path, or the error from the final lookup.
ErrorOr<std::string> getLLVMSymbolizerPath(StringRef Argv0 = {}) {
  if (const char *EnvPath = getenv(LLVMSymbolizerPathEnv)) {
    // An explicit override was given; a failed lookup still falls through to
    // the default search below.
    if (ErrorOr<std::string> FromEnv = sys::findProgramByName(EnvPath))
      return FromEnv;
  } else if (!Argv0.empty()) {
    StringRef BinDir = llvm::sys::path::parent_path(Argv0);
    if (!BinDir.empty()) {
      if (ErrorOr<std::string> Sibling =
              sys::findProgramByName("llvm-symbolizer", BinDir))
        return Sibling;
    }
  }

  // Last resort: default lookup of the tool by name.
  return sys::findProgramByName("llvm-symbolizer");
}
/// Helper that launches llvm-symbolizer and symbolizes a backtrace.
///
/// \param Argv0 Path of the running executable (may be empty); used both to
/// locate llvm-symbolizer and as the main executable name when it exists.
/// \param StackTrace Array of \p Depth frame addresses to symbolize.
/// \param Depth Number of entries in \p StackTrace.
/// \param OS Stream that receives one line per symbolized frame, each prefixed
/// with a right-justified "#N" frame number.
/// \returns true if the trace was symbolized and printed; false if
/// symbolization is disabled, we are llvm-symbolizer ourselves, the tool could
/// not be found, or collecting the symbols failed.
LLVM_ATTRIBUTE_USED
static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
                                      int Depth, llvm::raw_ostream &OS) {
  if (DisableSymbolicationFlag || getenv(DisableSymbolizationEnv))
    return false;

  // Don't recursively invoke the llvm-symbolizer binary.
  if (Argv0.contains("llvm-symbolizer"))
    return false;

  // FIXME: Subtract necessary number from StackTrace entries to turn return
  // addresses into actual instruction addresses.

  // Use llvm-symbolizer tool to symbolize the stack traces. First look for it
  // alongside our binary, then in $PATH.
  ErrorOr<std::string> SymbolizerOrErr = getLLVMSymbolizerPath(Argv0);
  if (!SymbolizerOrErr)
    return false;

  // If we don't know argv0 or the address of main() at this point, try
  // to guess it anyway (it's possible on some platforms).
  std::string ExeName;
  if (sys::fs::exists(Argv0))
    ExeName = std::string(Argv0);
  else
    ExeName = sys::fs::getMainExecutable(nullptr, nullptr);

  auto Frames = collectAddressSymbols(StackTrace, Depth, ExeName.c_str(),
                                      *SymbolizerOrErr);
  if (!Frames)
    return false;

  // Emit every collected frame on its own line. The column width of the
  // "#N" header is derived from Depth; it is deliberately computed inside the
  // loop so that it is never evaluated when there is nothing to print.
  unsigned FrameNo = 0;
  for (const auto &Frame : *Frames) {
    OS << right_justify(formatv("#{0}", FrameNo).str(), std::log10(Depth) + 2)
       << ' ' << Frame.second << '\n';
    ++FrameNo;
  }
  return true;
}
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
/// Symbolizes every address in \p Addresses with llvm-symbolizer and appends
/// the resulting frame strings to the matching entries of
/// \p SymbolizedAddresses.
///
/// An address may contribute more than one string, since the symbolizer can
/// report several frames for it. If \p Addresses is empty, or collecting the
/// symbols fails, \p SymbolizedAddresses is left unchanged. Failing to locate
/// llvm-symbolizer is a fatal error.
void sys::symbolizeAddresses(AddressSet &Addresses,
                             SymbolizedAddressMap &SymbolizedAddresses) {
  assert(!DisableSymbolicationFlag && !getenv(DisableSymbolizationEnv) &&
         "Debugify origin stacktraces require symbolization to be enabled.");

  // Convert Set of Addresses to ordered list.
  // NOTE(review): the sort presumably exists to give the symbolizer a
  // deterministic input order -- confirm.
  SmallVector<void *, 0> AddressList(Addresses.begin(), Addresses.end());
  if (AddressList.empty())
    return;
  llvm::sort(AddressList);

  // Use llvm-symbolizer tool to symbolize the stack traces. First look for it
  // alongside our binary, then in $PATH.
  ErrorOr<std::string> LLVMSymbolizerPathOrErr = getLLVMSymbolizerPath();
  if (!LLVMSymbolizerPathOrErr)
    report_fatal_error("Debugify origin stacktraces require llvm-symbolizer");
  const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;

  // Try to guess the main executable name, since we don't have argv0 available
  // here.
  std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr);

  auto SymbolizedAddressesOpt =
      collectAddressSymbols(AddressList.data(), AddressList.size(),
                            MainExecutableName.c_str(), LLVMSymbolizerPath);
  if (!SymbolizedAddressesOpt)
    return;

  // Each result carries the index of its address in AddressList plus the
  // symbolized frame text. The result vector is a local that is discarded
  // afterwards, so bind by reference and move the strings out instead of
  // copying each pair and then copying its string again.
  for (auto &[AddrIdx, Frame] : *SymbolizedAddressesOpt)
    SymbolizedAddresses[AddressList[AddrIdx]].push_back(std::move(Frame));
}
#endif
static bool printMarkupContext(raw_ostream &OS, const char *MainExecutableName);
LLVM_ATTRIBUTE_USED

View File

@@ -507,6 +507,21 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
return 0;
}
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
#if !defined(HAVE_BACKTRACE)
#error DebugLoc origin-tracking currently requires `backtrace()`.
#endif
namespace llvm::sys {
/// Fills \p StackTrace with up to MaxDepth frame addresses obtained from
/// `backtrace()`, and returns the value `backtrace()` reports.
template <unsigned long MaxDepth>
int getStackTrace(std::array<void *, MaxDepth> &StackTrace) {
  void **const Frames = StackTrace.data();
  return backtrace(Frames, MaxDepth);
}
// The template is defined in this implementation file, so the depth in use
// has to be instantiated explicitly here.
template int getStackTrace<16ul>(std::array<void *, 16ul> &);
} // namespace llvm::sys
#endif
/// If this is an ELF platform, we can find all loaded modules and their virtual
/// addresses with dl_iterate_phdr.
static bool findModulesAndOffsets(void **StackTrace, int Depth,

View File

@@ -9,6 +9,7 @@
// This file provides the Win32 specific implementation of the Signals class.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ExitCodes.h"
#include "llvm/Support/FileSystem.h"
@@ -478,6 +479,10 @@ void sys::PrintStackTraceOnErrorSignal(StringRef Argv0,
}
} // namespace llvm
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
#error DebugLoc origin-tracking currently unimplemented for Windows.
#endif
static void LocalPrintStackTrace(raw_ostream &OS, PCONTEXT C) {
STACKFRAME64 StackFrame{};
CONTEXT Context{};