The existing SymbolFilePDB only works on Windows, as it is written against a closed-source Microsoft SDK that ships with their debugging tools. There are several reasons we want to bypass this and go straight to the bits of the PDB, but just to list a few: (1) More room for optimization. We can't see inside the implementation of the Microsoft SDK, so we don't always know if we're doing things in the most efficient way possible. For example, setting a breakpoint on main of a big program currently takes several seconds. With the implementation here, the time is unnoticeable. (2) We want to be able to symbolize Windows minidumps even if not on Windows. Someone should be able to debug Windows minidumps as if they were on Windows, given that no running process is necessary. This patch is a very crude first attempt at filling out some of the basic pieces. I've implemented FindFunctions, ParseCompileUnitLineTable, and ResolveSymbolContext for a limited subset of possible parameter values, which is just enough to get it to display something nice for the breakpoint location. I've added several tests which are limited enough to work on all platforms but still exercise this functionality. I'll try to add as many tests of this nature as I can, but at some point we'll need a live process. For now, this plugin is always enabled on non-Windows, and is enabled on Windows by setting the environment variable LLDB_USE_NATIVE_PDB_READER=1. Eventually, once it's at parity with the Windows implementation, we'll delete the Windows DIA-based implementation. Differential Revision: https://reviews.llvm.org/D53002 llvm-svn: 344154
230 lines
8.4 KiB
C++
230 lines
8.4 KiB
C++
//===-- CompileUnitIndex.cpp ------------------------------------*- C++ -*-===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "CompileUnitIndex.h"
|
|
|
|
#include "PdbIndex.h"
|
|
#include "PdbUtil.h"
|
|
|
|
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
|
|
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
|
|
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
|
|
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
|
|
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
|
|
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
|
|
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
|
|
#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
|
|
#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
|
|
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
|
|
#include "llvm/Support/Path.h"
|
|
|
|
#include "lldb/Utility/LLDBAssert.h"
|
|
|
|
using namespace lldb;
|
|
using namespace lldb_private;
|
|
using namespace lldb_private::npdb;
|
|
using namespace llvm::codeview;
|
|
using namespace llvm::pdb;
|
|
|
|
/// Decide whether \p other names the same file as \p main.
///
/// The check proceeds from cheapest to most permissive: exact string
/// equality, then file-system equivalence, then a case-insensitive comparison
/// against \p other rewritten with Windows path separators.
static bool IsMainFile(llvm::StringRef main, llvm::StringRef other) {
  // Identical spellings match outright.  Otherwise, if both paths refer to
  // the local file system we can simply ask it whether they are equivalent.
  // When the source is not present on disk we still want to keep trying
  // below.
  if (main == other || llvm::sys::fs::equivalent(main, other))
    return true;

  // FIXME: If we ever want to support PDB debug info for non-Windows systems
  // the following check will be wrong, but we need a way to store the host
  // information in the PDB.
  llvm::SmallString<64> windows_path(other);
  llvm::sys::path::native(windows_path, llvm::sys::path::Style::windows);
  return main.equals_lower(windows_path);
}
|
|
|
|
static void ParseCompile3(const CVSymbol &sym, CompilandIndexItem &cci) {
|
|
cci.m_compile_opts.emplace();
|
|
llvm::cantFail(
|
|
SymbolDeserializer::deserializeAs<Compile3Sym>(sym, *cci.m_compile_opts));
|
|
}
|
|
|
|
static void ParseObjname(const CVSymbol &sym, CompilandIndexItem &cci) {
|
|
cci.m_obj_name.emplace();
|
|
llvm::cantFail(
|
|
SymbolDeserializer::deserializeAs<ObjNameSym>(sym, *cci.m_obj_name));
|
|
}
|
|
|
|
/// Resolve the S_BUILDINFO record \p sym to its LF_BUILDINFO record and copy
/// that record's argument indices into `cci.m_build_info`.
///
/// If the referenced record cannot be fetched from the IPI stream, or is not
/// an LF_BUILDINFO, \p cci is left untouched.
static void ParseBuildInfo(PdbIndex &index, const CVSymbol &sym,
                           CompilandIndexItem &cci) {
  BuildInfoSym build_info_sym(SymbolRecordKind::BuildInfoSym);
  llvm::cantFail(
      SymbolDeserializer::deserializeAs<BuildInfoSym>(sym, build_info_sym));

  // S_BUILDINFO is only a pointer to an LF_BUILDINFO in the IPI stream, so
  // do the extra lookup needed to get at the LF_BUILDINFO itself.
  llvm::Optional<CVType> record =
      index.ipi().typeCollection().tryGetType(build_info_sym.BuildId);

  const bool is_build_info = record && record->kind() == LF_BUILDINFO;
  if (!is_build_info)
    return;

  BuildInfoRecord build_info;
  llvm::cantFail(
      TypeDeserializer::deserializeAs<BuildInfoRecord>(*record, build_info));
  cci.m_build_info.assign(build_info.ArgIndices.begin(),
                          build_info.ArgIndices.end());
}
|
|
|
|
/// Scan the symbol records of \p item's debug stream and populate its object
/// name, compile options and build info from the S_OBJNAME, S_COMPILE3 and
/// S_BUILDINFO records respectively.
///
/// Scanning stops as soon as three records of interest have been handled.
static void ParseExtendedInfo(PdbIndex &index, CompilandIndexItem &item) {
  const CVSymbolArray &syms = item.m_debug_stream.getSymbolArray();

  // This helper is file-private and must only run once per item, i.e. before
  // any of the extended information has been filled in.
  lldbassert(!item.m_obj_name);
  lldbassert(!item.m_compile_opts);
  lldbassert(item.m_build_info.empty());

  // Three records are of interest: S_COMPILE3, S_OBJNAME, and S_BUILDINFO.
  int remaining = 3;
  for (const CVSymbol &sym : syms) {
    const SymbolKind kind = sym.kind();

    if (kind == S_COMPILE3)
      ParseCompile3(sym, item);
    else if (kind == S_OBJNAME)
      ParseObjname(sym, item);
    else if (kind == S_BUILDINFO)
      ParseBuildInfo(index, sym, item);
    else
      continue; // Not a record we care about; it does not count.

    if (--remaining <= 0)
      break;
  }
}
|
|
|
|
/// Construct an index item for the compiland identified by \p uid.
///
/// \p debug_stream and \p descriptor are taken by value and moved into the
/// corresponding members.
CompilandIndexItem::CompilandIndexItem(
    PdbSymUid uid, llvm::pdb::ModuleDebugStreamRef debug_stream,
    llvm::pdb::DbiModuleDescriptor descriptor)
    : m_uid(uid),
      m_debug_stream(std::move(debug_stream)),
      m_module_descriptor(std::move(descriptor)) {
}
|
|
|
|
/// Convenience overload: build the compiland uid for module index \p modi and
/// forward to the PdbSymUid overload.
CompilandIndexItem &CompileUnitIndex::GetOrCreateCompiland(uint16_t modi) {
  return GetOrCreateCompiland(PdbSymUid::makeCompilandId(modi));
}
|
|
|
|
/// Return the cached index item for \p compiland_uid, creating and caching it
/// first if this is the first request for that compiland.
///
/// Creation loads the module's debug stream, parses the S_COMPILE3 /
/// S_OBJNAME / S_BUILDINFO records, initializes the item's string table, and
/// builds the item's source file list with the main source file (when it can
/// be identified) placed first.
CompilandIndexItem &
CompileUnitIndex::GetOrCreateCompiland(PdbSymUid compiland_uid) {
  // Claim the cache slot up front.  If the key was already present the item
  // has been built before and can be returned directly.
  auto result = m_comp_units.try_emplace(compiland_uid.toOpaqueId(), nullptr);
  if (!result.second)
    return *result.first->second;

  // Find the module list and load its debug information stream and cache it
  // since we need to use it for almost all interesting operations.
  const DbiModuleList &modules = m_index.dbi().modules();
  uint16_t modi = compiland_uid.asCompiland().modi;
  llvm::pdb::DbiModuleDescriptor descriptor = modules.getModuleDescriptor(modi);
  uint16_t stream = descriptor.getModuleStreamIndex();
  std::unique_ptr<llvm::msf::MappedBlockStream> stream_data =
      m_index.pdb().createIndexedStream(stream);
  llvm::pdb::ModuleDebugStreamRef debug_stream(descriptor,
                                               std::move(stream_data));
  cantFail(debug_stream.reload());

  std::unique_ptr<CompilandIndexItem> &cci = result.first->second;

  cci = llvm::make_unique<CompilandIndexItem>(
      compiland_uid, std::move(debug_stream), std::move(descriptor));
  ParseExtendedInfo(m_index, *cci);

  // `debug_stream` was moved into the item above, so the subsections must be
  // read from the item's own stream rather than from the moved-from local.
  cci->m_strings.initialize(cci->m_debug_stream.getSubsectionsArray());
  PDBStringTable &strings = cantFail(m_index.pdb().getStringTable());
  cci->m_strings.setStrings(strings.getStringTable());

  // We want the main source file to always come first.  Note that we can't
  // just insert the main file at the front because `GetMainSourceFile`
  // computes it in such a way that it doesn't own the resulting memory.  So we
  // have to iterate the module file list comparing each one to the main file
  // name until we find it, and we can cache that one since the memory is backed
  // by a contiguous chunk inside the mapped PDB.
  llvm::SmallString<64> main_file = GetMainSourceFile(*cci);
  llvm::sys::path::native(main_file, llvm::sys::path::Style::windows);

  uint32_t file_count = modules.getSourceFileCount(modi);
  cci->m_file_list.reserve(file_count);
  bool found_main_file = false;
  for (llvm::StringRef file : modules.source_files(modi)) {
    // Only the first match is hoisted to the front; every other file keeps
    // its relative order.
    if (!found_main_file && IsMainFile(main_file, file)) {
      cci->m_file_list.insert(cci->m_file_list.begin(), file);
      found_main_file = true;
      continue;
    }
    cci->m_file_list.push_back(file);
  }

  return *cci;
}
|
|
|
|
/// Convenience overload: look up the already-created item for module index
/// \p modi.  Returns whatever the PdbSymUid overload returns for the
/// corresponding compiland uid.
const CompilandIndexItem *CompileUnitIndex::GetCompiland(uint16_t modi) const {
  const PdbSymUid compiland_uid = PdbSymUid::makeCompilandId(modi);
  return GetCompiland(compiland_uid);
}
|
|
|
|
/// Look up the item previously created for \p compiland_uid.
///
/// \return the cached item, or nullptr if no entry exists for that uid.  This
/// never creates an entry.
const CompilandIndexItem *
CompileUnitIndex::GetCompiland(PdbSymUid compiland_uid) const {
  auto pos = m_comp_units.find(compiland_uid.toOpaqueId());
  return pos == m_comp_units.end() ? nullptr : pos->second.get();
}
|
|
|
|
/// Convenience overload: look up the already-created item for module index
/// \p modi.  Returns whatever the PdbSymUid overload returns for the
/// corresponding compiland uid.
CompilandIndexItem *CompileUnitIndex::GetCompiland(uint16_t modi) {
  const PdbSymUid compiland_uid = PdbSymUid::makeCompilandId(modi);
  return GetCompiland(compiland_uid);
}
|
|
|
|
/// Look up the item previously created for \p compiland_uid.
///
/// \return the cached item, or nullptr if no entry exists for that uid.  This
/// never creates an entry.
CompilandIndexItem *CompileUnitIndex::GetCompiland(PdbSymUid compiland_uid) {
  auto pos = m_comp_units.find(compiland_uid.toOpaqueId());
  if (pos != m_comp_units.end())
    return pos->second.get();
  return nullptr;
}
|
|
|
|
/// Compute the full path of the main source file of \p item by joining the
/// working directory and source file name recorded in its build info.
///
/// \return the joined path, or an empty string when the build info has too
/// few arguments or when either argument does not resolve to an LF_STRING_ID
/// record in the IPI stream.
llvm::SmallString<64>
CompileUnitIndex::GetMainSourceFile(const CompilandIndexItem &item) const {
  // LF_BUILDINFO contains a list of arg indices which point to LF_STRING_ID
  // records in the IPI stream.  The order of the arg indices is as follows:
  // [0] - working directory where compiler was invoked.
  // [1] - absolute path to compiler binary
  // [2] - source file name
  // [3] - path to compiler generated PDB (the /Zi PDB, although this entry gets
  //       added even when using /Z7)
  // [4] - full command line invocation.
  //
  // We need to form the path [0]\[2] to generate the full path to the main
  // source file.
  if (item.m_build_info.size() < 3)
    return {""};

  LazyRandomTypeCollection &types = m_index.ipi().typeCollection();

  // Look the records up with tryGetType and verify their kind before
  // deserializing, so that an argument index which does not resolve to a
  // string record yields "no main file" instead of a fatal cantFail.
  llvm::Optional<CVType> dir_cvt = types.tryGetType(item.m_build_info[0]);
  if (!dir_cvt || dir_cvt->kind() != LF_STRING_ID)
    return {""};

  llvm::Optional<CVType> file_cvt = types.tryGetType(item.m_build_info[2]);
  if (!file_cvt || file_cvt->kind() != LF_STRING_ID)
    return {""};

  StringIdRecord working_dir;
  StringIdRecord file_name;
  llvm::cantFail(
      TypeDeserializer::deserializeAs<StringIdRecord>(*dir_cvt, working_dir));
  llvm::cantFail(
      TypeDeserializer::deserializeAs<StringIdRecord>(*file_cvt, file_name));

  llvm::SmallString<64> absolute_path = working_dir.String;
  llvm::sys::path::append(absolute_path, file_name.String);
  return absolute_path;
}
|