clang-p2996/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp

//===-- MinidumpParser.cpp ---------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

// Project includes
#include "MinidumpParser.h"
#include "NtStructures.h"
#include "RegisterContextMinidump_x86_32.h"

// Other libraries and framework includes
#include "lldb/Target/MemoryRegionInfo.h"
#include "lldb/Utility/LLDBAssert.h"

// C includes
// C++ includes
#include <algorithm>
#include <map>
#include <vector>

using namespace lldb_private;
using namespace minidump;

// Factory for MinidumpParser. Yields llvm::None when the supplied buffer holds
// fewer bytes than a MinidumpHeader; otherwise wraps the buffer in a parser.
// No further validation of the contents is performed here.
llvm::Optional<MinidumpParser>
MinidumpParser::Create(const lldb::DataBufferSP &data_buf_sp) {
  const bool has_room_for_header =
      data_buf_sp->GetByteSize() >= sizeof(MinidumpHeader);
  if (!has_room_for_header)
    return llvm::None;

  return MinidumpParser(data_buf_sp);
}

MinidumpParser::MinidumpParser(const lldb::DataBufferSP &data_buf_sp)
    : m_data_sp(data_buf_sp) {}

// Returns a non-owning view over every byte of the backing buffer.
llvm::ArrayRef<uint8_t> MinidumpParser::GetData() {
  const uint8_t *const first_byte = m_data_sp->GetBytes();
  const auto byte_count = m_data_sp->GetByteSize();
  return llvm::ArrayRef<uint8_t>(first_byte, byte_count);
}

// Returns the raw bytes of the stream registered under `stream_type` in the
// directory map, or an empty array when the stream is unknown or its
// descriptor does not fit inside the file.
llvm::ArrayRef<uint8_t>
MinidumpParser::GetStream(MinidumpStreamType stream_type) {
  auto iter = m_directory_map.find(static_cast<uint32_t>(stream_type));
  if (iter == m_directory_map.end())
    return {};

  // Widen to 64 bits before adding: the descriptor comes from the (untrusted)
  // file, and a narrow sum could wrap around and slip past the bounds check.
  const uint64_t stream_rva = iter->second.rva;
  const uint64_t stream_size = iter->second.data_size;

  // check if there is enough data
  if (stream_rva + stream_size > m_data_sp->GetByteSize())
    return {};

  return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes() + stream_rva,
                                 stream_size);
}

// Decodes the minidump string located `rva` bytes into the file. Yields
// llvm::None when `rva` lies beyond the end of the data; otherwise the result
// is whatever parseMinidumpString produces for the remaining bytes.
llvm::Optional<std::string> MinidumpParser::GetMinidumpString(uint32_t rva) {
  auto remaining = m_data_sp->GetData();
  if (remaining.size() < rva)
    return llvm::None;

  // Skip everything in front of the string record.
  remaining = remaining.drop_front(rva);
  return parseMinidumpString(remaining);
}

// Extracts a UUID for `module` from its CodeView record.
//
// Returns a default-constructed UUID when the record lies outside the file,
// is too short to hold the expected structures, or carries a signature other
// than Pdb70 / ElfBuildId.
UUID MinidumpParser::GetModuleUUID(const MinidumpModule *module) {
  // The record location comes straight from the file, so validate it before
  // slicing: ArrayRef::slice only asserts on out-of-range arguments. The
  // arithmetic is done in 64 bits so the sum cannot wrap.
  const uint64_t cv_rva = module->CV_record.rva;
  const uint64_t cv_size = module->CV_record.data_size;
  llvm::ArrayRef<uint8_t> file_data = GetData();
  if (cv_rva + cv_size > file_data.size())
    return UUID();

  auto cv_record = file_data.slice(cv_rva, cv_size);

  // Read the CV record signature
  const llvm::support::ulittle32_t *signature = nullptr;
  Status error = consumeObject(cv_record, signature);
  if (error.Fail())
    return UUID();

  const CvSignature cv_signature =
      static_cast<CvSignature>(static_cast<uint32_t>(*signature));

  if (cv_signature == CvSignature::Pdb70) {
    // PDB70 record
    const CvRecordPdb70 *pdb70_uuid = nullptr;
    Status pdb70_error = consumeObject(cv_record, pdb70_uuid);
    if (!pdb70_error.Fail()) {
      auto arch = GetArchitecture();
      // For Apple targets we only need a 16 byte UUID so that we can match
      // the UUID in the Module to actual UUIDs from the built binaries. The
      // "Age" field is zero in breakpad minidump files for Apple targets, so
      // we restrict the UUID to the "Uuid" field so we have a UUID we can use
      // to match.
      if (arch.GetTriple().getVendor() == llvm::Triple::Apple)
        return UUID::fromData(pdb70_uuid->Uuid, sizeof(pdb70_uuid->Uuid));
      else
        return UUID::fromData(pdb70_uuid, sizeof(*pdb70_uuid));
    }
  } else if (cv_signature == CvSignature::ElfBuildId) {
    // Everything after the signature is used as the build id.
    return UUID::fromData(cv_record);
  }

  return UUID();
}

// Returns the thread records parsed from the ThreadList stream, or an empty
// array when that stream is absent or has no bytes.
llvm::ArrayRef<MinidumpThread> MinidumpParser::GetThreads() {
  llvm::ArrayRef<uint8_t> thread_list_bytes =
      GetStream(MinidumpStreamType::ThreadList);
  if (thread_list_bytes.empty())
    return llvm::None;

  return MinidumpThread::ParseThreadList(thread_list_bytes);
}

// Returns the bytes of the register context referenced by `td`, or an empty
// array when the descriptor points outside the file.
llvm::ArrayRef<uint8_t>
MinidumpParser::GetThreadContext(const MinidumpThread &td) {
  // Widen to 64 bits before adding so a crafted rva/data_size pair cannot wrap
  // around and pass the bounds check.
  const uint64_t context_rva = td.thread_context.rva;
  const uint64_t context_size = td.thread_context.data_size;

  llvm::ArrayRef<uint8_t> file_data = GetData();
  if (context_rva + context_size > file_data.size())
    return {};

  return file_data.slice(context_rva, context_size);
}

// Locates the 32-bit register context of a WOW64 thread.
//
// On Windows, a 32-bit process can run on a 64-bit machine under WOW64. When
// such a process is captured by a 64-bit debugger, the CONTEXT stored with
// the thread describes the 64-bit "native" side rather than the 32-bit
// "guest" side we are interested in. The guest CONTEXT can instead be reached
// through the 64-bit TEB (Thread Environment Block).
//
// Returns an empty array when the TEB or the full 32-bit context cannot be
// read from the dump's memory.
llvm::ArrayRef<uint8_t>
MinidumpParser::GetThreadContextWow64(const MinidumpThread &td) {
  llvm::ArrayRef<uint8_t> teb_bytes = GetMemory(td.teb, sizeof(TEB64));
  if (teb_bytes.empty())
    return {};

  const TEB64 *teb64 = nullptr;
  if (consumeObject(teb_bytes, teb64).Fail())
    return {};

  // Slot 1 of the thread-local storage in the 64-bit TEB points to a structure
  // that includes the 32-bit CONTEXT (after a ULONG). See:
  // https://msdn.microsoft.com/en-us/library/ms681670.aspx
  const auto context_addr = teb64->tls_slots[1] + 4;
  llvm::ArrayRef<uint8_t> context32 =
      GetMemory(context_addr, sizeof(MinidumpContext_x86_32));

  // A partial read is of no use to the caller.
  if (context32.size() < sizeof(MinidumpContext_x86_32))
    return {};

  // NOTE:  We don't currently use the TEB for anything else.  If we
  // need it in the future, the 32-bit TEB is located according to the address
  // stored in the first slot of the 64-bit TEB (wow64teb.Reserved1[0]).
  return context32;
}

// Returns the parsed SystemInfo stream, or nullptr when the stream is absent
// or has no bytes.
const MinidumpSystemInfo *MinidumpParser::GetSystemInfo() {
  llvm::ArrayRef<uint8_t> system_info_bytes =
      GetStream(MinidumpStreamType::SystemInfo);

  return system_info_bytes.empty()
             ? nullptr
             : MinidumpSystemInfo::Parse(system_info_bytes);
}

// Derives the target architecture from the SystemInfo stream and stores it in
// m_arch. A previously computed valid m_arch is returned as-is; when there is
// no SystemInfo stream the (still invalid) m_arch is returned unchanged.
ArchSpec MinidumpParser::GetArchitecture() {
  if (m_arch.IsValid())
    return m_arch;

  const MinidumpSystemInfo *sys_info = GetSystemInfo();
  if (sys_info == nullptr)
    return m_arch;

  // TODO what to do about big endian flavors of arm ?
  // TODO set the arm subarch stuff if the minidump has info about it

  llvm::Triple target;
  target.setVendor(llvm::Triple::VendorType::UnknownVendor);

  // Translate the minidump processor id into an LLVM architecture; anything
  // not listed below stays "unknown".
  llvm::Triple::ArchType arch_type = llvm::Triple::ArchType::UnknownArch;
  const uint32_t raw_arch = sys_info->processor_arch;
  switch (static_cast<MinidumpCPUArchitecture>(raw_arch)) {
  case MinidumpCPUArchitecture::X86:
    arch_type = llvm::Triple::ArchType::x86;
    break;
  case MinidumpCPUArchitecture::AMD64:
    arch_type = llvm::Triple::ArchType::x86_64;
    break;
  case MinidumpCPUArchitecture::ARM:
    arch_type = llvm::Triple::ArchType::arm;
    break;
  case MinidumpCPUArchitecture::ARM64:
    arch_type = llvm::Triple::ArchType::aarch64;
    break;
  default:
    break;
  }
  target.setArch(arch_type);

  // TODO add all of the OSes that Minidump/breakpad distinguishes?
  const uint32_t raw_platform = sys_info->platform_id;
  switch (static_cast<MinidumpOSPlatform>(raw_platform)) {
  case MinidumpOSPlatform::Win32S:
  case MinidumpOSPlatform::Win32Windows:
  case MinidumpOSPlatform::Win32NT:
  case MinidumpOSPlatform::Win32CE:
    target.setOS(llvm::Triple::OSType::Win32);
    break;
  case MinidumpOSPlatform::Linux:
    target.setOS(llvm::Triple::OSType::Linux);
    break;
  case MinidumpOSPlatform::MacOSX:
    target.setOS(llvm::Triple::OSType::MacOSX);
    target.setVendor(llvm::Triple::Apple);
    break;
  case MinidumpOSPlatform::IOS:
    target.setOS(llvm::Triple::OSType::IOS);
    target.setVendor(llvm::Triple::Apple);
    break;
  case MinidumpOSPlatform::Android:
    target.setOS(llvm::Triple::OSType::Linux);
    target.setEnvironment(llvm::Triple::EnvironmentType::Android);
    break;
  default: {
    // Unrecognized platform id: fall back to looking for "Linux" in the CSD
    // version string, if one can be read.
    target.setOS(llvm::Triple::OSType::UnknownOS);
    const llvm::Optional<std::string> csd_version =
        GetMinidumpString(sys_info->csd_version_rva);
    if (csd_version && csd_version->find("Linux") != std::string::npos)
      target.setOS(llvm::Triple::OSType::Linux);
    break;
  }
  }

  m_arch.SetTriple(target);
  return m_arch;
}

// Returns the parsed MiscInfo stream, or nullptr when the stream is absent or
// has no bytes.
const MinidumpMiscInfo *MinidumpParser::GetMiscInfo() {
  llvm::ArrayRef<uint8_t> misc_info_bytes =
      GetStream(MinidumpStreamType::MiscInfo);

  return misc_info_bytes.empty() ? nullptr
                                 : MinidumpMiscInfo::Parse(misc_info_bytes);
}

// Returns the parsed LinuxProcStatus stream, or llvm::None when the stream is
// absent or has no bytes.
llvm::Optional<LinuxProcStatus> MinidumpParser::GetLinuxProcStatus() {
  llvm::ArrayRef<uint8_t> proc_status_bytes =
      GetStream(MinidumpStreamType::LinuxProcStatus);
  if (proc_status_bytes.empty())
    return llvm::None;

  return LinuxProcStatus::Parse(proc_status_bytes);
}

// Looks up the process id of the dumped process. The MiscInfo stream is
// consulted first; the LinuxProcStatus stream is only tried when MiscInfo is
// not available. Returns llvm::None when neither stream is available.
llvm::Optional<lldb::pid_t> MinidumpParser::GetPid() {
  if (const MinidumpMiscInfo *misc = GetMiscInfo())
    return misc->GetPid();

  if (llvm::Optional<LinuxProcStatus> status = GetLinuxProcStatus())
    return status->GetPid();

  return llvm::None;
}

// Returns the module records parsed from the ModuleList stream, or an empty
// array when that stream is absent or has no bytes.
llvm::ArrayRef<MinidumpModule> MinidumpParser::GetModuleList() {
  llvm::ArrayRef<uint8_t> module_list_bytes =
      GetStream(MinidumpStreamType::ModuleList);
  if (module_list_bytes.empty())
    return {};

  return MinidumpModule::ParseModuleList(module_list_bytes);
}

// Returns one module per distinct module name. When several entries in the
// module list share a name, the entry with the lowest base_of_image is kept.
// Modules whose name string cannot be read are skipped. The order of the
// result follows the iteration order of the internal string map.
std::vector<const MinidumpModule *> MinidumpParser::GetFilteredModuleList() {
  llvm::ArrayRef<MinidumpModule> modules = GetModuleList();
  // map module_name -> pair(load_address, pointer to module struct in memory)
  llvm::StringMap<std::pair<uint64_t, const MinidumpModule *>> lowest_addr;

  for (const auto &module : modules) {
    llvm::Optional<std::string> name =
        GetMinidumpString(module.module_name_rva);
    if (!name)
      continue;

    auto iter = lowest_addr.end();
    bool inserted;
    std::tie(iter, inserted) = lowest_addr.try_emplace(
        *name, std::make_pair(module.base_of_image, &module));

    // try_emplace reports whether a NEW entry was created. Only when the name
    // was already present is there an older entry to compare against, and it
    // is replaced if this module is loaded at a lower address.
    if (!inserted && module.base_of_image < iter->second.first)
      iter->second = std::make_pair(module.base_of_image, &module);
  }

  std::vector<const MinidumpModule *> filtered_modules;
  filtered_modules.reserve(lowest_addr.size());
  for (const auto &entry : lowest_addr)
    filtered_modules.push_back(entry.second.second);

  return filtered_modules;
}

// Returns the parsed Exception stream, or nullptr when the stream is absent
// or has no bytes.
const MinidumpExceptionStream *MinidumpParser::GetExceptionStream() {
  llvm::ArrayRef<uint8_t> exception_bytes =
      GetStream(MinidumpStreamType::Exception);

  return exception_bytes.empty()
             ? nullptr
             : MinidumpExceptionStream::Parse(exception_bytes);
}

// Finds the captured memory range that contains `addr`.
//
// The MemoryList stream is searched first, then the Memory64List stream.
// Returns llvm::None when neither stream is present, when a present stream
// yields no descriptors, when a descriptor points outside the file, or when
// no range contains `addr`.
//
// All size/offset arithmetic on values read from the file is done so that it
// cannot wrap around: a wrapped sum would let a malformed descriptor pass the
// bounds checks and produce an out-of-range slice.
llvm::Optional<minidump::Range>
MinidumpParser::FindMemoryRange(lldb::addr_t addr) {
  llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryList);
  llvm::ArrayRef<uint8_t> data64 = GetStream(MinidumpStreamType::Memory64List);

  if (data.empty() && data64.empty())
    return llvm::None;

  const uint64_t file_size = GetData().size();

  if (!data.empty()) {
    llvm::ArrayRef<MinidumpMemoryDescriptor> memory_list =
        MinidumpMemoryDescriptor::ParseMemoryList(data);

    if (memory_list.empty())
      return llvm::None;

    for (const auto &memory_desc : memory_list) {
      const MinidumpLocationDescriptor &loc_desc = memory_desc.memory;
      const lldb::addr_t range_start = memory_desc.start_of_memory_range;
      // Widened to 64 bits so that rva + size cannot wrap.
      const uint64_t range_rva = loc_desc.rva;
      const uint64_t range_size = loc_desc.data_size;

      if (range_rva + range_size > file_size)
        return llvm::None;

      // "addr - range_start < range_size" is the wrap-free form of
      // "addr < range_start + range_size".
      if (range_start <= addr && addr - range_start < range_size) {
        return minidump::Range(range_start,
                               GetData().slice(range_rva, range_size));
      }
    }
  }

  // Some Minidumps have a Memory64ListStream that captures all the heap memory
  // (full-memory Minidumps).  We can't exactly use the same loop as above,
  // because the Minidump uses slightly different data structures to describe
  // those

  if (!data64.empty()) {
    llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list;
    uint64_t base_rva;
    std::tie(memory64_list, base_rva) =
        MinidumpMemoryDescriptor64::ParseMemory64List(data64);

    if (memory64_list.empty())
      return llvm::None;

    for (const auto &memory_desc64 : memory64_list) {
      const lldb::addr_t range_start = memory_desc64.start_of_memory_range;
      const uint64_t range_size = memory_desc64.data_size;

      // Both operands are full 64-bit values here, so compare by subtraction
      // instead of adding them.
      if (base_rva > file_size || range_size > file_size - base_rva)
        return llvm::None;

      if (range_start <= addr && addr - range_start < range_size) {
        return minidump::Range(range_start,
                               GetData().slice(base_rva, range_size));
      }
      // The ranges are laid out back to back starting at base_rva. This sum
      // cannot wrap: it was just shown to be <= file_size.
      base_rva += range_size;
    }
  }

  return llvm::None;
}

// Returns up to `size` bytes of captured memory starting at `addr`. The
// result is shorter than `size` when the containing range ends early, and
// empty when no captured range contains `addr`.
//
// Each call searches the ranges through FindMemoryRange; an index may be
// worth building if this turns out to be called often on dumps with many
// ranges.
llvm::ArrayRef<uint8_t> MinidumpParser::GetMemory(lldb::addr_t addr,
                                                  size_t size) {
  llvm::Optional<minidump::Range> hit = FindMemoryRange(addr);
  if (!hit)
    return {};

  // The request begins somewhere inside the range that was found. Work out
  // how far in it starts and how many bytes remain from that point.
  if (addr < hit->start)
    return {};

  const size_t skip = addr - hit->start;
  const size_t range_len = hit->range_ref.size();
  if (skip >= range_len)
    return {};

  const size_t available = range_len - skip;
  return hit->range_ref.slice(skip, std::min(size, available));
}

// Describes the memory region around `load_addr` using the MemoryInfoList
// stream.
//
// Returns llvm::None when the stream is absent or yields no entries. When an
// entry contains `load_addr`, the result carries that entry's range and its
// read/write/execute/mapped flags. Otherwise the result is an unmapped,
// inaccessible region starting at `load_addr` and running up to the nearest
// entry above it (or LLDB_INVALID_ADDRESS when there is none).
llvm::Optional<MemoryRegionInfo>
MinidumpParser::GetMemoryRegionInfo(lldb::addr_t load_addr) {
  llvm::ArrayRef<uint8_t> stream = GetStream(MinidumpStreamType::MemoryInfoList);
  if (stream.empty())
    return llvm::None;

  std::vector<const MinidumpMemoryInfo *> regions =
      MinidumpMemoryInfo::ParseMemoryInfoList(stream);
  if (regions.empty())
    return llvm::None;

  const auto yes = MemoryRegionInfo::eYes;
  const auto no = MemoryRegionInfo::eNo;

  // Flag values the entries are tested against.
  const uint32_t no_access_bit =
      static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageNoAccess);
  const uint32_t writable_bits =
      static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageWritable);
  const uint32_t executable_bits =
      static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageExecutable);
  const uint32_t free_state =
      static_cast<uint32_t>(MinidumpMemoryInfoState::MemFree);

  const MinidumpMemoryInfo *nearest_above = nullptr;
  for (const MinidumpMemoryInfo *region : regions) {
    const uint64_t region_begin = region->base_address;
    const uint64_t region_end = region_begin + region->region_size;

    if (region_begin <= load_addr && load_addr < region_end) {
      const bool is_free = region->state == free_state;

      MemoryRegionInfo found;
      // A free region is reported as starting at the queried address rather
      // than at the entry's base.
      found.GetRange().SetRangeBase(is_free ? load_addr : region_begin);
      found.GetRange().SetRangeEnd(region_end);
      found.SetReadable((region->protect & no_access_bit) == 0 ? yes : no);
      found.SetWritable((region->protect & writable_bits) != 0 ? yes : no);
      found.SetExecutable((region->protect & executable_bits) != 0 ? yes : no);
      found.SetMapped(is_free ? no : yes);
      return found;
    }

    // In case there is no region containing load_addr keep track of the
    // nearest region after load_addr so we can return the distance to it.
    if (region_begin > load_addr &&
        (nearest_above == nullptr ||
         region_begin < nearest_above->base_address))
      nearest_above = region;
  }

  // No containing region found. Create an unmapped region that extends to the
  // next region or LLDB_INVALID_ADDRESS
  MemoryRegionInfo gap;
  gap.GetRange().SetRangeBase(load_addr);
  if (nearest_above != nullptr)
    gap.GetRange().SetRangeEnd(nearest_above->base_address);
  else
    gap.GetRange().SetRangeEnd(LLDB_INVALID_ADDRESS);
  gap.SetReadable(no);
  gap.SetWritable(no);
  gap.SetExecutable(no);
  gap.SetMapped(no);

  // Note that the memory info list doesn't seem to contain ranges in kernel
  // space, so if you're walking a stack that has kernel frames, the stack may
  // appear truncated.
  return gap;
}

// Validates the overall layout of the minidump and fills m_directory_map with
// the location of every stream.
//
// Returns a failed Status (with a message starting "invalid minidump:") when
// the header cannot be parsed, there are no streams, the stream directory or
// any stream extends past the end of the file, a stream type is zero with a
// non-zero size, a stream type occurs twice, or any two of header / directory
// / streams overlap. Returns a successful Status otherwise.
//
// Offsets and sizes read from the file are handled as 64-bit values so that
// sums and products of 32-bit fields cannot wrap and defeat the checks.
Status MinidumpParser::Initialize() {
  Status error;

  lldbassert(m_directory_map.empty());

  const uint64_t file_size = m_data_sp->GetByteSize();

  // Make sure the header is fully present before looking at it.
  if (sizeof(MinidumpHeader) > file_size) {
    error.SetErrorString("invalid minidump: truncated header");
    return error;
  }

  llvm::ArrayRef<uint8_t> header_data(m_data_sp->GetBytes(),
                                      sizeof(MinidumpHeader));
  const MinidumpHeader *header = MinidumpHeader::Parse(header_data);
  if (header == nullptr) {
    error.SetErrorString("invalid minidump: can't parse the header");
    return error;
  }

  // A minidump without at least one stream is clearly ill-formed
  if (header->streams_count == 0) {
    error.SetErrorString("invalid minidump: no streams present");
    return error;
  }

  // A contiguous byte range of the file, [offset, offset + size).
  struct FileRange {
    uint64_t offset = 0;
    uint64_t size = 0;

    FileRange(uint64_t offset, uint64_t size) : offset(offset), size(size) {}
    uint64_t end() const { return offset + size; }
  };

  // Build a global minidump file map, checking for:
  // - overlapping streams/data structures
  // - truncation (streams pointing past the end of file)
  std::vector<FileRange> minidump_map;

  // Add the minidump header to the file map
  minidump_map.emplace_back(0, sizeof(MinidumpHeader));

  // Add the directory entries to the file map
  const uint64_t directory_rva = header->stream_directory_rva;
  const uint64_t directory_size =
      static_cast<uint64_t>(header->streams_count) * sizeof(MinidumpDirectory);
  FileRange directory_range(directory_rva, directory_size);
  if (directory_range.end() > file_size) {
    error.SetErrorString("invalid minidump: truncated streams directory");
    return error;
  }
  minidump_map.push_back(directory_range);

  // Parse stream directory entries
  llvm::ArrayRef<uint8_t> directory_data(
      m_data_sp->GetBytes() + directory_range.offset, directory_range.size);
  for (uint32_t i = 0; i < header->streams_count; ++i) {
    const MinidumpDirectory *directory_entry = nullptr;
    error = consumeObject(directory_data, directory_entry);
    if (error.Fail())
      return error;
    if (directory_entry->stream_type == 0) {
      // Ignore dummy streams (technically ill-formed, but a number of
      // existing minidumps seem to contain such streams)
      if (directory_entry->location.data_size == 0)
        continue;
      error.SetErrorString("invalid minidump: bad stream type");
      return error;
    }
    // Update the streams map, checking for duplicate stream types
    if (!m_directory_map
             .insert({directory_entry->stream_type, directory_entry->location})
             .second) {
      error.SetErrorString("invalid minidump: duplicate stream type");
      return error;
    }
    // Ignore the zero-length streams for layout checks
    if (directory_entry->location.data_size != 0) {
      minidump_map.emplace_back(directory_entry->location.rva,
                                directory_entry->location.data_size);
    }
  }

  // Sort the file map ranges by start offset
  std::sort(minidump_map.begin(), minidump_map.end(),
            [](const FileRange &a, const FileRange &b) {
              return a.offset < b.offset;
            });

  // Check for overlapping streams/data structures
  for (size_t i = 1; i < minidump_map.size(); ++i) {
    const auto &prev_range = minidump_map[i - 1];
    if (prev_range.end() > minidump_map[i].offset) {
      error.SetErrorString("invalid minidump: overlapping streams");
      return error;
    }
  }

  // Check for streams past the end of file. With the ranges sorted and known
  // not to overlap, the last range is the one that reaches furthest.
  const auto &last_range = minidump_map.back();
  if (last_range.end() > file_size) {
    error.SetErrorString("invalid minidump: truncated stream");
    return error;
  }

  return error;
}