[lldb][DWARF] Replace lldb's DWARFDebugArangeSet with llvm's (#110058)

They are close enough to swap lldb's `DWARFDebugArangeSet` with the llvm
one.

The difference is that llvm's `DWARFDebugArangeSet` add empty ranges
when extracting. To accommodate this, `DWARFDebugAranges` in lldb
filters out empty ranges when extracting.
This commit is contained in:
Zequan Wu
2024-10-01 10:52:51 -07:00
committed by GitHub
parent 47e3d8dc7e
commit 4206c37bd1
7 changed files with 63 additions and 326 deletions

View File

@@ -18,7 +18,6 @@ add_lldb_library(lldbPluginSymbolFileDWARF PLUGIN
DWARFContext.cpp
DWARFDataExtractor.cpp
DWARFDebugAranges.cpp
DWARFDebugArangeSet.cpp
DWARFDebugInfo.cpp
DWARFDebugInfoEntry.cpp
DWARFDebugMacro.cpp

View File

@@ -1,176 +0,0 @@
//===-- DWARFDebugArangeSet.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "DWARFDebugArangeSet.h"
#include "DWARFDataExtractor.h"
#include "LogChannelDWARF.h"
#include "llvm/Object/Error.h"
#include <cassert>
using namespace lldb_private;
using namespace lldb_private::plugin::dwarf;
DWARFDebugArangeSet::DWARFDebugArangeSet()
: m_offset(DW_INVALID_OFFSET), m_next_offset(DW_INVALID_OFFSET) {}
void DWARFDebugArangeSet::Clear() {
m_offset = DW_INVALID_OFFSET;
m_next_offset = DW_INVALID_OFFSET;
m_header.length = 0;
m_header.version = 0;
m_header.cu_offset = 0;
m_header.addr_size = 0;
m_header.seg_size = 0;
m_arange_descriptors.clear();
}
llvm::Error DWARFDebugArangeSet::extract(const DWARFDataExtractor &data,
lldb::offset_t *offset_ptr) {
assert(data.ValidOffset(*offset_ptr));
m_arange_descriptors.clear();
m_offset = *offset_ptr;
// 7.20 Address Range Table
//
// Each set of entries in the table of address ranges contained in the
// .debug_aranges section begins with a header consisting of: a 4-byte
// length containing the length of the set of entries for this compilation
// unit, not including the length field itself; a 2-byte version identifier
// containing the value 2 for DWARF Version 2; a 4-byte offset into
// the.debug_infosection; a 1-byte unsigned integer containing the size in
// bytes of an address (or the offset portion of an address for segmented
// addressing) on the target system; and a 1-byte unsigned integer
// containing the size in bytes of a segment descriptor on the target
// system. This header is followed by a series of tuples. Each tuple
// consists of an address and a length, each in the size appropriate for an
// address on the target architecture.
m_header.length = data.GetDWARFInitialLength(offset_ptr);
// The length could be 4 bytes or 12 bytes, so use the current offset to
// determine the next offset correctly.
if (m_header.length > 0)
m_next_offset = *offset_ptr + m_header.length;
else
m_next_offset = DW_INVALID_OFFSET;
m_header.version = data.GetU16(offset_ptr);
m_header.cu_offset = data.GetDWARFOffset(offset_ptr);
m_header.addr_size = data.GetU8(offset_ptr);
m_header.seg_size = data.GetU8(offset_ptr);
// Try to avoid reading invalid arange sets by making sure:
// 1 - the version looks good
// 2 - the address byte size looks plausible
// 3 - the length seems to make sense
// 4 - size looks plausible
// 5 - the arange tuples do not contain a segment field
if (m_header.version < 2 || m_header.version > 5)
return llvm::make_error<llvm::object::GenericBinaryError>(
"Invalid arange header version");
if (m_header.addr_size != 4 && m_header.addr_size != 8)
return llvm::make_error<llvm::object::GenericBinaryError>(
"Invalid arange header address size");
if (m_header.length == 0)
return llvm::make_error<llvm::object::GenericBinaryError>(
"Invalid arange header length");
if (!data.ValidOffset(m_offset + sizeof(m_header.length) + m_header.length -
1))
return llvm::make_error<llvm::object::GenericBinaryError>(
"Invalid arange header length");
if (m_header.seg_size)
return llvm::make_error<llvm::object::GenericBinaryError>(
"segmented arange entries are not supported");
// The first tuple following the header in each set begins at an offset
// that is a multiple of the size of a single tuple (that is, twice the
// size of an address). The header is padded, if necessary, to the
// appropriate boundary.
const uint32_t header_size = *offset_ptr - m_offset;
const uint32_t tuple_size = m_header.addr_size << 1;
uint32_t first_tuple_offset = 0;
while (first_tuple_offset < header_size)
first_tuple_offset += tuple_size;
*offset_ptr = m_offset + first_tuple_offset;
Descriptor arangeDescriptor;
static_assert(sizeof(arangeDescriptor.address) ==
sizeof(arangeDescriptor.length),
"DWARFDebugArangeSet::Descriptor.address and "
"DWARFDebugArangeSet::Descriptor.length must have same size");
const lldb::offset_t next_offset = GetNextOffset();
assert(next_offset != DW_INVALID_OFFSET);
uint32_t num_terminators = 0;
bool last_was_terminator = false;
while (*offset_ptr < next_offset) {
arangeDescriptor.address = data.GetMaxU64(offset_ptr, m_header.addr_size);
arangeDescriptor.length = data.GetMaxU64(offset_ptr, m_header.addr_size);
// Each set of tuples is terminated by a 0 for the address and 0 for
// the length. Some linkers can emit .debug_aranges with multiple
// terminator pair entries that are still withing the length of the
// DWARFDebugArangeSet. We want to be sure to parse all entries for
// this DWARFDebugArangeSet so that we don't stop parsing early and end up
// treating addresses as a header of the next DWARFDebugArangeSet. We also
// need to make sure we parse all valid address pairs so we don't omit them
// from the aranges result, so we can't stop at the first terminator entry
// we find.
if (arangeDescriptor.address == 0 && arangeDescriptor.length == 0) {
++num_terminators;
last_was_terminator = true;
} else {
last_was_terminator = false;
// Only add .debug_aranges address entries that have a non zero size.
// Some linkers will zero out the length field for some .debug_aranges
// entries if they were stripped. We also could watch out for multiple
// entries at address zero and remove those as well.
if (arangeDescriptor.length > 0)
m_arange_descriptors.push_back(arangeDescriptor);
}
}
if (num_terminators > 1) {
Log *log = GetLog(DWARFLog::DebugInfo);
LLDB_LOG(log,
"warning: DWARFDebugArangeSet at %#" PRIx64 " contains %u "
"terminator entries",
m_offset, num_terminators);
}
if (last_was_terminator)
return llvm::ErrorSuccess();
return llvm::make_error<llvm::object::GenericBinaryError>(
"arange descriptors not terminated by null entry");
}
class DescriptorContainsAddress {
public:
DescriptorContainsAddress(dw_addr_t address) : m_address(address) {}
bool operator()(const DWARFDebugArangeSet::Descriptor &desc) const {
return (m_address >= desc.address) &&
(m_address < (desc.address + desc.length));
}
private:
const dw_addr_t m_address;
};
dw_offset_t DWARFDebugArangeSet::FindAddress(dw_addr_t address) const {
DescriptorConstIter end = m_arange_descriptors.end();
DescriptorConstIter pos =
std::find_if(m_arange_descriptors.begin(), end, // Range
DescriptorContainsAddress(address)); // Predicate
if (pos != end)
return m_header.cu_offset;
return DW_INVALID_OFFSET;
}

View File

@@ -1,70 +0,0 @@
//===-- DWARFDebugArangeSet.h -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGESET_H
#define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGESET_H
#include "lldb/Core/dwarf.h"
#include <cstdint>
#include <vector>
namespace lldb_private::plugin {
namespace dwarf {
class DWARFDebugArangeSet {
public:
struct Header {
/// The total length of the entries for that set, not including the length
/// field itself.
uint32_t length = 0;
/// The DWARF version number.
uint16_t version = 0;
/// The offset from the beginning of the .debug_info section of the
/// compilation unit entry referenced by the table.
uint32_t cu_offset = 0;
/// The size in bytes of an address on the target architecture. For
/// segmented addressing, this is the size of the offset portion of the
/// address.
uint8_t addr_size = 0;
/// The size in bytes of a segment descriptor on the target architecture.
/// If the target system uses a flat address space, this value is 0.
uint8_t seg_size = 0;
};
struct Descriptor {
dw_addr_t address;
dw_addr_t length;
dw_addr_t end_address() const { return address + length; }
};
DWARFDebugArangeSet();
void Clear();
void SetOffset(uint32_t offset) { m_offset = offset; }
llvm::Error extract(const DWARFDataExtractor &data,
lldb::offset_t *offset_ptr);
dw_offset_t FindAddress(dw_addr_t address) const;
size_t NumDescriptors() const { return m_arange_descriptors.size(); }
const Header &GetHeader() const { return m_header; }
dw_offset_t GetNextOffset() const { return m_next_offset; }
const Descriptor &GetDescriptorRef(uint32_t i) const {
return m_arange_descriptors[i];
}
protected:
typedef std::vector<Descriptor> DescriptorColl;
typedef DescriptorColl::iterator DescriptorIter;
typedef DescriptorColl::const_iterator DescriptorConstIter;
dw_offset_t m_offset;
dw_offset_t m_next_offset;
Header m_header;
DescriptorColl m_arange_descriptors;
};
} // namespace dwarf
} // namespace lldb_private::plugin
#endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGESET_H

View File

@@ -7,65 +7,44 @@
//===----------------------------------------------------------------------===//
#include "DWARFDebugAranges.h"
#include "DWARFDebugArangeSet.h"
#include "DWARFUnit.h"
#include "LogChannelDWARF.h"
#include "lldb/Utility/Log.h"
#include "lldb/Utility/Timer.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::plugin::dwarf;
using llvm::DWARFDebugArangeSet;
// Constructor
DWARFDebugAranges::DWARFDebugAranges() : m_aranges() {}
// CountArangeDescriptors
class CountArangeDescriptors {
public:
CountArangeDescriptors(uint32_t &count_ref) : count(count_ref) {
// printf("constructor CountArangeDescriptors()\n");
}
void operator()(const DWARFDebugArangeSet &set) {
count += set.NumDescriptors();
}
uint32_t &count;
};
// Extract
void DWARFDebugAranges::extract(const DWARFDataExtractor &debug_aranges_data) {
llvm::DWARFDataExtractor dwarf_data = debug_aranges_data.GetAsLLVMDWARF();
lldb::offset_t offset = 0;
DWARFDebugArangeSet set;
Range range;
while (debug_aranges_data.ValidOffset(offset)) {
while (dwarf_data.isValidOffset(offset)) {
const lldb::offset_t set_offset = offset;
if (llvm::Error error = set.extract(debug_aranges_data, &offset)) {
if (llvm::Error error = set.extract(dwarf_data, &offset)) {
Log *log = GetLog(DWARFLog::DebugInfo);
LLDB_LOG_ERROR(log, std::move(error),
"DWARFDebugAranges::extract failed to extract "
".debug_aranges set at offset {1:x}: {0}",
set_offset);
} else {
const uint32_t num_descriptors = set.NumDescriptors();
if (num_descriptors > 0) {
const dw_offset_t cu_offset = set.GetHeader().cu_offset;
for (uint32_t i = 0; i < num_descriptors; ++i) {
const DWARFDebugArangeSet::Descriptor &descriptor =
set.GetDescriptorRef(i);
m_aranges.Append(RangeToDIE::Entry(descriptor.address,
descriptor.length, cu_offset));
}
}
set.clear();
return;
}
const uint64_t cu_offset = set.getCompileUnitDIEOffset();
for (const auto &desc : set.descriptors()) {
if (desc.Length != 0)
m_aranges.Append(
RangeToDIE::Entry(desc.Address, desc.Length, cu_offset));
}
// Always use the previous DWARFDebugArangeSet's information to calculate
// the offset of the next DWARFDebugArangeSet in case we entouncter an
// error in the current DWARFDebugArangeSet and our offset position is
// still in the middle of the data. If we do this, we can parse all valid
// DWARFDebugArangeSet objects without returning invalid errors.
offset = set.GetNextOffset();
set.Clear();
}
}

View File

@@ -9,6 +9,7 @@
#include "gtest/gtest.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
#include "llvm/Support/FileSystem.h"
@@ -16,7 +17,6 @@
#include "Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h"
#include "Plugins/SymbolFile/DWARF/DWARFDataExtractor.h"
#include "Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h"
#include "Plugins/SymbolFile/DWARF/DWARFDebugAranges.h"
#include "Plugins/SymbolFile/DWARF/SymbolFileDWARF.h"
#include "Plugins/SymbolFile/PDB/SymbolFilePDB.h"
@@ -39,6 +39,7 @@ using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::dwarf;
using namespace lldb_private::plugin::dwarf;
using llvm::DWARFDebugArangeSet;
class SymbolFileDWARFTests : public testing::Test {
SubsystemRAII<FileSystem, HostInfo, ObjectFilePECOFF, SymbolFileDWARF,
@@ -91,14 +92,15 @@ TEST_F(SymbolFileDWARFTests, ParseArangesNonzeroSegmentSize) {
// END TUPLES
0, 0, 0, 0, 0, 0, 0, 0, 0 // terminator
};
DWARFDataExtractor data;
data.SetData(static_cast<const void *>(binary_data), sizeof binary_data,
lldb::ByteOrder::eByteOrderBig);
llvm::DWARFDataExtractor data(llvm::ArrayRef(binary_data),
/*isLittleEndian=*/false, /*AddrSize=*/4);
DWARFDebugArangeSet debug_aranges;
offset_t off = 0;
llvm::Error error = debug_aranges.extract(data, &off);
EXPECT_TRUE(bool(error));
EXPECT_EQ("segmented arange entries are not supported",
EXPECT_EQ("non-zero segment selector size in address range table at offset "
"0x0 is not supported",
llvm::toString(std::move(error)));
EXPECT_EQ(off, 12U); // Parser should read no further than the segment size
}
@@ -115,12 +117,12 @@ TEST_F(SymbolFileDWARFTests, ParseArangesWithMultipleTerminators) {
// DWARFDebugArangeSet header using the remaining segment + address pairs
// from the remaining bytes.
unsigned char binary_data[] = {
0, 0, 0, 0, // unit_length that will be set correctly after this
0, 2, // DWARF version number (uint16_t)
0, 0, 0, 0, // CU offset (ignored for the purposes of this test)
4, // address size
0, // segment size
0, 0, 0, 0, // alignment for the first tuple
0, 0, 0, 0, // unit_length that will be set correctly after this
0, 2, // DWARF version number (uint16_t)
0, 0, 0, 255, // CU offset
4, // address size
0, // segment size
0, 0, 0, 0, // alignment for the first tuple
// BEGIN TUPLES
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // premature terminator
0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, // [0x1000-0x1100)
@@ -135,18 +137,18 @@ TEST_F(SymbolFileDWARFTests, ParseArangesWithMultipleTerminators) {
DWARFDataExtractor data;
data.SetData(static_cast<const void *>(binary_data), sizeof binary_data,
lldb::ByteOrder::eByteOrderBig);
DWARFDebugArangeSet set;
offset_t off = 0;
llvm::Error error = set.extract(data, &off);
// Multiple terminators are not fatal as they do appear in binaries.
EXPECT_FALSE(bool(error));
DWARFDebugAranges debug_aranges;
debug_aranges.extract(data);
// Parser should read all terminators to the end of the length specified.
EXPECT_EQ(off, binary_data_size);
ASSERT_EQ(set.NumDescriptors(), 2U);
ASSERT_EQ(set.GetDescriptorRef(0).address, (dw_addr_t)0x1000);
ASSERT_EQ(set.GetDescriptorRef(0).length, (dw_addr_t)0x100);
ASSERT_EQ(set.GetDescriptorRef(1).address, (dw_addr_t)0x2000);
ASSERT_EQ(set.GetDescriptorRef(1).length, (dw_addr_t)0x10);
ASSERT_EQ(debug_aranges.GetNumRanges(), 2U);
EXPECT_EQ(debug_aranges.FindAddress(0x0fff), DW_INVALID_OFFSET);
EXPECT_EQ(debug_aranges.FindAddress(0x1000), 255u);
EXPECT_EQ(debug_aranges.FindAddress(0x1100 - 1), 255u);
EXPECT_EQ(debug_aranges.FindAddress(0x1100), DW_INVALID_OFFSET);
EXPECT_EQ(debug_aranges.FindAddress(0x1fff), DW_INVALID_OFFSET);
EXPECT_EQ(debug_aranges.FindAddress(0x2000), 255u);
EXPECT_EQ(debug_aranges.FindAddress(0x2010 - 1), 255u);
EXPECT_EQ(debug_aranges.FindAddress(0x2010), DW_INVALID_OFFSET);
}
TEST_F(SymbolFileDWARFTests, ParseArangesIgnoreEmpty) {
@@ -154,12 +156,12 @@ TEST_F(SymbolFileDWARFTests, ParseArangesIgnoreEmpty) {
// and we ensure that these are ignored by our DWARFDebugArangeSet parser
// and not included in the descriptors that are returned.
unsigned char binary_data[] = {
0, 0, 0, 0, // unit_length that will be set correctly after this
0, 2, // DWARF version number (uint16_t)
0, 0, 0, 0, // CU offset (ignored for the purposes of this test)
4, // address size
0, // segment size
0, 0, 0, 0, // alignment for the first tuple
0, 0, 0, 0, // unit_length that will be set correctly after this
0, 2, // DWARF version number (uint16_t)
0, 0, 0, 255, // CU offset
4, // address size
0, // segment size
0, 0, 0, 0, // alignment for the first tuple
// BEGIN TUPLES
0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, // [0x1000-0x1100)
0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, // [0x1100-0x1100)
@@ -174,22 +176,22 @@ TEST_F(SymbolFileDWARFTests, ParseArangesIgnoreEmpty) {
DWARFDataExtractor data;
data.SetData(static_cast<const void *>(binary_data), sizeof binary_data,
lldb::ByteOrder::eByteOrderBig);
DWARFDebugArangeSet set;
offset_t off = 0;
llvm::Error error = set.extract(data, &off);
// Multiple terminators are not fatal as they do appear in binaries.
EXPECT_FALSE(bool(error));
DWARFDebugAranges debug_aranges;
debug_aranges.extract(data);
// Parser should read all terminators to the end of the length specified.
// Previously the DWARFDebugArangeSet would stop at the first terminator
// entry and leave the offset in the middle of the current
// DWARFDebugArangeSet data, and that would cause the next extracted
// DWARFDebugArangeSet to fail.
EXPECT_EQ(off, binary_data_size);
ASSERT_EQ(set.NumDescriptors(), 2U);
ASSERT_EQ(set.GetDescriptorRef(0).address, (dw_addr_t)0x1000);
ASSERT_EQ(set.GetDescriptorRef(0).length, (dw_addr_t)0x100);
ASSERT_EQ(set.GetDescriptorRef(1).address, (dw_addr_t)0x2000);
ASSERT_EQ(set.GetDescriptorRef(1).length, (dw_addr_t)0x10);
ASSERT_EQ(debug_aranges.GetNumRanges(), 2U);
EXPECT_EQ(debug_aranges.FindAddress(0x0fff), DW_INVALID_OFFSET);
EXPECT_EQ(debug_aranges.FindAddress(0x1000), 255u);
EXPECT_EQ(debug_aranges.FindAddress(0x1100 - 1), 255u);
EXPECT_EQ(debug_aranges.FindAddress(0x1100), DW_INVALID_OFFSET);
EXPECT_EQ(debug_aranges.FindAddress(0x1fff), DW_INVALID_OFFSET);
EXPECT_EQ(debug_aranges.FindAddress(0x2000), 255u);
EXPECT_EQ(debug_aranges.FindAddress(0x2010 - 1), 255u);
EXPECT_EQ(debug_aranges.FindAddress(0x2010), DW_INVALID_OFFSET);
}
TEST_F(SymbolFileDWARFTests, ParseAranges) {
@@ -212,6 +214,7 @@ TEST_F(SymbolFileDWARFTests, ParseAranges) {
0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Size 0x0100
// Terminating tuple
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Terminator
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Terminator
};
// Set the little endian length correctly.
binary_data[0] = sizeof(binary_data) - 4;

View File

@@ -62,7 +62,7 @@ public:
void clear();
Error extract(DWARFDataExtractor data, uint64_t *offset_ptr,
function_ref<void(Error)> WarningHandler);
function_ref<void(Error)> WarningHandler = nullptr);
void dump(raw_ostream &OS) const;
uint64_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; }

View File

@@ -144,11 +144,13 @@ Error DWARFDebugArangeSet::extract(DWARFDataExtractor data,
if (arangeDescriptor.Length == 0 && arangeDescriptor.Address == 0) {
if (*offset_ptr == end_offset)
return ErrorSuccess();
WarningHandler(createStringError(
errc::invalid_argument,
"address range table at offset 0x%" PRIx64
" has a premature terminator entry at offset 0x%" PRIx64,
Offset, EntryOffset));
if (WarningHandler) {
WarningHandler(createStringError(
errc::invalid_argument,
"address range table at offset 0x%" PRIx64
" has a premature terminator entry at offset 0x%" PRIx64,
Offset, EntryOffset));
}
}
ArangeDescriptors.push_back(arangeDescriptor);