[lldb][Mach-O] Allow "process metadata" LC_NOTE to supply registers (#144627)

The "process metadata" LC_NOTE allows for thread IDs to be specified in
a Mach-O corefile. This extends the JSON recognized in that LC_NOTE to
allow for additional registers to be supplied on a per-thread basis.

The registers included in a Mach-O corefile LC_THREAD load command can
only be one of the register flavors that the kernel (xnu) defines in
<mach/arm/thread_status.h> for arm64 -- the general purpose registers,
floating point registers, exception registers.

JTAG style corefile producers may have access to many additional
registers beyond these that EL0 programs typically use, for instance
TCR_EL1 on AArch64, and people developing low level code need access to
these registers. This patch defines a format for including these
registers for any thread.

The JSON in "process metadata" is a dictionary that must have a
`threads` key. The value is an array of entries, one per LC_THREAD in
the Mach-O corefile. The number of entries must match the LC_THREADs so
they can be correctly associated.

Each thread's dictionary must have two keys, `sets`, and `registers`.
`sets` is an array of register set names. If a register set name matches
one from the LC_THREAD core registers, any registers that are defined
will be added to that register set. e.g. metadata can add a register to
the "General Purpose Registers" set that lldb shows users.

`registers` is an array of dictionaries, one per register. Each register
must have the keys `name`, `value`, `bitsize`, and `set`. It may provide
additional keys like `alt-name`, that
`DynamicRegisterInfo::SetRegisterInfo` recognizes.

This `sets` + `registers` formatting is the same one that the
`target.process.python-os-plugin-path` script interface uses; both are
parsed by `DynamicRegisterInfo`. The one addition is that in this
LC_NOTE metadata, each register must also have a `value` field, with the
value provided in big-endian base 10, as usual with JSON.

In RegisterContextUnifiedCore, I combine the register sets & registers
from the LC_THREAD for a specific thread, and the metadata sets &
registers for that thread from the LC_NOTE. Even if no LC_NOTE is
present, this class ingests the LC_THREAD register contexts and
reformats them into its internal stores before returning itself as the
RegisterContext, instead of shortcutting and returning the core's native
RegisterContext. I could have gone either way with that, but in the end
I decided if the code is correct, we should live on it always.

I added a test where we process save-core to create a userland corefile,
then use a utility "add-lcnote" to strip the existing "process metadata"
LC_NOTE that lldb put in it, and add a new one from a JSON string.

rdar://74358787

---------

Co-authored-by: Jonas Devlieghere <jonas@devlieghere.com>
This commit is contained in:
Jason Molenda
2025-06-27 18:43:41 -07:00
committed by GitHub
parent 67a5fc8e12
commit a64db49371
11 changed files with 1026 additions and 35 deletions

View File

@@ -5794,27 +5794,8 @@ bool ObjectFileMachO::GetCorefileThreadExtraInfos(
std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
Log *log(GetLog(LLDBLog::Object | LLDBLog::Process | LLDBLog::Thread));
auto lc_notes = FindLC_NOTEByName("process metadata");
for (auto lc_note : lc_notes) {
offset_t payload_offset = std::get<0>(lc_note);
offset_t strsize = std::get<1>(lc_note);
std::string buf(strsize, '\0');
if (m_data.CopyData(payload_offset, strsize, buf.data()) != strsize) {
LLDB_LOGF(log,
"Unable to read %" PRIu64
" bytes of 'process metadata' LC_NOTE JSON contents",
strsize);
return false;
}
while (buf.back() == '\0')
buf.resize(buf.size() - 1);
StructuredData::ObjectSP object_sp = StructuredData::ParseJSON(buf);
if (StructuredData::ObjectSP object_sp = GetCorefileProcessMetadata()) {
StructuredData::Dictionary *dict = object_sp->GetAsDictionary();
if (!dict) {
LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, did not "
"get a dictionary.");
return false;
}
StructuredData::Array *threads;
if (!dict->GetValueForKeyAsArray("threads", threads) || !threads) {
LLDB_LOGF(log,
@@ -5857,6 +5838,49 @@ bool ObjectFileMachO::GetCorefileThreadExtraInfos(
return false;
}
/// Read and parse the "process metadata" LC_NOTE of this corefile.
///
/// Only the first "process metadata" LC_NOTE is used; if more than one is
/// present a message is logged and the rest are ignored.
///
/// \return
///     The parsed JSON object, which is known to be a dictionary, or an
///     empty ObjectSP if there is no module, no such LC_NOTE, the payload
///     could not be read, the payload did not parse as JSON, or the
///     top-level JSON value is not a dictionary.
StructuredData::ObjectSP ObjectFileMachO::GetCorefileProcessMetadata() {
  ModuleSP module_sp(GetModule());
  if (!module_sp)
    return {};

  Log *log(GetLog(LLDBLog::Object | LLDBLog::Process | LLDBLog::Thread));
  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());

  auto lc_notes = FindLC_NOTEByName("process metadata");
  if (lc_notes.size() == 0)
    return {};
  if (lc_notes.size() > 1)
    LLDB_LOGF(
        log,
        "Multiple 'process metadata' LC_NOTEs found, only using the first.");

  auto [payload_offset, strsize] = lc_notes[0];
  std::string buf(strsize, '\0');
  if (m_data.CopyData(payload_offset, strsize, buf.data()) != strsize) {
    LLDB_LOGF(log,
              "Unable to read %" PRIu64
              " bytes of 'process metadata' LC_NOTE JSON contents",
              strsize);
    return {};
  }

  // Strip trailing NUL padding. The emptiness check matters: a zero-length
  // or all-NUL payload would otherwise call back() on an empty string,
  // which is undefined behaviour.
  while (!buf.empty() && buf.back() == '\0')
    buf.pop_back();

  StructuredData::ObjectSP object_sp = StructuredData::ParseJSON(buf);
  if (!object_sp) {
    LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, did not "
                   "parse as valid JSON.");
    return {};
  }

  StructuredData::Dictionary *dict = object_sp->GetAsDictionary();
  if (!dict) {
    LLDB_LOGF(log, "Unable to read 'process metadata' LC_NOTE, did not "
                   "get a dictionary.");
    return {};
  }

  return object_sp;
}
lldb::RegisterContextSP
ObjectFileMachO::GetThreadContextAtIndex(uint32_t idx,
lldb_private::Thread &thread) {

View File

@@ -133,6 +133,8 @@ public:
bool GetCorefileThreadExtraInfos(std::vector<lldb::tid_t> &tids) override;
lldb_private::StructuredData::ObjectSP GetCorefileProcessMetadata() override;
bool LoadCoreFileImages(lldb_private::Process &process) override;
lldb::RegisterContextSP

View File

@@ -1,6 +1,7 @@
add_lldb_library(lldbPluginProcessMachCore PLUGIN
ProcessMachCore.cpp
ThreadMachCore.cpp
RegisterContextUnifiedCore.cpp
LINK_COMPONENTS
Support

View File

@@ -0,0 +1,308 @@
//===-- RegisterContextUnifiedCore.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "RegisterContextUnifiedCore.h"
#include "lldb/Target/DynamicRegisterInfo.h"
#include "lldb/Target/Process.h"
#include "lldb/Utility/DataExtractor.h"
#include "lldb/Utility/RegisterValue.h"
#include "lldb/Utility/StructuredData.h"
using namespace lldb;
using namespace lldb_private;
/// Build a register context that merges the registers of an LC_THREAD
/// register context with any extra registers described in the thread's
/// "process metadata" JSON.
///
/// \param[in] thread
///     The thread this register context belongs to.
/// \param[in] concrete_frame_idx
///     Forwarded to the RegisterContext base class.
/// \param[in] core_thread_regctx_sp
///     The register context created from the LC_THREAD load command.
///     It is dereferenced unconditionally, so it must not be null.
/// \param[in] metadata_thread_registers
///     The per-thread metadata object, or an empty ObjectSP. Extra registers
///     are only used when it is a dictionary with a "register_info"
///     dictionary that has both "sets" and "registers" keys.
RegisterContextUnifiedCore::RegisterContextUnifiedCore(
    Thread &thread, uint32_t concrete_frame_idx,
    RegisterContextSP core_thread_regctx_sp,
    StructuredData::ObjectSP metadata_thread_registers)
    : RegisterContext(thread, concrete_frame_idx) {

  ProcessSP process_sp(thread.GetProcess());
  Target &target = process_sp->GetTarget();
  StructuredData::Dictionary *metadata_registers_dict = nullptr;

  // If we have thread metadata, check if the keys for register
  // definitions are present; if not, leave metadata_registers_dict null
  // so the metadata is ignored below.
  if (metadata_thread_registers &&
      metadata_thread_registers->GetAsDictionary() &&
      metadata_thread_registers->GetAsDictionary()->HasKey("register_info")) {
    metadata_registers_dict = metadata_thread_registers->GetAsDictionary()
                                  ->GetValueForKey("register_info")
                                  ->GetAsDictionary();
    if (metadata_registers_dict)
      if (!metadata_registers_dict->HasKey("sets") ||
          !metadata_registers_dict->HasKey("registers"))
        metadata_registers_dict = nullptr;
  }

  // When creating a register set list from the two sources,
  // the LC_THREAD aka core_thread_regctx_sp register sets
  // will be used at the same indexes.
  // Any additional sets named by the thread metadata registers
  // will be added after them.  If the thread metadata
  // specify a set with the same name as LC_THREAD, the already-used
  // index from the core register context will be used in
  // the RegisterInfo.
  std::map<size_t, size_t> metadata_regset_to_combined_regset;

  // Calculate the total size of the register store buffer we need
  // for all registers.  The corefile register definitions may include
  // RegisterInfo descriptions of registers that aren't actually
  // available.  For simplicity, calculate the size of all registers
  // as if they are available, so we can maintain the same offsets into
  // the buffer.
  uint32_t core_buffer_end = 0;
  for (size_t idx = 0; idx < core_thread_regctx_sp->GetRegisterCount(); idx++) {
    const RegisterInfo *reginfo =
        core_thread_regctx_sp->GetRegisterInfoAtIndex(idx);
    core_buffer_end =
        std::max(reginfo->byte_offset + reginfo->byte_size, core_buffer_end);
  }

  // Add metadata register sizes to the total buffer size.
  uint32_t combined_buffer_end = core_buffer_end;
  if (metadata_registers_dict) {
    StructuredData::Array *registers = nullptr;
    // The array pointer is checked as well as the return value, matching
    // how the "threads" array lookup is guarded elsewhere in this change:
    // a "registers" value that is not an array must not be dereferenced.
    if (metadata_registers_dict->GetValueForKeyAsArray("registers",
                                                       registers) &&
        registers)
      registers->ForEach(
          [&combined_buffer_end](StructuredData::Object *ent) -> bool {
            // Stop at the first entry that is not a dictionary instead of
            // dereferencing a null Dictionary pointer.
            StructuredData::Dictionary *reg_dict =
                ent ? ent->GetAsDictionary() : nullptr;
            if (!reg_dict)
              return false;
            uint32_t bitsize;
            if (!reg_dict->GetValueForKeyAsInteger("bitsize", bitsize))
              return false;
            combined_buffer_end += (bitsize / 8);
            return true;
          });
  }
  m_register_data.resize(combined_buffer_end, 0);

  // Copy the core register values into our combined data buffer,
  // skip registers that are contained within another (e.g. w0 vs. x0)
  // and registers that return as "unavailable".
  for (size_t idx = 0; idx < core_thread_regctx_sp->GetRegisterCount(); idx++) {
    const RegisterInfo *reginfo =
        core_thread_regctx_sp->GetRegisterInfoAtIndex(idx);
    RegisterValue val;
    if (!reginfo->value_regs &&
        core_thread_regctx_sp->ReadRegister(reginfo, val))
      memcpy(m_register_data.data() + reginfo->byte_offset, val.GetBytes(),
             val.GetByteSize());
  }

  // Set 'offset' fields for each register definition into our combined
  // register data buffer. DynamicRegisterInfo needs this field set to
  // parse the JSON.
  // Also copy the values of the registers into our register data buffer.
  if (metadata_registers_dict) {
    size_t offset = core_buffer_end;
    ByteOrder byte_order = core_thread_regctx_sp->GetByteOrder();
    StructuredData::Array *registers = nullptr;
    if (metadata_registers_dict->GetValueForKeyAsArray("registers",
                                                       registers) &&
        registers)
      registers->ForEach([this, &offset,
                          byte_order](StructuredData::Object *ent) -> bool {
        StructuredData::Dictionary *reg_dict =
            ent ? ent->GetAsDictionary() : nullptr;
        if (!reg_dict)
          return false;

        // Read "bitsize" with the same integer width as the sizing pass
        // above so both passes compute identical byte counts.
        uint32_t bitsize;
        uint64_t value;
        if (!reg_dict->GetValueForKeyAsInteger("bitsize", bitsize))
          return false;
        const size_t bytesize = bitsize / 8;
        if (!reg_dict->GetValueForKeyAsInteger("value", value)) {
          // We had a bitsize but no value, so move the offset forward I guess.
          offset += bytesize;
          return false;
        }

        // Never lay a register out past the end of the buffer that the
        // sizing pass allocated.
        if (offset + bytesize > m_register_data.size())
          return false;

        reg_dict->AddIntegerItem("offset", offset);
        Status error;
        switch (bytesize) {
        case 2: {
          Scalar value_scalar((uint16_t)value);
          value_scalar.GetAsMemoryData(m_register_data.data() + offset,
                                       bytesize, byte_order, error);
        } break;
        case 4: {
          Scalar value_scalar((uint32_t)value);
          value_scalar.GetAsMemoryData(m_register_data.data() + offset,
                                       bytesize, byte_order, error);
        } break;
        case 8: {
          Scalar value_scalar((uint64_t)value);
          value_scalar.GetAsMemoryData(m_register_data.data() + offset,
                                       bytesize, byte_order, error);
        } break;
        default:
          // Other sizes get no value written; their bytes stay zero.
          break;
        }
        // Advance for every register, including sizes with no value
        // written, because the sizing pass reserved space for all of them.
        // This keeps a register of an unsupported size from sharing its
        // offset with the register that follows it.
        offset += bytesize;
        return true;
      });
  }

  // Create a DynamicRegisterInfo from the metadata JSON.
  // NOTE(review): additional_reginfo_up is destroyed when this constructor
  // returns, while RegisterInfo entries copied from it below are kept in
  // m_register_infos. Confirm those copies hold no pointers into it.
  std::unique_ptr<DynamicRegisterInfo> additional_reginfo_up;
  if (metadata_registers_dict)
    additional_reginfo_up = DynamicRegisterInfo::Create(
        *metadata_registers_dict, target.GetArchitecture());

  // Put the RegisterSet names in the constant string pool,
  // to sidestep lifetime issues of char*'s.
  auto copy_regset_name = [](RegisterSet &dst, const RegisterSet &src) {
    dst.name = ConstString(src.name).AsCString();
    if (src.short_name)
      dst.short_name = ConstString(src.short_name).AsCString();
    else
      dst.short_name = nullptr;
  };

  // Copy the core thread register sets into our combined register set list.
  // RegisterSet indexes will be identical for the LC_THREAD RegisterContext.
  for (size_t idx = 0; idx < core_thread_regctx_sp->GetRegisterSetCount();
       idx++) {
    RegisterSet new_set;
    const RegisterSet *old_set = core_thread_regctx_sp->GetRegisterSet(idx);
    copy_regset_name(new_set, *old_set);
    m_register_sets.push_back(new_set);
  }

  // Add any additional metadata RegisterSets to our combined RegisterSet array.
  if (additional_reginfo_up) {
    for (size_t idx = 0; idx < additional_reginfo_up->GetNumRegisterSets();
         idx++) {
      // See if this metadata RegisterSet name matches one already present
      // from the LC_THREAD RegisterContext.
      bool found_match = false;
      const RegisterSet *old_set = additional_reginfo_up->GetRegisterSet(idx);
      for (size_t jdx = 0; jdx < m_register_sets.size(); jdx++) {
        if (strcmp(m_register_sets[jdx].name, old_set->name) == 0) {
          metadata_regset_to_combined_regset[idx] = jdx;
          found_match = true;
          break;
        }
      }
      // This metadata RegisterSet is a new one.
      // Add it to the combined RegisterSet array.
      if (!found_match) {
        RegisterSet new_set;
        copy_regset_name(new_set, *old_set);
        metadata_regset_to_combined_regset[idx] = m_register_sets.size();
        m_register_sets.push_back(new_set);
      }
    }
  }

  // Set up our combined RegisterInfo array, one RegisterSet at a time.
  for (size_t combined_regset_idx = 0;
       combined_regset_idx < m_register_sets.size(); combined_regset_idx++) {
    uint32_t registers_this_regset = 0;

    // Copy all LC_THREAD RegisterInfos that have a value into our
    // combined RegisterInfo array.  (the LC_THREAD RegisterContext
    // may describe registers that were not provided in this thread)
    //
    // LC_THREAD register set indexes are identical to the combined
    // register set indexes.  The combined register set array may have
    // additional entries.
    if (combined_regset_idx < core_thread_regctx_sp->GetRegisterSetCount()) {
      const RegisterSet *regset =
          core_thread_regctx_sp->GetRegisterSet(combined_regset_idx);
      // Copy all the registers that have values in.
      for (size_t j = 0; j < regset->num_registers; j++) {
        uint32_t reg_idx = regset->registers[j];
        const RegisterInfo *reginfo =
            core_thread_regctx_sp->GetRegisterInfoAtIndex(reg_idx);
        RegisterValue val;
        if (!reginfo->value_regs &&
            core_thread_regctx_sp->ReadRegister(reginfo, val)) {
          m_regset_regnum_collection[combined_regset_idx].push_back(
              m_register_infos.size());
          m_register_infos.push_back(*reginfo);
          registers_this_regset++;
        }
      }
    }

    // Copy all the metadata RegisterInfos into our combined
    // RegisterInfo array.
    // The metadata may add registers to one of the LC_THREAD register sets,
    // or its own newly added register sets.  metadata_regset_to_combined_regset
    // has the association of the RegisterSet indexes between the two.
    if (additional_reginfo_up) {
      // Find the register set in the metadata that matches this register
      // set, then copy all its RegisterInfos.
      for (size_t setidx = 0;
           setidx < additional_reginfo_up->GetNumRegisterSets(); setidx++) {
        if (metadata_regset_to_combined_regset[setidx] == combined_regset_idx) {
          const RegisterSet *regset =
              additional_reginfo_up->GetRegisterSet(setidx);
          for (size_t j = 0; j < regset->num_registers; j++) {
            uint32_t reg_idx = regset->registers[j];
            const RegisterInfo *reginfo =
                additional_reginfo_up->GetRegisterInfoAtIndex(reg_idx);
            m_regset_regnum_collection[combined_regset_idx].push_back(
                m_register_infos.size());
            m_register_infos.push_back(*reginfo);
            registers_this_regset++;
          }
        }
      }
    }

    // Point the combined RegisterSet at the register-number list collected
    // for it above.
    m_register_sets[combined_regset_idx].num_registers = registers_this_regset;
    m_register_sets[combined_regset_idx].registers =
        m_regset_regnum_collection[combined_regset_idx].data();
  }
}
/// Return the number of entries in the combined RegisterInfo array that the
/// constructor filled from the LC_THREAD registers and the metadata registers.
size_t RegisterContextUnifiedCore::GetRegisterCount() {
return m_register_infos.size();
}
/// Return the RegisterInfo at index \p reg of the combined register array.
///
/// \param[in] reg
///     Index into the combined RegisterInfo array.
///
/// \return
///     A pointer to the RegisterInfo, or nullptr if \p reg is not less
///     than GetRegisterCount().
const RegisterInfo *
RegisterContextUnifiedCore::GetRegisterInfoAtIndex(size_t reg) {
  // Indexing the vector out of range is undefined behaviour; report an
  // invalid index with nullptr instead.
  if (reg >= m_register_infos.size())
    return nullptr;
  return &m_register_infos[reg];
}
/// Return the number of entries in the combined RegisterSet array: the
/// LC_THREAD register sets followed by any sets added by the metadata.
size_t RegisterContextUnifiedCore::GetRegisterSetCount() {
return m_register_sets.size();
}
/// Return the RegisterSet at index \p set of the combined register set array.
///
/// \param[in] set
///     Index into the combined RegisterSet array.
///
/// \return
///     A pointer to the RegisterSet, or nullptr if \p set is not less
///     than GetRegisterSetCount().
const RegisterSet *RegisterContextUnifiedCore::GetRegisterSet(size_t set) {
  // Indexing the vector out of range is undefined behaviour; report an
  // invalid index with nullptr instead.
  if (set >= m_register_sets.size())
    return nullptr;
  return &m_register_sets[set];
}
/// Read one register's value out of the combined register data buffer.
///
/// \param[in] reg_info
///     Describes the register; its byte_offset and byte_size locate the
///     value inside the buffer. May be null.
/// \param[out] value
///     Receives the register contents on success.
///
/// \return
///     true if the value was extracted. false if \p reg_info is null, the
///     thread has no process, or the register is not 2, 4 or 8 bytes wide.
bool RegisterContextUnifiedCore::ReadRegister(
    const lldb_private::RegisterInfo *reg_info,
    lldb_private::RegisterValue &value) {
  if (reg_info == nullptr)
    return false;

  ProcessSP process_sp = m_thread.GetProcess();
  if (!process_sp)
    return false;

  // View the whole buffer with the process's byte order and pointer size.
  DataExtractor extractor(m_register_data.data(), m_register_data.size(),
                          process_sp->GetByteOrder(),
                          process_sp->GetAddressByteSize());
  offset_t cursor = reg_info->byte_offset;
  const auto width = reg_info->byte_size;

  if (width == 2)
    value.SetUInt16(extractor.GetU16(&cursor));
  else if (width == 4)
    value.SetUInt32(extractor.GetU32(&cursor));
  else if (width == 8)
    value.SetUInt64(extractor.GetU64(&cursor));
  else
    return false;

  return true;
}
/// Writing registers is not implemented by this register context: both
/// arguments are ignored and the call always reports failure.
///
/// \return
///     Always false.
bool RegisterContextUnifiedCore::WriteRegister(
const lldb_private::RegisterInfo *reg_info,
const lldb_private::RegisterValue &value) {
return false;
}

View File

@@ -0,0 +1,57 @@
//===-- RegisterContextUnifiedCore.h --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLDB_SOURCE_PLUGINS_PROCESS_REGISTERCONTEXT_UNIFIED_CORE_H
#define LLDB_SOURCE_PLUGINS_PROCESS_REGISTERCONTEXT_UNIFIED_CORE_H
#include <map>
#include <string>
#include <vector>
#include "lldb/Target/RegisterContext.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/StructuredData.h"
#include "lldb/lldb-enumerations.h"
#include "lldb/lldb-private.h"
namespace lldb_private {
/// A RegisterContext that presents the registers of a corefile's LC_THREAD
/// register context together with additional registers supplied by
/// per-thread metadata, as one combined list of register sets and registers.
///
/// Register values are held in an internal byte buffer.
class RegisterContextUnifiedCore : public RegisterContext {
public:
  /// \param[in] thread
  ///     The thread this register context belongs to.
  /// \param[in] concrete_frame_idx
  ///     Forwarded to the RegisterContext base class.
  /// \param[in] core_thread_regctx_sp
  ///     The register context created from the LC_THREAD load command.
  /// \param[in] metadata_thread_registers
  ///     The thread's metadata object describing extra registers; may be
  ///     empty.
  RegisterContextUnifiedCore(
      Thread &thread, uint32_t concrete_frame_idx,
      lldb::RegisterContextSP core_thread_regctx_sp,
      lldb_private::StructuredData::ObjectSP metadata_thread_registers);

  /// Intentionally a no-op.
  void InvalidateAllRegisters() override {}

  /// Number of registers in the combined register list.
  size_t GetRegisterCount() override;

  /// The RegisterInfo at index \p reg of the combined register list.
  const lldb_private::RegisterInfo *GetRegisterInfoAtIndex(size_t reg) override;

  /// Number of register sets in the combined register set list.
  size_t GetRegisterSetCount() override;

  /// The RegisterSet at index \p set of the combined register set list.
  const lldb_private::RegisterSet *GetRegisterSet(size_t set) override;

  /// Read a register from the internal buffer. Only 2, 4 and 8 byte
  /// registers are supported; other sizes return false.
  bool ReadRegister(const lldb_private::RegisterInfo *reg_info,
                    lldb_private::RegisterValue &value) override;

  /// Not implemented; always returns false.
  bool WriteRegister(const lldb_private::RegisterInfo *reg_info,
                     const lldb_private::RegisterValue &value) override;

private:
  /// The combined register sets: LC_THREAD sets first, then metadata sets.
  std::vector<lldb_private::RegisterSet> m_register_sets;
  /// The combined RegisterInfo array for all register sets.
  std::vector<lldb_private::RegisterInfo> m_register_infos;
  /// For each register set, an array of register numbers included.
  std::map<size_t, std::vector<uint32_t>> m_regset_regnum_collection;
  /// Bytes of the register contents.
  std::vector<uint8_t> m_register_data;
};
} // namespace lldb_private
#endif // LLDB_SOURCE_PLUGINS_PROCESS_REGISTERCONTEXT_UNIFIED_CORE_H

View File

@@ -6,12 +6,18 @@
//
//===----------------------------------------------------------------------===//
#include <optional>
#include <string>
#include <vector>
#include "RegisterContextUnifiedCore.h"
#include "ThreadMachCore.h"
#include "lldb/Breakpoint/Watchpoint.h"
#include "lldb/Host/SafeMachO.h"
#include "lldb/Symbol/ObjectFile.h"
#include "lldb/Target/AppleArm64ExceptionClass.h"
#include "lldb/Target/DynamicRegisterInfo.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/RegisterContext.h"
#include "lldb/Target/StopInfo.h"
@@ -22,6 +28,7 @@
#include "lldb/Utility/RegisterValue.h"
#include "lldb/Utility/State.h"
#include "lldb/Utility/StreamString.h"
#include "lldb/Utility/StructuredData.h"
#include "ProcessMachCore.h"
//#include "RegisterContextKDP_arm.h"
@@ -70,27 +77,50 @@ lldb::RegisterContextSP ThreadMachCore::GetRegisterContext() {
lldb::RegisterContextSP
ThreadMachCore::CreateRegisterContextForFrame(StackFrame *frame) {
lldb::RegisterContextSP reg_ctx_sp;
uint32_t concrete_frame_idx = 0;
if (frame)
concrete_frame_idx = frame->GetConcreteFrameIndex();
if (concrete_frame_idx > 0)
return GetUnwinder().CreateRegisterContextForFrame(frame);
if (concrete_frame_idx == 0) {
if (!m_thread_reg_ctx_sp) {
ProcessSP process_sp(GetProcess());
if (m_thread_reg_ctx_sp)
return m_thread_reg_ctx_sp;
ObjectFile *core_objfile =
static_cast<ProcessMachCore *>(process_sp.get())->GetCoreObjectFile();
if (core_objfile)
m_thread_reg_ctx_sp = core_objfile->GetThreadContextAtIndex(
m_objfile_lc_thread_idx, *this);
ProcessSP process_sp(GetProcess());
assert(process_sp);
ObjectFile *core_objfile =
static_cast<ProcessMachCore *>(process_sp.get())->GetCoreObjectFile();
if (!core_objfile)
return {};
RegisterContextSP core_thread_regctx_sp =
core_objfile->GetThreadContextAtIndex(m_objfile_lc_thread_idx, *this);
if (!core_thread_regctx_sp)
return {};
StructuredData::ObjectSP process_md_sp =
core_objfile->GetCorefileProcessMetadata();
StructuredData::ObjectSP thread_md_sp;
if (process_md_sp && process_md_sp->GetAsDictionary() &&
process_md_sp->GetAsDictionary()->HasKey("threads")) {
StructuredData::Array *threads = process_md_sp->GetAsDictionary()
->GetValueForKey("threads")
->GetAsArray();
if (threads && threads->GetSize() == core_objfile->GetNumThreadContexts()) {
StructuredData::ObjectSP thread_sp =
threads->GetItemAtIndex(m_objfile_lc_thread_idx);
if (thread_sp && thread_sp->GetAsDictionary())
thread_md_sp = thread_sp;
}
reg_ctx_sp = m_thread_reg_ctx_sp;
} else {
reg_ctx_sp = GetUnwinder().CreateRegisterContextForFrame(frame);
}
return reg_ctx_sp;
m_thread_reg_ctx_sp = std::make_shared<RegisterContextUnifiedCore>(
*this, concrete_frame_idx, core_thread_regctx_sp, thread_md_sp);
return m_thread_reg_ctx_sp;
}
static bool IsCrashExceptionClass(AppleArm64ExceptionClass EC) {