Add new Python API SBCommandInterpreter::GetTranscript() (#90703)

# Motivation

Currently, the user can already get the "transcript" (for "what is the
transcript", see `CommandInterpreter::SaveTranscript`). However, the
only way to obtain the transcript data as a user is to first destroy the
debugger, then read the save directory. Note that destroy-callbacks
cannot be used, because (1) transcript data is private to the command
interpreter (see `CommandInterpreter.h`), and (2) the transcript is
written *after* the destroy-callbacks are invoked (see
`Debugger::Destroy`).

So basically, there is no way to obtain the transcript:
* during the lifetime of a debugger (including the destroy-callbacks,
which often perform logging tasks, where the transcript can be useful)
* without relying on external storage

In theory, there are other ways for the user to obtain transcript data
during a debugger's life cycle:
* Use Python API and intercept commands and results.
* Use CLI and record console input/output.

However, such ways rely on the client's setup and are not supported
natively by LLDB.


# Proposal

Add a new Python API `SBCommandInterpreter::GetTranscript()`.

Goals:
* It can be called at any time during the debugger's life cycle,
including in destroy-callbacks.
* It returns data in-memory.

Structured data:
* To make data processing easier, the return type is `SBStructuredData`.
See comments in code for how the data is organized.
* In the future, `SaveTranscript` can be updated to write different
formats using such data (e.g. JSON). This is probably accompanied by a
new setting (e.g. `interpreter.save-session-format`).

# Alternatives

The return type can also be `std::vector<std::pair<std::string,
SBCommandReturnObject>>`. This will make implementation easier, without
having to translate it to `SBStructuredData`. On the other hand,
`SBStructuredData` can convert to JSON easily, so it's more convenient
for the user to process.

# Privacy

Both user commands and output/error in the transcript can contain
private data. However, as mentioned, the transcript is already available
to the user. The addition of the new API doesn't increase the level of
risk. In fact, it _lowers_ the risk of private data being leaked later
on, by avoiding writing such data to external storage.

Once the user (or their code) gets the transcript, it will be their
responsibility to make sure that any required privacy policies are
guaranteed.

# Tests

```
bin/llvm-lit -sv ../external/llvm-project/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py
```

```
bin/llvm-lit -sv ../external/llvm-project/lldb/test/API/commands/session/save/TestSessionSave.py
```

---------

Co-authored-by: Roy Shi <royshi@meta.com>
Co-authored-by: Med Ismail Bennani <ismail@bennani.ma>
This commit is contained in:
royitaqi
2024-05-20 15:49:46 -07:00
committed by GitHub
parent 8018e4c569
commit e8dc8d614a
8 changed files with 270 additions and 8 deletions

View File

@@ -1,5 +1,6 @@
"""Test the SBCommandInterpreter APIs."""
import json
import lldb
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
@@ -15,8 +16,7 @@ class CommandInterpreterAPICase(TestBase):
# Find the line number to break on inside main.c.
self.line = line_number("main.c", "Hello world.")
def test_with_process_launch_api(self):
"""Test the SBCommandInterpreter APIs."""
def buildAndCreateTarget(self):
self.build()
exe = self.getBuildArtifact("a.out")
@@ -27,6 +27,11 @@ class CommandInterpreterAPICase(TestBase):
# Retrieve the associated command interpreter from our debugger.
ci = self.dbg.GetCommandInterpreter()
self.assertTrue(ci, VALID_COMMAND_INTERPRETER)
return ci
def test_with_process_launch_api(self):
"""Test the SBCommandInterpreter APIs."""
ci = self.buildAndCreateTarget()
# Exercise some APIs....
@@ -85,3 +90,166 @@ class CommandInterpreterAPICase(TestBase):
self.assertEqual(res.GetOutput(), "")
self.assertIsNotNone(res.GetError())
self.assertEqual(res.GetError(), "")
def getTranscriptAsPythonObject(self, ci):
    """Fetch the transcript from `ci` and decode it into a Python object.

    The structured data returned by `ci.GetTranscript()` is serialized to
    JSON through an `lldb.SBStream`; the JSON text is then parsed with
    `json.loads` and the parsed value is returned.
    """
    transcript_data = ci.GetTranscript()
    self.assertTrue(transcript_data.IsValid())

    json_stream = lldb.SBStream()
    self.assertTrue(json_stream)

    # Serialization has to succeed before the stream contents are parsed.
    self.assertSuccess(transcript_data.GetAsJSON(json_stream))

    return json.loads(json_stream.GetData())
def test_structured_transcript(self):
    """Test structured transcript generation and retrieval.

    Runs a fixed list of commands (one of which is invalid) with the
    "save-transcript" setting on, then validates each entry of the
    transcript obtained through `getTranscriptAsPythonObject`.
    """
    ci = self.buildAndCreateTarget()

    # Make sure the "save-transcript" setting is on
    self.runCmd("settings set interpreter.save-transcript true")

    # Send a few commands through the command interpreter.
    #
    # Using `ci.HandleCommand` because some commands will fail so that we
    # can test the "error" field in the saved transcript.
    res = lldb.SBCommandReturnObject()
    breakpoint_command = "breakpoint set -f main.c -l %d" % self.line
    commands = [
        "version",
        "an-unknown-command",
        breakpoint_command,
        "r",
        "p a",
        "statistics dump",
    ]
    for command_line in commands:
        ci.HandleCommand(command_line, res)
    total_number_of_commands = len(commands)

    # Get transcript as python object
    transcript = self.getTranscriptAsPythonObject(ci)

    # Exactly the commands sent above should have been recorded; checking
    # this first gives a clear failure instead of an IndexError below.
    self.assertEqual(len(transcript), total_number_of_commands)

    # All commands should have expected fields.
    for command in transcript:
        self.assertIn("command", command)
        self.assertIn("output", command)
        self.assertIn("error", command)
        self.assertIn("seconds", command)

    # The following validates individual commands in the transcript.
    #
    # Notes:
    # 1. Some of the asserts rely on the exact output format of the
    #    commands. Hopefully we are not changing them any time soon.
    # 2. We are removing the "seconds" field from each command, so that
    #    some of the validations below can be easier / more readable.
    for command in transcript:
        del command["seconds"]

    # (lldb) version
    self.assertEqual(transcript[0]["command"], "version")
    self.assertIn("lldb version", transcript[0]["output"])
    self.assertEqual(transcript[0]["error"], "")

    # (lldb) an-unknown-command
    self.assertEqual(
        transcript[1],
        {
            "command": "an-unknown-command",
            "output": "",
            "error": "error: 'an-unknown-command' is not a valid command.\n",
        },
    )

    # (lldb) breakpoint set -f main.c -l <line>
    self.assertEqual(transcript[2]["command"], breakpoint_command)
    # Breakpoint 1: where = a.out`main + 29 at main.c:5:3, address = 0x0000000100000f7d
    self.assertIn("Breakpoint 1: where = a.out`main ", transcript[2]["output"])
    self.assertEqual(transcript[2]["error"], "")

    # (lldb) r
    self.assertEqual(transcript[3]["command"], "r")
    # Process 25494 launched: '<path>/TestCommandInterpreterAPI.test_structured_transcript/a.out' (x86_64)
    self.assertIn("Process", transcript[3]["output"])
    self.assertIn("launched", transcript[3]["output"])
    self.assertEqual(transcript[3]["error"], "")

    # (lldb) p a
    self.assertEqual(
        transcript[4],
        {
            "command": "p a",
            "output": "(int) 123\n",
            "error": "",
        },
    )

    # (lldb) statistics dump
    # Dump result should be valid JSON: `json.loads` raises
    # `json.JSONDecodeError` (failing the test) if it is not.
    statistics_dump = json.loads(transcript[5]["output"])
    # The checks below look up top-level keys, so the decoded value must
    # be a JSON object.
    self.assertIsInstance(statistics_dump, dict)
    # Dump result should contain expected fields
    self.assertIn("commands", statistics_dump)
    self.assertIn("memory", statistics_dump)
    self.assertIn("modules", statistics_dump)
    self.assertIn("targets", statistics_dump)
def test_save_transcript_setting_default(self):
    """The `interpreter.save-transcript` setting should default to false."""
    ci = self.buildAndCreateTarget()
    res = lldb.SBCommandReturnObject()

    # Query the setting through `ci.HandleCommand` so that the command
    # output is captured in `res` and can actually be checked. (Passing
    # the expected text as the second argument of `runCmd` only sets the
    # failure message; it does not compare it against the output.)
    ci.HandleCommand("settings show interpreter.save-transcript", res)
    self.assertTrue(res.Succeeded())

    # The setting's default value should be "false"
    self.assertIn(
        "interpreter.save-transcript (boolean) = false", res.GetOutput()
    )
def test_save_transcript_setting_off(self):
    """No transcript entry should be recorded while the setting is off."""
    ci = self.buildAndCreateTarget()

    # Explicitly turn the setting off, then run a single command.
    for command_line in (
        "settings set interpreter.save-transcript false",
        "version",
    ):
        self.runCmd(command_line)

    # With the setting off, the transcript is expected to be an empty list.
    self.assertEqual(self.getTranscriptAsPythonObject(ci), [])
def test_save_transcript_setting_on(self):
    """A command run while the setting is on should be recorded."""
    ci = self.buildAndCreateTarget()

    # Make sure the setting is on
    self.runCmd("settings set interpreter.save-transcript true")

    # The transcript should contain one item after running a command
    self.runCmd("version")
    transcript = self.getTranscriptAsPythonObject(ci)
    self.assertEqual(len(transcript), 1)
    self.assertEqual(transcript[0]["command"], "version")
def test_save_transcript_returns_copy(self):
    """
    Check that `GetTranscript` hands back *at least* a shallow copy.

    A deep copy is believed to be performed in
    `SBCommandInterpreter::GetTranscript`. That cannot be tested here and
    does not need to be, because the command interpreter has no logic that
    modifies a transcript item (representing a command) after it has been
    returned.
    """
    ci = self.buildAndCreateTarget()

    def command_at(structured_data, index):
        # Read the "command" string of the transcript item at `index`.
        item = structured_data.GetItemAtIndex(index)
        return item.GetValueForKey("command").GetStringValue(100)

    # Make sure the setting is on
    self.runCmd("settings set interpreter.save-transcript true")

    # First snapshot: taken after a single command.
    self.runCmd("version")
    first_snapshot = ci.GetTranscript()
    self.assertTrue(first_snapshot.IsValid())
    self.assertEqual(first_snapshot.GetSize(), 1)
    self.assertEqual(command_at(first_snapshot, 0), "version")

    # Second snapshot: taken after one more command.
    self.runCmd("help")
    second_snapshot = ci.GetTranscript()
    self.assertTrue(second_snapshot.IsValid())
    self.assertEqual(second_snapshot.GetSize(), 2)
    self.assertEqual(command_at(second_snapshot, 0), "version")
    self.assertEqual(command_at(second_snapshot, 1), "help")

    # The first snapshot must not have picked up the later command.
    self.assertTrue(first_snapshot.IsValid())
    self.assertEqual(first_snapshot.GetSize(), 1)
    self.assertEqual(command_at(first_snapshot, 0), "version")

View File

@@ -1,6 +1,7 @@
#include <stdio.h>
int main(int argc, char const *argv[]) {
printf("Hello world.\n");
return 0;
int a = 123;
printf("Hello world.\n");
return 0;
}