llm logs -x/--extract option (#693)

* llm logs -x/--extract option
* Update docs/help.md for llm logs -x
* Added test for llm logs -x/--extract, refs #693
* llm logs -xr behaves the same as llm logs -x
* -x/--extract in llm logging docs

---------

Co-authored-by: Simon Willison <swillison@gmail.com>
This commit is contained in:
Csaba Henk 2025-01-11 00:53:04 +01:00 committed by GitHub
parent b452effa09
commit 88a8cfd9e4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 76 additions and 33 deletions

View file

@ -301,6 +301,7 @@ Options:
-t, --truncate Truncate long strings in output
-u, --usage Include token usage
-r, --response Just output the last response
-x, --extract Extract first fenced code block
-c, --current Show logs from the current conversation
--cid, --conversation TEXT Show logs for this conversation ID
--json Output logs as JSON

View file

@ -61,6 +61,11 @@ To get back just the most recent prompt response as plain text, add `-r/--respon
```bash
llm logs -r
```
Use `-x/--extract` to extract and return the first fenced code block from the selected log entries:
```bash
llm logs -x
```
Add `--json` to get the log messages in JSON instead:

View file

@ -855,6 +855,7 @@ order by prompt_attachments."order"
@click.option("-t", "--truncate", is_flag=True, help="Truncate long strings in output")
@click.option("-u", "--usage", is_flag=True, help="Include token usage")
@click.option("-r", "--response", is_flag=True, help="Just output the last response")
@click.option("-x", "--extract", is_flag=True, help="Extract first fenced code block")
@click.option(
"current_conversation",
"-c",
@ -883,6 +884,7 @@ def logs_list(
truncate,
usage,
response,
extract,
current_conversation,
conversation_id,
json_output,
@ -979,6 +981,7 @@ def logs_list(
else:
row[key] = json.loads(row[key])
output = None
if json_output:
# Output as JSON if requested
for row in rows:
@ -986,11 +989,20 @@ def logs_list(
{k: v for k, v in attachment.items() if k != "response_id"}
for attachment in attachments_by_id.get(row["id"], [])
]
click.echo(json.dumps(list(rows), indent=2))
output = json.dumps(list(rows), indent=2)
elif extract:
# Extract and return first code block
for row in rows:
output = extract_first_fenced_code_block(row["response"])
if output is not None:
break
elif response:
# Just output the last response
if rows:
click.echo(rows[-1]["response"])
output = rows[-1]["response"]
if output is not None:
click.echo(output)
else:
# Output neatly formatted human-readable logs
current_system = None

View file

@ -34,7 +34,7 @@ def log_path(user_path):
"id": str(ULID()).lower(),
"system": "system",
"prompt": "prompt",
"response": "response",
"response": 'response\n```python\nprint("hello word")\n```',
"model": "davinci",
"datetime_utc": (start + datetime.timedelta(seconds=i)).isoformat(),
"conversation_id": "abc123",
@ -60,35 +60,38 @@ def test_logs_text(log_path, usage):
output = result.output
# Replace 2023-08-17T20:53:58 with YYYY-MM-DDTHH:MM:SS
output = datetime_re.sub("YYYY-MM-DDTHH:MM:SS", output)
assert output == (
"# YYYY-MM-DDTHH:MM:SS conversation: abc123\n\n"
"Model: **davinci**\n\n"
"## Prompt:\n\n"
"prompt\n\n"
"## System:\n\n"
"system\n\n"
"## Response:\n\n"
"response\n\n"
) + ("## Token usage:\n\n2 input, 5 output\n\n" if usage else "") + (
"# YYYY-MM-DDTHH:MM:SS conversation: abc123\n\n"
"Model: **davinci**\n\n"
"## Prompt:\n\n"
"prompt\n\n"
"## Response:\n\n"
"response\n\n"
) + (
"## Token usage:\n\n2 input, 5 output\n\n" if usage else ""
) + (
"# YYYY-MM-DDTHH:MM:SS conversation: abc123\n\n"
"Model: **davinci**\n\n"
"## Prompt:\n\n"
"prompt\n\n"
"## Response:\n\n"
"response\n\n"
) + (
"## Token usage:\n\n2 input, 5 output\n\n" if usage else ""
expected = (
(
"# YYYY-MM-DDTHH:MM:SS conversation: abc123\n\n"
"Model: **davinci**\n\n"
"## Prompt:\n\n"
"prompt\n\n"
"## System:\n\n"
"system\n\n"
"## Response:\n\n"
'response\n```python\nprint("hello word")\n```\n\n'
)
+ ("## Token usage:\n\n2 input, 5 output\n\n" if usage else "")
+ (
"# YYYY-MM-DDTHH:MM:SS conversation: abc123\n\n"
"Model: **davinci**\n\n"
"## Prompt:\n\n"
"prompt\n\n"
"## Response:\n\n"
'response\n```python\nprint("hello word")\n```\n\n'
)
+ ("## Token usage:\n\n2 input, 5 output\n\n" if usage else "")
+ (
"# YYYY-MM-DDTHH:MM:SS conversation: abc123\n\n"
"Model: **davinci**\n\n"
"## Prompt:\n\n"
"prompt\n\n"
"## Response:\n\n"
'response\n```python\nprint("hello word")\n```\n\n'
)
+ ("## Token usage:\n\n2 input, 5 output\n\n" if usage else "")
)
assert output == expected
@pytest.mark.parametrize("n", (None, 0, 2))
@ -118,7 +121,28 @@ def test_logs_response_only(args, log_path):
runner = CliRunner()
result = runner.invoke(cli, ["logs"] + args, catch_exceptions=False)
assert result.exit_code == 0
assert result.output == "response\n"
assert result.output == 'response\n```python\nprint("hello word")\n```\n'
@pytest.mark.parametrize(
    "args",
    (
        ["-x"],
        ["--extract"],
        ["list", "-x"],
        ["list", "--extract"],
        # Using -xr together should have same effect as just -x
        ["-xr"],
        ["-x", "-r"],
        ["--extract", "--response"],
    ),
)
def test_logs_extract_first_code(args, log_path):
    "Test that logs -x/--extract returns the first code block"
    # Invoke the CLI with each flag combination; -x should always win
    # over -r, so every variant yields just the extracted code block.
    cli_runner = CliRunner()
    invocation = cli_runner.invoke(cli, ["logs"] + args, catch_exceptions=False)
    assert invocation.exit_code == 0
    assert invocation.output == 'print("hello word")\n\n'
@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")

View file

@ -67,7 +67,8 @@ def test_simplify_usage_dict(input_data, expected_output):
None,
],
[
"First code block:\n\n```python\ndef foo():\n return 'bar'\n```\n\nSecond code block:\n\n```javascript\nfunction foo() {\n return 'bar';\n}\n```",
"First code block:\n\n```python\ndef foo():\n return 'bar'\n```\n\n"
"Second code block:\n\n```javascript\nfunction foo() {\n return 'bar';\n}\n```",
"def foo():\n return 'bar'\n",
],
[