llm logs -x/--extract option (#693)

* llm logs -x/--extract option
* Update docs/help.md for llm logs -x
* Added test for llm logs -x/--extract, refs #693
* llm logs -xr behaves the same as llm logs -x
* -x/--extract in llm logging docs

---------

Co-authored-by: Simon Willison <swillison@gmail.com>
This commit is contained in:
Csaba Henk 2025-01-11 00:53:04 +01:00 committed by GitHub
parent b452effa09
commit 88a8cfd9e4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 76 additions and 33 deletions

View file

@ -301,6 +301,7 @@ Options:
-t, --truncate Truncate long strings in output
-u, --usage Include token usage
-r, --response Just output the last response
-x, --extract Extract first fenced code block
-c, --current Show logs from the current conversation
--cid, --conversation TEXT Show logs for this conversation ID
--json Output logs as JSON

View file

@ -61,6 +61,11 @@ To get back just the most recent prompt response as plain text, add `-r/--respon
```bash
llm logs -r
```
Use `-x/--extract` to extract and return the first fenced code block from the selected log entries:
```bash
llm logs -x
```
Add `--json` to get the log messages in JSON instead:

View file

@ -855,6 +855,7 @@ order by prompt_attachments."order"
@click.option("-t", "--truncate", is_flag=True, help="Truncate long strings in output")
@click.option("-u", "--usage", is_flag=True, help="Include token usage")
@click.option("-r", "--response", is_flag=True, help="Just output the last response")
@click.option("-x", "--extract", is_flag=True, help="Extract first fenced code block")
@click.option(
"current_conversation",
"-c",
@ -883,6 +884,7 @@ def logs_list(
truncate,
usage,
response,
extract,
current_conversation,
conversation_id,
json_output,
@ -979,6 +981,7 @@ def logs_list(
else:
row[key] = json.loads(row[key])
output = None
if json_output:
# Output as JSON if requested
for row in rows:
@ -986,11 +989,20 @@ def logs_list(
{k: v for k, v in attachment.items() if k != "response_id"}
for attachment in attachments_by_id.get(row["id"], [])
]
click.echo(json.dumps(list(rows), indent=2))
output = json.dumps(list(rows), indent=2)
elif extract:
# Extract and return first code block
for row in rows:
output = extract_first_fenced_code_block(row["response"])
if output is not None:
break
elif response:
# Just output the last response
if rows:
click.echo(rows[-1]["response"])
output = rows[-1]["response"]
if output is not None:
click.echo(output)
else:
# Output neatly formatted human-readable logs
current_system = None

View file

@ -34,7 +34,7 @@ def log_path(user_path):
"id": str(ULID()).lower(),
"system": "system",
"prompt": "prompt",
"response": "response",
"response": 'response\n```python\nprint("hello word")\n```',
"model": "davinci",
"datetime_utc": (start + datetime.timedelta(seconds=i)).isoformat(),
"conversation_id": "abc123",
@ -60,35 +60,38 @@ def test_logs_text(log_path, usage):
output = result.output
# Replace 2023-08-17T20:53:58 with YYYY-MM-DDTHH:MM:SS
output = datetime_re.sub("YYYY-MM-DDTHH:MM:SS", output)
assert output == (
"# YYYY-MM-DDTHH:MM:SS conversation: abc123\n\n"
"Model: **davinci**\n\n"
"## Prompt:\n\n"
"prompt\n\n"
"## System:\n\n"
"system\n\n"
"## Response:\n\n"
"response\n\n"
) + ("## Token usage:\n\n2 input, 5 output\n\n" if usage else "") + (
"# YYYY-MM-DDTHH:MM:SS conversation: abc123\n\n"
"Model: **davinci**\n\n"
"## Prompt:\n\n"
"prompt\n\n"
"## Response:\n\n"
"response\n\n"
) + (
"## Token usage:\n\n2 input, 5 output\n\n" if usage else ""
) + (
"# YYYY-MM-DDTHH:MM:SS conversation: abc123\n\n"
"Model: **davinci**\n\n"
"## Prompt:\n\n"
"prompt\n\n"
"## Response:\n\n"
"response\n\n"
) + (
"## Token usage:\n\n2 input, 5 output\n\n" if usage else ""
expected = (
(
"# YYYY-MM-DDTHH:MM:SS conversation: abc123\n\n"
"Model: **davinci**\n\n"
"## Prompt:\n\n"
"prompt\n\n"
"## System:\n\n"
"system\n\n"
"## Response:\n\n"
'response\n```python\nprint("hello word")\n```\n\n'
)
+ ("## Token usage:\n\n2 input, 5 output\n\n" if usage else "")
+ (
"# YYYY-MM-DDTHH:MM:SS conversation: abc123\n\n"
"Model: **davinci**\n\n"
"## Prompt:\n\n"
"prompt\n\n"
"## Response:\n\n"
'response\n```python\nprint("hello word")\n```\n\n'
)
+ ("## Token usage:\n\n2 input, 5 output\n\n" if usage else "")
+ (
"# YYYY-MM-DDTHH:MM:SS conversation: abc123\n\n"
"Model: **davinci**\n\n"
"## Prompt:\n\n"
"prompt\n\n"
"## Response:\n\n"
'response\n```python\nprint("hello word")\n```\n\n'
)
+ ("## Token usage:\n\n2 input, 5 output\n\n" if usage else "")
)
assert output == expected
@pytest.mark.parametrize("n", (None, 0, 2))
@ -118,7 +121,28 @@ def test_logs_response_only(args, log_path):
runner = CliRunner()
result = runner.invoke(cli, ["logs"] + args, catch_exceptions=False)
assert result.exit_code == 0
assert result.output == "response\n"
assert result.output == 'response\n```python\nprint("hello word")\n```\n'
@pytest.mark.parametrize(
    "args",
    (
        ["-x"],
        ["--extract"],
        ["list", "-x"],
        ["list", "--extract"],
        # Using -xr together should have same effect as just -x
        ["-xr"],
        ["-x", "-r"],
        ["--extract", "--response"],
    ),
)
def test_logs_extract_first_code(args, log_path):
    "Test that logs -x/--extract returns the first code block"
    # Invoke the CLI with each flag combination; -x should always win
    # over -r, so every variant yields just the extracted code block.
    cli_runner = CliRunner()
    invocation = cli_runner.invoke(cli, ["logs"] + args, catch_exceptions=False)
    assert invocation.exit_code == 0
    assert invocation.output == 'print("hello word")\n\n'
@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")

View file

@ -67,7 +67,8 @@ def test_simplify_usage_dict(input_data, expected_output):
None,
],
[
"First code block:\n\n```python\ndef foo():\n return 'bar'\n```\n\nSecond code block:\n\n```javascript\nfunction foo() {\n return 'bar';\n}\n```",
"First code block:\n\n```python\ndef foo():\n return 'bar'\n```\n\n"
"Second code block:\n\n```javascript\nfunction foo() {\n return 'bar';\n}\n```",
"def foo():\n return 'bar'\n",
],
[