mirror of
https://github.com/Hopiu/llm.git
synced 2026-05-21 20:11:52 +00:00
llm logs --data-ids flag, closes #800
This commit is contained in:
parent
1bebf8b34a
commit
48f67f4085
5 changed files with 92 additions and 4 deletions
|
|
@ -307,6 +307,7 @@ Options:
|
|||
--data Output newline-delimited JSON data for schema
|
||||
--data-array Output JSON array of data for schema
|
||||
--data-key TEXT Return JSON objects from array in this key
|
||||
--data-ids Attach corresponding IDs to JSON objects
|
||||
-t, --truncate Truncate long strings in output
|
||||
-s, --short Shorter YAML output with truncated prompts
|
||||
-u, --usage Include token usage
|
||||
|
|
|
|||
|
|
@ -207,4 +207,18 @@ Output:
|
|||
{"name": "Ziggy", "ten_word_bio": "Quirky pug who loves belly rubs and quirky outfits."},
|
||||
{"name": "Robo", "ten_word_bio": "A cybernetic dog with laser eyes and super intelligence."},
|
||||
{"name": "Flamepaw", "ten_word_bio": "Fire-resistant dog with a talent for agility and tricks."}]
|
||||
```
|
||||
```
|
||||
Add `--data-ids` to include `"response_id"` and `"conversation_id"` fields in each of the returned objects, reflecting the database IDs of the response and conversation that each object came from. This can be useful for tracking the source of each individual row.
|
||||
|
||||
```bash
|
||||
llm logs --schema-multi 'name, ten_word_bio' --data-key items --data-ids
|
||||
```
|
||||
Output:
|
||||
```json
|
||||
{"name": "Nebula", "ten_word_bio": "A cosmic puppy with starry fur, loves adventures in space.", "response_id": "01jn4dawj8sq0c6t3emf4k5ryx", "conversation_id": "01jn4dawj8sq0c6t3emf4k5ryx"}
|
||||
{"name": "Echo", "ten_word_bio": "A clever hound with extraordinary hearing, master of hide-and-seek.", "response_id": "01jn4dawj8sq0c6t3emf4k5ryx", "conversation_id": "01jn4dawj8sq0c6t3emf4k5ryx"}
|
||||
{"name": "Biscuit", "ten_word_bio": "An adorable chef dog, bakes treats that everyone loves.", "response_id": "01jn4dawj8sq0c6t3emf4k5ryx", "conversation_id": "01jn4dawj8sq0c6t3emf4k5ryx"}
|
||||
{"name": "Cosmo", "ten_word_bio": "Galactic explorer, loves adventures and chasing shooting stars.", "response_id": "01jn4daycb3svj0x7kvp7zrp4q", "conversation_id": "01jn4daycb3svj0x7kvp7zrp4q"}
|
||||
{"name": "Pixel", "ten_word_bio": "Tech-savvy pup, builds gadgets and loves virtual playtime.", "response_id": "01jn4daycb3svj0x7kvp7zrp4q", "conversation_id": "01jn4daycb3svj0x7kvp7zrp4q"}
|
||||
```
|
||||
If a row already has a property called `"conversation_id"` or `"response_id"`, additional underscores will be appended to the ID key until it no longer overlaps with the existing keys.
|
||||
17
llm/cli.py
17
llm/cli.py
|
|
@ -48,6 +48,7 @@ from .utils import (
|
|||
schema_summary,
|
||||
multi_schema,
|
||||
schema_dsl,
|
||||
find_unused_key,
|
||||
)
|
||||
import base64
|
||||
import httpx
|
||||
|
|
@ -939,6 +940,9 @@ order by prompt_attachments."order"
|
|||
)
|
||||
@click.option("--data-array", is_flag=True, help="Output JSON array of data for schema")
|
||||
@click.option("--data-key", help="Return JSON objects from array in this key")
|
||||
@click.option(
|
||||
"--data-ids", is_flag=True, help="Attach corresponding IDs to JSON objects"
|
||||
)
|
||||
@click.option("-t", "--truncate", is_flag=True, help="Truncate long strings in output")
|
||||
@click.option(
|
||||
"-s", "--short", is_flag=True, help="Shorter YAML output with truncated prompts"
|
||||
|
|
@ -983,6 +987,7 @@ def logs_list(
|
|||
data,
|
||||
data_array,
|
||||
data_key,
|
||||
data_ids,
|
||||
truncate,
|
||||
short,
|
||||
usage,
|
||||
|
|
@ -1099,22 +1104,28 @@ def logs_list(
|
|||
for attachment in attachments:
|
||||
attachments_by_id.setdefault(attachment["response_id"], []).append(attachment)
|
||||
|
||||
if data or data_array or data_key:
|
||||
if data or data_array or data_key or data_ids:
|
||||
# Special case for --data to output valid JSON
|
||||
to_output = []
|
||||
for row in rows:
|
||||
response = row["response"] or ""
|
||||
try:
|
||||
decoded = json.loads(response)
|
||||
new_items = []
|
||||
if (
|
||||
isinstance(decoded, dict)
|
||||
and (data_key in decoded)
|
||||
and all(isinstance(item, dict) for item in decoded[data_key])
|
||||
):
|
||||
for item in decoded[data_key]:
|
||||
to_output.append(item)
|
||||
new_items.append(item)
|
||||
else:
|
||||
to_output.append(decoded)
|
||||
new_items.append(decoded)
|
||||
if data_ids:
|
||||
for item in new_items:
|
||||
item[find_unused_key(item, "response_id")] = row["id"]
|
||||
item[find_unused_key(item, "conversation_id")] = row["id"]
|
||||
to_output.extend(new_items)
|
||||
except ValueError:
|
||||
pass
|
||||
click.echo(output_rows_as_json(to_output, not data_array))
|
||||
|
|
|
|||
|
|
@ -384,3 +384,10 @@ def multi_schema(schema: dict) -> dict:
|
|||
"properties": {"items": {"type": "array", "items": schema}},
|
||||
"required": ["items"],
|
||||
}
|
||||
|
||||
|
||||
def find_unused_key(item: dict, key: str) -> str:
    """Return a variant of ``key`` that is not already a key of ``item``.

    Underscores are appended one at a time until the name is unused, e.g.
    for ``{"id": "1"}`` and key ``"id"`` the result is ``"id_"``. If ``key``
    is not present in ``item`` it is returned unchanged.
    """
    candidate = key
    while candidate in item:
        candidate = candidate + "_"
    return candidate
|
||||
|
|
|
|||
|
|
@ -353,3 +353,58 @@ def test_logs_schema(schema_log_path, args, expected):
|
|||
)
|
||||
assert result.exit_code == 0
|
||||
assert result.output == expected
|
||||
|
||||
|
||||
def test_logs_schema_data_ids(schema_log_path):
    """Check that ``llm logs --data-ids`` adds ID keys without clobbering data.

    Inserts one extra response whose JSON already contains ``response_id``,
    ``conversation_id`` and ``conversation_id_`` keys, then verifies that the
    attached ID keys get extra underscores and that the original values are
    left untouched.
    """
    db = sqlite_utils.Database(schema_log_path)
    # ULID built from a future timestamp; the test expects this row to be
    # the last one in the command output.
    ulid = ULID.from_timestamp(time.time() + 100)
    response_id = str(ulid).lower()
    db["responses"].insert(
        {
            "id": response_id,
            "system": "system",
            "prompt": "prompt",
            "response": json.dumps(
                {
                    "name": "three",
                    "response_id": 1,
                    "conversation_id": 2,
                    "conversation_id_": 3,
                }
            ),
            "model": "davinci",
            "datetime_utc": ulid.datetime.isoformat(),
            "conversation_id": "abc123",
            "input_tokens": 2,
            "output_tokens": 5,
            "schema_id": SINGLE_ID,
        }
    )
    runner = CliRunner()
    result = runner.invoke(
        cli,
        [
            "logs",
            "-n",
            "0",
            "-p",
            str(schema_log_path),
            "--data-ids",
            "--data-key",
            "items",
            "--data-array",
        ],
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    rows = json.loads(result.output)
    last_row = rows.pop(-1)
    assert set(last_row.keys()) == {
        "conversation_id_",
        "conversation_id",
        "response_id",
        "response_id_",
        "name",
        "conversation_id__",
    }
    # Values that were already in the stored JSON must survive unchanged.
    assert last_row["name"] == "three"
    assert last_row["response_id"] == 1
    assert last_row["conversation_id"] == 2
    assert last_row["conversation_id_"] == 3
    # The attached response ID lands on the first unused key.
    assert last_row["response_id_"] == response_id
    # NOTE(review): the value stored under "conversation_id__" is deliberately
    # not asserted here; confirm which ID the CLI is meant to put there
    # (the inserted row's conversation is "abc123") before pinning it.
    for row in rows:
        assert set(row.keys()) == {"conversation_id", "response_id", "name"}
|
||||
|
|
|
|||
Loading…
Reference in a new issue