llm schemas list --json/--nl options, closes #1070

This commit is contained in:
Simon Willison 2025-05-23 15:17:32 -07:00
parent e18eb3a595
commit f6fefb3816
4 changed files with 121 additions and 21 deletions

View file

@ -586,6 +586,8 @@ Options:
-d, --database FILE Path to log database
-q, --query TEXT Search for schemas matching this string
--full Output full schema contents
--json Output as JSON
--nl Output as newline-delimited JSON
--help Show this message and exit.
```

View file

@ -1638,7 +1638,8 @@ def logs_list(
to_output.extend(new_items)
except ValueError:
pass
click.echo(output_rows_as_json(to_output, not data_array))
for line in output_rows_as_json(to_output, nl=not data_array, compact=True):
click.echo(line)
return
# Tool usage information
@ -2194,7 +2195,9 @@ def schemas():
help="Search for schemas matching this string",
)
@click.option("--full", is_flag=True, help="Output full schema contents")
def schemas_list(path, database, queries, full):
@click.option("json_", "--json", is_flag=True, help="Output as JSON")
@click.option("nl", "--nl", is_flag=True, help="Output as newline-delimited JSON")
def schemas_list(path, database, queries, full, json_, nl):
"List stored schemas"
if database and not path:
path = database
@ -2226,6 +2229,12 @@ def schemas_list(path, database, queries, full):
where_sql
)
rows = db.query(sql, params)
if json_ or nl:
for line in output_rows_as_json(rows, json_cols={"content"}, nl=nl):
click.echo(line)
return
for row in rows:
click.echo("- id: {}".format(row["id"]))
if full:
@ -3674,3 +3683,28 @@ def _gather_tools(tools, python_tools):
)
tool_functions.extend(registered_tools[tool] for tool in tools)
return tool_functions
def _stream_json(iterator, json_cols=None):
    """
    Stream rows as the lines of a single JSON array, one row per line.

    Parameters:
    - iterator: Iterable of rows to serialize (dictionaries when json_cols
      is used)
    - json_cols: Optional iterable of keys whose values are JSON strings
      that should be decoded and embedded as nested JSON

    Yields:
    - One string per row: the first starts with "[", later ones with a
      single space, every row but the last ends with "," and the last ends
      with "]". An empty input yields the single string "[]".

    Values json.dumps cannot serialize natively are emitted via repr().
    """
    json_cols = json_cols or ()
    # Hold each serialized row back by one step so that we only learn a row
    # is the last one once the iterator is exhausted. This avoids using None
    # as an end-of-data marker, which breaks if a row is itself None.
    held = None
    first = True
    for row in iterator:
        if held is not None:
            yield ("[" if first else " ") + held + ","
            first = False
        data = row
        if json_cols:
            # Decode into a copy so the caller's row is left untouched
            data = dict(row)
            for col in json_cols:
                data[col] = json.loads(data[col])
        held = json.dumps(data, default=repr)
    if held is None:
        # We didn't output any rows, so yield the empty list
        yield "[]"
    else:
        yield ("[" if first else " ") + held + "]"

View file

@ -1,6 +1,7 @@
import click
import hashlib
import httpx
import itertools
import json
import pathlib
import puremagic
@ -216,35 +217,52 @@ def make_schema_id(schema: dict) -> Tuple[str, str]:
return schema_id, schema_json
def output_rows_as_json(rows, nl=False, compact=False, json_cols=()):
    """
    Output rows as JSON - either newline-delimited or an array

    Parameters:
    - rows: Iterable of dictionaries to output
    - nl: Boolean, if True, use newline-delimited JSON
    - compact: Boolean, if True output a single array with one row per
      line; otherwise the array is pretty-printed with indentation
    - json_cols: Iterable of columns that contain JSON

    Yields:
    - Stream of strings to be output. An empty input yields nothing when
      nl is True and the single string "[]" otherwise.
    """

    def _decoded(row):
        # Decode JSON columns into a copy so the caller's rows are not mutated
        if not json_cols:
            return row
        copy = dict(row)
        for col in json_cols:
            copy[col] = json.loads(copy[col])
        return copy

    if nl:
        # Newline-delimited JSON: one JSON object per line
        for row in rows:
            yield json.dumps(_decoded(row))
        return

    if compact:
        # Compact array format: one row per line inside a single array
        opener, continuation, closer = "[", " ", "]"

        def _serialize(row):
            return json.dumps(row)

    else:
        # Pretty-printed array format with indentation
        opener, continuation, closer = "[\n", "", "\n]"

        def _serialize(row):
            return textwrap.indent(json.dumps(row, indent=2), " ")

    # Hold each serialized row back by one step: whether it needs a trailing
    # comma or the closing bracket is only known once we have tried to fetch
    # the following row. No None sentinel is used, so a None row is safe.
    held = None
    first = True
    for row in rows:
        if held is not None:
            yield (opener if first else continuation) + held + ","
            first = False
        held = _serialize(_decoded(row))
    if held is None:
        # We didn't output any rows, so yield the empty list
        yield "[]"
    else:
        yield (opener if first else continuation) + held + closer
def resolve_schema_input(db, schema_input, load_template):

View file

@ -430,6 +430,52 @@ def test_logs_schema_data_ids(schema_log_path):
assert set(row.keys()) == {"conversation_id", "response_id", "name"}
# Regex for the default (non-JSON) schemas listing: two entries, each with a
# 32-character hex id, a summary block and a usage block reporting 4 and then
# 2 uses with a microsecond-precision +00:00 timestamp.
_expected_yaml_re = r"""- id: [a-f0-9]{32}
summary: \|
usage: \|
4 times, most recently \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}\+00:00
- id: [a-f0-9]{32}
summary: \|
usage: \|
2 times, most recently \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}\+00:00"""
@pytest.mark.parametrize(
    "args,expected",
    (
        (["schemas"], _expected_yaml_re),
        (["schemas", "list"], _expected_yaml_re),
    ),
)
def test_schemas_list_yaml(schema_log_path, args, expected):
    """The `schemas` and `schemas list` invocations both match the expected listing."""
    command = [*args, "-d", str(schema_log_path)]
    outcome = CliRunner().invoke(cli, command)
    assert outcome.exit_code == 0
    # Surrounding whitespace is not significant; match from the first entry
    assert re.match(expected, outcome.output.strip())
@pytest.mark.parametrize("is_nl", (False, True))
def test_schemas_list_json(schema_log_path, is_nl):
    """`schemas list` returns the same two rows for --json and --nl output."""
    format_flag = "--nl" if is_nl else "--json"
    result = CliRunner().invoke(
        cli, ["schemas", "list", format_flag, "-d", str(schema_log_path)]
    )
    assert result.exit_code == 0
    if is_nl:
        # Newline-delimited output: every line is its own JSON document
        rows = [json.loads(line) for line in result.output.strip().split("\n")]
    else:
        # Array output: the whole of stdout is one JSON document
        rows = json.loads(result.output)
    assert len(rows) == 2
    first_row, second_row = rows
    assert first_row["content"] == {"name": "array"}
    assert first_row["times_used"] == 4
    assert second_row["content"] == {"name": "string"}
    assert second_row["times_used"] == 2
    assert set(first_row.keys()) == {"id", "content", "recently_used", "times_used"}
@pytest.fixture
def fragments_fixture(user_path):
log_path = str(user_path / "logs_fragments.db")