llm schemas list --json/--nl options, closes #1070

2026-04-21 13:34:46 +00:00 · 2025-05-23 15:17:32 -07:00 · 2025-05-23 15:17:32 -07:00 · f6fefb3816
commit f6fefb3816
parent e18eb3a595
4 changed files with 121 additions and 21 deletions
--- a/docs/help.md
+++ b/docs/help.md
@@ -586,6 +586,8 @@ Options:
  -d, --database FILE  Path to log database
  -q, --query TEXT     Search for schemas matching this string
  --full               Output full schema contents
+  --json               Output as JSON
+  --nl                 Output as newline-delimited JSON
  --help               Show this message and exit.
 ```

--- a/llm/cli.py
+++ b/llm/cli.py
@@ -1638,7 +1638,8 @@ def logs_list(
                to_output.extend(new_items)
            except ValueError:
                pass
-        click.echo(output_rows_as_json(to_output, not data_array))
+        for line in output_rows_as_json(to_output, nl=not data_array, compact=True):
+            click.echo(line)
        return

    # Tool usage information
@@ -2194,7 +2195,9 @@ def schemas():
    help="Search for schemas matching this string",
 )
@click.option("--full", is_flag=True, help="Output full schema contents")
-def schemas_list(path, database, queries, full):
+@click.option("json_", "--json", is_flag=True, help="Output as JSON")
+@click.option("nl", "--nl", is_flag=True, help="Output as newline-delimited JSON")
+def schemas_list(path, database, queries, full, json_, nl):
    "List stored schemas"
    if database and not path:
        path = database
@@ -2226,6 +2229,12 @@ def schemas_list(path, database, queries, full):
        where_sql
    )
    rows = db.query(sql, params)
+
+    if json_ or nl:
+        for line in output_rows_as_json(rows, json_cols={"content"}, nl=nl):
+            click.echo(line)
+        return
+
    for row in rows:
        click.echo("- id: {}".format(row["id"]))
        if full:
@@ -3674,3 +3683,28 @@ def _gather_tools(tools, python_tools):
        )
    tool_functions.extend(registered_tools[tool] for tool in tools)
    return tool_functions
+
+
+def _stream_json(iterator, json_cols=None):
+    """Yield a JSON array one row per line, buffering at most one line.
+
+    Columns named in json_cols hold JSON text; they are decoded on a copy
+    of each row (None is kept as-is) so the caller's rows are not mutated.
+    """
+    json_cols = json_cols or ()
+    # Hold each line back until its successor arrives: only then is it known
+    # whether it ends with "," or "]". The private sentinel (not None) stops
+    # a literal None row from being mistaken for the end of the stream.
+    unset = pending = object()
+    for row in iterator:
+        if json_cols:
+            row = dict(row)
+            for col in json_cols:
+                if row[col] is not None:
+                    row[col] = json.loads(row[col])
+        if pending is not unset:
+            yield pending + ","
+        opener = "[" if pending is unset else " "
+        pending = opener + json.dumps(row, default=repr)
+    # No rows at all must still produce a valid (empty) JSON array
+    yield "[]" if pending is unset else pending + "]"
--- a/llm/utils.py
+++ b/llm/utils.py
@@ -1,6 +1,7 @@
 import click
 import hashlib
 import httpx
+import itertools
 import json
 import pathlib
 import puremagic
@@ -216,35 +217,52 @@ def make_schema_id(schema: dict) -> Tuple[str, str]:
    return schema_id, schema_json


-def output_rows_as_json(rows, nl=False):
+def output_rows_as_json(rows, nl=False, compact=False, json_cols=()):
    """
    Output rows as JSON - either newline-delimited or an array

    Parameters:
-    - rows: List of dictionaries to output
+    - rows: Iterable of dictionaries to output (never modified)
    - nl: Boolean, if True, use newline-delimited JSON
+    - compact: Boolean, if True uses [{"...": "..."}\\n {"...": "..."}] format
+    - json_cols: Iterable of columns that contain JSON (None is kept)

-    Returns:
-    - String with formatted JSON output
+    Yields:
+    - Stream of strings to be output
    """
-    if not rows:
-        return "" if nl else "[]"
+    # Array lines are held back until the next row arrives, because only then
+    # is it known whether a line ends with "," or closes the array. A private
+    # sentinel (not None) marks "nothing held yet", so a None row is harmless.
+    unset = pending = object()

-    lines = []
-    end_i = len(rows) - 1
-    for i, row in enumerate(rows):
-        is_first = i == 0
-        is_last = i == end_i
+    for row in rows:
+        if json_cols:
+            # Decode on a copy so the caller's dictionaries stay untouched
+            row = dict(row)
+            for col in json_cols:
+                if row[col] is not None:
+                    row[col] = json.loads(row[col])

-        line = "{firstchar}{serialized}{maybecomma}{lastchar}".format(
-            firstchar=("[" if is_first else " ") if not nl else "",
-            serialized=json.dumps(row),
-            maybecomma="," if (not nl and not is_last) else "",
-            lastchar="]" if (is_last and not nl) else "",
-        )
-        lines.append(line)
+        if nl:
+            # Newline-delimited JSON: one JSON object per line
+            yield json.dumps(row)
+            continue
+        if pending is not unset:
+            yield pending + ","
+        if compact:
+            # Compact array format: [{"...": "..."}\n {"...": "..."}]
+            opener = "[" if pending is unset else " "
+            pending = opener + json.dumps(row)
+        else:
+            # Pretty-printed array format with indentation
+            opener = "[\n" if pending is unset else ""
+            pending = opener + textwrap.indent(json.dumps(row, indent=2), "  ")

-    return "\n".join(lines)
+    if pending is not unset:
+        yield pending + ("]" if compact else "\n]")
+    elif not nl:
+        # We didn't output any rows, so yield the empty list
+        yield "[]"


 def resolve_schema_input(db, schema_input, load_template):
--- a/tests/test_llm_logs.py
+++ b/tests/test_llm_logs.py
@@ -430,6 +430,52 @@ def test_logs_schema_data_ids(schema_log_path):
        assert set(row.keys()) == {"conversation_id", "response_id", "name"}


+# Expected plain-text listing: two schemas, each with an empty summary block
+# and a usage line giving the use count and the last-used timestamp
+_expected_yaml_re = r"""- id: [a-f0-9]{32}
+  summary: \|
+    
+  usage: \|
+    4 times, most recently \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}\+00:00
+- id: [a-f0-9]{32}
+  summary: \|
+    
+  usage: \|
+    2 times, most recently \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}\+00:00"""
+
+
+@pytest.mark.parametrize(
+    "args,expected",
+    [(["schemas"], _expected_yaml_re), (["schemas", "list"], _expected_yaml_re)],
+)
+def test_schemas_list_yaml(schema_log_path, args, expected):
+    """Both ``llm schemas`` and ``llm schemas list`` print the default listing."""
+    outcome = CliRunner().invoke(cli, [*args, "-d", str(schema_log_path)])
+    assert outcome.exit_code == 0
+    assert re.match(expected, outcome.output.strip())
+
+
+@pytest.mark.parametrize("is_nl", (False, True))
+def test_schemas_list_json(schema_log_path, is_nl):
+    """--json emits a single JSON array; --nl emits one JSON object per line."""
+    format_flag = "--nl" if is_nl else "--json"
+    cmd = ["schemas", "list", format_flag, "-d", str(schema_log_path)]
+    outcome = CliRunner().invoke(cli, cmd)
+    assert outcome.exit_code == 0
+    if is_nl:
+        # Each output line is parsed as its own JSON document
+        schemas = [json.loads(ln) for ln in outcome.output.strip().split("\n")]
+    else:
+        schemas = json.loads(outcome.output)
+    assert len(schemas) == 2
+    first, second = schemas
+    # The "content" column must come back decoded, not as a JSON string
+    assert first["content"] == {"name": "array"}
+    assert second["content"] == {"name": "string"}
+    assert (first["times_used"], second["times_used"]) == (4, 2)
+    assert set(first) == {"id", "content", "recently_used", "times_used"}
+
+
@pytest.fixture
 def fragments_fixture(user_path):
    log_path = str(user_path / "logs_fragments.db")