From 99a1adcecee6c19005ed82253b14d64a9b34fd22 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Thu, 27 Feb 2025 07:35:28 -0800
Subject: [PATCH] Initial llm schemas list implementation, refs #781

---
Review notes (revised copy of the original commit):
- Line structure and indentation restored so the patch applies again.
- schema_summary() no longer crashes on boolean property schemas.
- Explanatory comments and a docstring example added; no CLI output change.
- Hunk sizes and the diffstat below were recomputed; the blob ids on the
  "index" lines are those of the original commit.

 docs/help.md | 28 +++++++++++++++++++++
 llm/cli.py   | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 llm/utils.py | 52 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 151 insertions(+)

diff --git a/docs/help.md b/docs/help.md
index f266961..aa48f78 100644
--- a/docs/help.md
+++ b/docs/help.md
@@ -77,6 +77,7 @@ Commands:
   models        Manage available models
   openai        Commands for working directly with the OpenAI API
   plugins       List installed plugins
+  schemas       Manage stored schemas
   similar       Return top N similar IDs from a collection using cosine...
   templates     Manage stored prompt templates
   uninstall     Uninstall Python packages from the LLM environment
@@ -417,6 +418,33 @@ Options:
   --help  Show this message and exit.
 ```
 
+(help-schemas)=
+### llm schemas --help
+```
+Usage: llm schemas [OPTIONS] COMMAND [ARGS]...
+
+  Manage stored schemas
+
+Options:
+  --help  Show this message and exit.
+
+Commands:
+  list*  List stored schemas
+```
+
+(help-schemas-list)=
+#### llm schemas list --help
+```
+Usage: llm schemas list [OPTIONS]
+
+  List stored schemas
+
+Options:
+  -p, --path FILE   Path to log database
+  -q, --query TEXT  Search for schemas matching this string
+  --help            Show this message and exit.
+```
+
 (help-aliases)=
 ### llm aliases --help
 ```
diff --git a/llm/cli.py b/llm/cli.py
index 09eba95..b200ee3 100644
--- a/llm/cli.py
+++ b/llm/cli.py
@@ -45,6 +45,7 @@ from .utils import (
     make_schema_id,
     output_rows_as_json,
     resolve_schema_input,
+    schema_summary,
 )
 import base64
 import httpx
@@ -1373,6 +1374,76 @@ def templates_list():
         click.echo(display_truncated(text))
 
 
+@cli.group(
+    cls=DefaultGroup,
+    default="list",
+    default_if_no_args=True,
+)
+def schemas():
+    "Manage stored schemas"
+
+
+@schemas.command(name="list")
+@click.option(
+    "-p",
+    "--path",
+    type=click.Path(readable=True, exists=True, dir_okay=False),
+    help="Path to log database",
+)
+@click.option(
+    "queries",
+    "-q",
+    "--query",
+    multiple=True,
+    help="Search for schemas matching this string",
+)
+def schemas_list(path, queries):
+    "List stored schemas"
+    path = pathlib.Path(path or logs_db_path())
+    if not path.exists():
+        raise click.ClickException("No log database found at {}".format(path))
+    db = sqlite_utils.Database(path)
+    migrate(db)
+
+    params = []
+    where_sql = ""
+    if queries:
+        # Every -q term has to match, so AND one LIKE clause per term
+        where_bits = ["schemas.content like ?" for _ in queries]
+        where_sql += " where {}".format(" and ".join(where_bits))
+        params.extend("%{}%".format(q) for q in queries)
+
+    # Inner join: only schemas used by at least one logged response are
+    # listed, along with their use count and most recent use
+    sql = """
+    select
+      schemas.id,
+      schemas.content,
+      max(responses.datetime_utc) as recently_used,
+      count(*) as times_used
+    from schemas
+    join responses
+      on responses.schema_id = schemas.id
+    {} group by responses.schema_id
+    order by recently_used
+    """.format(
+        where_sql
+    )
+    rows = db.query(sql, params)
+    for row in rows:
+        click.echo("- id: {}".format(row["id"]))
+        click.echo(
+            "  summary: |\n    {}".format(schema_summary(json.loads(row["content"])))
+        )
+        click.echo(
+            "  usage: |\n    {} time{}, most recently {}".format(
+                row["times_used"],
+                "s" if row["times_used"] != 1 else "",
+                row["recently_used"],
+            )
+        )
+
+
 @cli.group(
     cls=DefaultGroup,
     default="list",
diff --git a/llm/utils.py b/llm/utils.py
index e9496ce..a7d4a2c 100644
--- a/llm/utils.py
+++ b/llm/utils.py
@@ -256,3 +256,55 @@ def resolve_schema_input(db, schema_input):
         return json.loads(row["content"])
     except (sqlite_utils.db.NotFoundError, ValueError):
         raise click.BadParameter("Invalid schema")
+
+
+def schema_summary(schema: dict) -> str:
+    """
+    Extract property names from a JSON schema and format them in a
+    concise way that highlights the array/object structure.
+
+    For example, an object schema with a string property "name" and
+    an array-of-strings property "tags" is summarized as
+    "{name, tags: []}".
+
+    Args:
+        schema (dict): A JSON schema dictionary
+
+    Returns:
+        str: A human-friendly summary of the schema structure
+    """
+    if not schema or not isinstance(schema, dict):
+        return ""
+
+    schema_type = schema.get("type", "")
+
+    if schema_type == "object":
+        props = schema.get("properties", {})
+        prop_summaries = []
+
+        for name, prop_schema in props.items():
+            # JSON Schema permits a bare boolean as a property schema;
+            # it has no "type" to inspect, so list the name on its own
+            if not isinstance(prop_schema, dict):
+                prop_summaries.append(name)
+                continue
+
+            prop_type = prop_schema.get("type", "")
+
+            if prop_type == "array":
+                items = prop_schema.get("items", {})
+                items_summary = schema_summary(items)
+                prop_summaries.append(f"{name}: [{items_summary}]")
+            elif prop_type == "object":
+                nested_summary = schema_summary(prop_schema)
+                prop_summaries.append(f"{name}: {nested_summary}")
+            else:
+                prop_summaries.append(name)
+
+        return "{" + ", ".join(prop_summaries) + "}"
+
+    elif schema_type == "array":
+        items = schema.get("items", {})
+        return schema_summary(items)
+
+    return ""