From 8a1fc5a90ee29c20d3e8bed1e38f4b5a10b8ea3d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Fri, 23 May 2025 21:45:58 -0700 Subject: [PATCH] llm logs --tool/-T option, closes #1013 --- docs/help.md | 1 + docs/logging.md | 28 ++++++++++++++++++++++++---- llm/cli.py | 37 +++++++++++++++++++++++++++++++++++++ tests/test_plugins.py | 13 ++++++++++++- 4 files changed, 74 insertions(+), 5 deletions(-) diff --git a/docs/help.md b/docs/help.md index 6848773..3c5e2de 100644 --- a/docs/help.md +++ b/docs/help.md @@ -342,6 +342,7 @@ Options: -m, --model TEXT Filter by model or model alias -q, --query TEXT Search for logs matching this string -f, --fragment TEXT Filter for prompts using these fragments + -T, --tool TEXT Filter for prompts using these tools --schema TEXT JSON schema, filepath or ID --schema-multi TEXT JSON schema used for multiple results --data Output newline-delimited JSON data for schema diff --git a/docs/logging.md b/docs/logging.md index 8be97c4..7329b82 100644 --- a/docs/logging.md +++ b/docs/logging.md @@ -203,7 +203,7 @@ llm logs -m chatgpt (logging-filter-fragments)= -### Filtering by prompts that used a specific fragment +### Filtering by prompts that used specific fragments The `-f/--fragment X` option will filter for just responses that were created using the specified {ref}`fragment ` hash or alias or URL or filename. @@ -217,16 +217,36 @@ You can display just the content for a specific fragment hash ID (or alias) usin ```bash llm fragments show 993fd38d898d2b59fd2d16c811da5bdac658faa34f0f4d411edde7c17ebb0680 ``` +If you provide multiple fragments you will get back responses that used _all_ of those fragments. + +(logging-filter-tools)= + +### Filtering by prompts that used specific tools + +You can filter for responses that used specific tools with the `--tool/-T` option: + +```bash +llm logs -T simple_eval +``` +This will match responses that involved a _result_ from that tool. 
If the tool was not executed, that response will not be included in the filtered responses. + +Pass `--tool/-T` multiple times for responses that used all of the specified tools. (logging-filter-schemas)= ### Browsing data collected using schemas -The `--schema X` option can be used to view responses that used the specified schema. This can be combined with `--data` and `--data-array` and `--data-key` to extract just the returned JSON data - consult the {ref}`schemas documentation ` for details. +The `--schema X` option can be used to view responses that used the specified schema, using any of the {ref}`ways to specify a schema `: + +```bash +llm logs --schema 'name, age int, bio' +``` + +This can be combined with `--data` and `--data-array` and `--data-key` to extract just the returned JSON data - consult the {ref}`schemas documentation ` for details. (logging-datasette)= -### Browsing logs using Datasette +## Browsing logs using Datasette You can also use [Datasette](https://datasette.io/) to browse your logs like this: @@ -236,7 +256,7 @@ datasette "$(llm logs path)" (logging-backup)= -### Backing up your database +## Backing up your database You can backup your logs to another file using the `llm logs backup` command: diff --git a/llm/cli.py b/llm/cli.py index 2b48a7b..ea3cabe 100644 --- a/llm/cli.py +++ b/llm/cli.py @@ -1370,6 +1370,13 @@ order by prompt_attachments."order" help="Filter for prompts using these fragments", multiple=True, ) +@click.option( + "tools", + "-T", + "--tool", + multiple=True, + help="Filter for prompts using these tools", +) @schema_option @click.option( "--schema-multi", @@ -1432,6 +1439,7 @@ def logs_list( model, query, fragments, + tools, schema_input, schema_multi, data, @@ -1564,6 +1572,35 @@ def logs_list( sql_params["f{}".format(i)] = fragment_hash where_bits.append(" AND ".join(exists_clauses)) + + if tools: + tools_by_name = get_tools() + # Filter responses by tools (must have ALL of the named tools, including plugin) + tool_clauses = [] 
+ for i, tool_name in enumerate(tools): + try: + plugin_name = tools_by_name[tool_name].plugin + except KeyError: + raise click.ClickException(f"Unknown tool: {tool_name}") + + tool_clauses.append( + f""" + exists ( + select 1 + from tool_results tr + join tools t on t.id = tr.tool_id + where tr.response_id = responses.id + and t.name = :tool{i} + and t.plugin = :plugin{i} + ) + """ + ) + sql_params[f"tool{i}"] = tool_name + sql_params[f"plugin{i}"] = plugin_name + + # AND means “must have all” — use OR instead if you want “any of” + where_bits.append(" AND ".join(tool_clauses)) + schema_id = None if schema: schema_id = make_schema_id(schema)[0] diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 2965067..b499407 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -312,11 +312,22 @@ def test_register_tools(tmpdir, logs_db): ) assert result4.exit_code == 0 assert '"output": "HI"' in result4.output + # Now check in the database tool_row = [row for row in logs_db["tools"].rows][0] assert tool_row["name"] == "upper" assert tool_row["plugin"] == "ToolsPlugin" + # The llm logs command should return that, including with the -T upper option + for args in ([], ["-T", "upper"]): + logs_result = runner.invoke(cli.cli, ["logs"] + args) + assert logs_result.exit_code == 0 + assert "HI" in logs_result.output + # ... 
but not for -T count_chars + logs_empty_result = runner.invoke(cli.cli, ["logs", "-T", "count_chars"]) + assert logs_empty_result.exit_code == 0 + assert "HI" not in logs_empty_result.output + # Start with a tool, use llm -c to reuse the same tool result5 = runner.invoke( cli.cli, @@ -369,7 +380,7 @@ def test_register_tools(tmpdir, logs_db): == 0 ) # Should have logged three tool uses in llm logs -c -n 0 - log_output = runner.invoke(cli.cli, ["logs", "-c", "-n", "10"]).output + log_output = runner.invoke(cli.cli, ["logs", "-c", "-n", "11"]).output log_pattern = re.compile( r"""tool_calls.*?"text": "one".*?ONE.*?""" r"""tool_calls.*?"text": "two".*?TWO.*?"""