From 8a1fc5a90ee29c20d3e8bed1e38f4b5a10b8ea3d Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Fri, 23 May 2025 21:45:58 -0700
Subject: [PATCH] llm logs --tool/-T option, closes #1013

---
 docs/help.md          |  1 +
 docs/logging.md       | 28 ++++++++++++++++++++++++----
 llm/cli.py            | 37 +++++++++++++++++++++++++++++++++++++
 tests/test_plugins.py | 13 ++++++++++++-
 4 files changed, 74 insertions(+), 5 deletions(-)
diff --git a/docs/help.md b/docs/help.md
index 6848773..3c5e2de 100644
--- a/docs/help.md
+++ b/docs/help.md
@@ -342,6 +342,7 @@ Options:
   -m, --model TEXT            Filter by model or model alias
   -q, --query TEXT            Search for logs matching this string
   -f, --fragment TEXT         Filter for prompts using these fragments
+  -T, --tool TEXT             Filter for prompts using these tools
   --schema TEXT               JSON schema, filepath or ID
   --schema-multi TEXT         JSON schema used for multiple results
   --data                      Output newline-delimited JSON data for schema
diff --git a/docs/logging.md b/docs/logging.md
index 8be97c4..7329b82 100644
--- a/docs/logging.md
+++ b/docs/logging.md
@@ -203,7 +203,7 @@ llm logs -m chatgpt
 
 (logging-filter-fragments)=
 
-### Filtering by prompts that used a specific fragment
+### Filtering by prompts that used specific fragments
 
 The `-f/--fragment X` option will filter for just responses that were created using the specified {ref}`fragment <usage-fragments>` hash or alias or URL or filename.
 
@@ -217,16 +217,36 @@ You can display just the content for a specific fragment hash ID (or alias) usin
 ```bash
 llm fragments show 993fd38d898d2b59fd2d16c811da5bdac658faa34f0f4d411edde7c17ebb0680
 ```
+If you provide multiple fragments you will get back responses that used _all_ of those fragments.
+
+(logging-filter-tools)=
+
+### Filtering by prompts that used specific tools
+
+You can filter for responses that used specific tools with the `--tool/-T` option:
+
+```bash
+llm logs -T simple_eval
+```
+This will match responses that involved a _result_ from that tool. If the tool was not executed, the response will not be included in the filtered results.
+
+Pass `--tool/-T` multiple times for responses that used all of the specified tools.
 
 (logging-filter-schemas)=
 
 ### Browsing data collected using schemas
 
-The `--schema X` option can be used to view responses that used the specified schema. This can be combined with `--data` and `--data-array` and `--data-key` to extract just the returned JSON data - consult the {ref}`schemas documentation <schemas-logs>` for details.
+The `--schema X` option can be used to view responses that used the specified schema, using any of the {ref}`ways to specify a schema <schemas-specify>`:
+
+```bash
+llm logs --schema 'name, age int, bio'
+```
+
+This can be combined with `--data` and `--data-array` and `--data-key` to extract just the returned JSON data - consult the {ref}`schemas documentation <schemas-logs>` for details.
 
 (logging-datasette)=
 
-### Browsing logs using Datasette
+## Browsing logs using Datasette
 
 You can also use [Datasette](https://datasette.io/) to browse your logs like this:
 
@@ -236,7 +256,7 @@ datasette "$(llm logs path)"
 
 (logging-backup)=
 
-### Backing up your database
+## Backing up your database
 
 You can backup your logs to another file using the `llm logs backup` command:
 
diff --git a/llm/cli.py b/llm/cli.py
index 2b48a7b..ea3cabe 100644
--- a/llm/cli.py
+++ b/llm/cli.py
@@ -1370,6 +1370,13 @@ order by prompt_attachments."order"
     help="Filter for prompts using these fragments",
     multiple=True,
 )
+@click.option(
+    "tools",
+    "-T",
+    "--tool",
+    multiple=True,
+    help="Filter for prompts using these tools",
+)
 @schema_option
 @click.option(
     "--schema-multi",
@@ -1432,6 +1439,7 @@ def logs_list(
     model,
     query,
     fragments,
+    tools,
     schema_input,
     schema_multi,
     data,
@@ -1564,6 +1572,35 @@ def logs_list(
             sql_params["f{}".format(i)] = fragment_hash
 
         where_bits.append(" AND ".join(exists_clauses))
+
+    if tools:
+        tools_by_name = get_tools()
+        # Filter responses by tools (must have ALL of the named tools, each matched by name and plugin)
+        tool_clauses = []
+        for i, tool_name in enumerate(tools):
+            try:
+                plugin_name = tools_by_name[tool_name].plugin
+            except KeyError:
+                raise click.ClickException(f"Unknown tool: {tool_name}")
+
+            tool_clauses.append(
+                f"""
+            exists (
+              select 1
+                from tool_results tr
+                join tools t on t.id = tr.tool_id
+               where tr.response_id = responses.id
+                 and t.name = :tool{i}
+                 and t.plugin = :plugin{i}
+            )
+            """
+            )
+            sql_params[f"tool{i}"] = tool_name
+            sql_params[f"plugin{i}"] = plugin_name
+
+        # AND means “must have all” — use OR instead if you want “any of”
+        where_bits.append(" AND ".join(tool_clauses))
+
     schema_id = None
     if schema:
         schema_id = make_schema_id(schema)[0]
diff --git a/tests/test_plugins.py b/tests/test_plugins.py
index 2965067..b499407 100644
--- a/tests/test_plugins.py
+++ b/tests/test_plugins.py
@@ -312,11 +312,22 @@ def test_register_tools(tmpdir, logs_db):
         )
         assert result4.exit_code == 0
         assert '"output": "HI"' in result4.output
+
         # Now check in the database
         tool_row = [row for row in logs_db["tools"].rows][0]
         assert tool_row["name"] == "upper"
         assert tool_row["plugin"] == "ToolsPlugin"
 
+        # The llm logs command should return that, including with the -T upper option
+        for args in ([], ["-T", "upper"]):
+            logs_result = runner.invoke(cli.cli, ["logs"] + args)
+            assert logs_result.exit_code == 0
+            assert "HI" in logs_result.output
+        # ... but not for -T count_chars
+        logs_empty_result = runner.invoke(cli.cli, ["logs", "-T", "count_chars"])
+        assert logs_empty_result.exit_code == 0
+        assert "HI" not in logs_empty_result.output
+
         # Start with a tool, use llm -c to reuse the same tool
         result5 = runner.invoke(
             cli.cli,
@@ -369,7 +380,7 @@ def test_register_tools(tmpdir, logs_db):
             == 0
         )
         # Should have logged three tool uses in llm logs -c -n 0
-        log_output = runner.invoke(cli.cli, ["logs", "-c", "-n", "10"]).output
+        log_output = runner.invoke(cli.cli, ["logs", "-c", "-n", "11"]).output
         log_pattern = re.compile(
             r"""tool_calls.*?"text": "one".*?ONE.*?"""
             r"""tool_calls.*?"text": "two".*?TWO.*?"""