llm/tests/test_plugins.py
2025-05-24 13:18:09 -07:00

440 lines
14 KiB
Python

from click.testing import CliRunner
import click
import importlib
import json
import llm
from llm import cli, hookimpl, plugins, get_template_loaders, get_fragment_loaders
import textwrap
def test_register_commands():
importlib.reload(cli)
def plugin_names():
return [plugin["name"] for plugin in llm.get_plugins()]
assert "HelloWorldPlugin" not in plugin_names()
class HelloWorldPlugin:
__name__ = "HelloWorldPlugin"
@hookimpl
def register_commands(self, cli):
@cli.command(name="hello-world")
def hello_world():
"Print hello world"
click.echo("Hello world!")
try:
plugins.pm.register(HelloWorldPlugin(), name="HelloWorldPlugin")
importlib.reload(cli)
assert "HelloWorldPlugin" in plugin_names()
runner = CliRunner()
result = runner.invoke(cli.cli, ["hello-world"])
assert result.exit_code == 0
assert result.output == "Hello world!\n"
finally:
plugins.pm.unregister(name="HelloWorldPlugin")
importlib.reload(cli)
assert "HelloWorldPlugin" not in plugin_names()
def test_register_template_loaders():
assert get_template_loaders() == {}
def one_loader(template_path):
return llm.Template(name="one:" + template_path, prompt=template_path)
def two_loader(template_path):
"Docs for two"
return llm.Template(name="two:" + template_path, prompt=template_path)
def dupe_two_loader(template_path):
"Docs for two dupe"
return llm.Template(name="two:" + template_path, prompt=template_path)
class TemplateLoadersPlugin:
__name__ = "TemplateLoadersPlugin"
@hookimpl
def register_template_loaders(self, register):
register("one", one_loader)
register("two", two_loader)
register("two", dupe_two_loader)
try:
plugins.pm.register(TemplateLoadersPlugin(), name="TemplateLoadersPlugin")
loaders = get_template_loaders()
assert loaders == {
"one": one_loader,
"two": two_loader,
"two_1": dupe_two_loader,
}
# Test the CLI command
runner = CliRunner()
result = runner.invoke(cli.cli, ["templates", "loaders"])
assert result.exit_code == 0
assert result.output == (
"one:\n"
" Undocumented\n"
"two:\n"
" Docs for two\n"
"two_1:\n"
" Docs for two dupe\n"
)
finally:
plugins.pm.unregister(name="TemplateLoadersPlugin")
assert get_template_loaders() == {}
def test_register_fragment_loaders(logs_db, httpx_mock):
httpx_mock.add_response(
method="HEAD",
url="https://example.com/attachment.png",
content=b"attachment",
headers={"Content-Type": "image/png"},
is_reusable=True,
)
assert get_fragment_loaders() == {}
def single_fragment(argument):
"This is the fragment documentation"
return llm.Fragment("single", "single")
def three_fragments(argument):
return [
llm.Fragment(f"one:{argument}", "one"),
llm.Fragment(f"two:{argument}", "two"),
llm.Fragment(f"three:{argument}", "three"),
]
def fragment_and_attachment(argument):
return [
llm.Fragment(f"one:{argument}", "one"),
llm.Attachment(url="https://example.com/attachment.png"),
]
class FragmentLoadersPlugin:
__name__ = "FragmentLoadersPlugin"
@hookimpl
def register_fragment_loaders(self, register):
register("single", single_fragment)
register("three", three_fragments)
register("mixed", fragment_and_attachment)
try:
plugins.pm.register(FragmentLoadersPlugin(), name="FragmentLoadersPlugin")
loaders = get_fragment_loaders()
assert loaders == {
"single": single_fragment,
"three": three_fragments,
"mixed": fragment_and_attachment,
}
# Test the CLI command
runner = CliRunner()
result = runner.invoke(
cli.cli, ["-m", "echo", "-f", "three:x"], catch_exceptions=False
)
assert result.exit_code == 0
assert json.loads(result.output) == {
"prompt": "one:x\ntwo:x\nthree:x",
"system": "",
"attachments": [],
"stream": True,
"previous": [],
}
# And the llm fragments loaders command:
result2 = runner.invoke(cli.cli, ["fragments", "loaders"])
assert result2.exit_code == 0
expected2 = (
"single:\n"
" This is the fragment documentation\n"
"\n"
"three:\n"
" Undocumented\n"
"\n"
"mixed:\n"
" Undocumented\n"
)
assert result2.output == expected2
# Test the one that includes an attachment
result3 = runner.invoke(
cli.cli, ["-m", "echo", "-f", "mixed:x"], catch_exceptions=False
)
assert result3.exit_code == 0
result3.output.strip == textwrap.dedent(
"""\
system:
prompt:
one:x
attachments:
- https://example.com/attachment.png
"""
).strip()
finally:
plugins.pm.unregister(name="FragmentLoadersPlugin")
assert get_fragment_loaders() == {}
# Let's check the database
assert list(logs_db.query("select content, source from fragments")) == [
{"content": "one:x", "source": "one"},
{"content": "two:x", "source": "two"},
{"content": "three:x", "source": "three"},
]
def test_register_tools(tmpdir, logs_db):
def upper(text: str) -> str:
"""Convert text to uppercase."""
return text.upper()
def count_character_in_word(text: str, character: str) -> int:
"""Count the number of occurrences of a character in a word."""
return text.count(character)
class ToolsPlugin:
__name__ = "ToolsPlugin"
@hookimpl
def register_tools(self, register):
register(llm.Tool.function(upper))
register(count_character_in_word, name="count_chars")
try:
plugins.pm.register(ToolsPlugin(), name="ToolsPlugin")
tools = llm.get_tools()
assert tools == {
"upper": llm.Tool(
name="upper",
description="Convert text to uppercase.",
input_schema={
"properties": {"text": {"type": "string"}},
"required": ["text"],
"type": "object",
},
implementation=upper,
plugin="ToolsPlugin",
),
"count_chars": llm.Tool(
name="count_chars",
description="Count the number of occurrences of a character in a word.",
input_schema={
"properties": {
"text": {"type": "string"},
"character": {"type": "string"},
},
"required": ["text", "character"],
"type": "object",
},
implementation=count_character_in_word,
plugin="ToolsPlugin",
),
}
# Test the CLI command
runner = CliRunner()
result = runner.invoke(cli.cli, ["tools", "list"])
assert result.exit_code == 0
assert result.output == (
"upper(text: str) -> str (plugin: ToolsPlugin)\n"
" Convert text to uppercase.\n"
"count_chars(text: str, character: str) -> int (plugin: ToolsPlugin)\n"
" Count the number of occurrences of a character in a word.\n"
)
# And --json
result2 = runner.invoke(cli.cli, ["tools", "list", "--json"])
assert result2.exit_code == 0
assert json.loads(result2.output) == {
"upper": {
"description": "Convert text to uppercase.",
"arguments": {
"properties": {"text": {"type": "string"}},
"required": ["text"],
"type": "object",
},
"plugin": "ToolsPlugin",
},
"count_chars": {
"description": "Count the number of occurrences of a character in a word.",
"arguments": {
"properties": {
"text": {"type": "string"},
"character": {"type": "string"},
},
"required": ["text", "character"],
"type": "object",
},
"plugin": "ToolsPlugin",
},
}
# And test the --tools option
functions_path = str(tmpdir / "functions.py")
with open(functions_path, "w") as fp:
fp.write("def example(s: str, i: int):\n return s + '-' + str(i)")
result3 = runner.invoke(
cli.cli,
[
"tools",
"--functions",
"def reverse(s: str): return s[::-1]",
"--functions",
functions_path,
],
)
assert result3.exit_code == 0
assert "reverse(s: str)" in result3.output
assert "example(s: str, i: int)" in result3.output
# Now run a prompt using a plugin tool and to check it gets logged correctly
result4 = runner.invoke(
cli.cli,
[
"-m",
"echo",
"--tool",
"upper",
json.dumps(
{"tool_calls": [{"name": "upper", "arguments": {"text": "hi"}}]}
),
],
)
assert result4.exit_code == 0
assert '"output": "HI"' in result4.output
# Now check in the database
tool_row = [row for row in logs_db["tools"].rows][0]
assert tool_row["name"] == "upper"
assert tool_row["plugin"] == "ToolsPlugin"
# The llm logs command should return that, including with the -T upper option
for args in ([], ["-T", "upper"]):
logs_result = runner.invoke(cli.cli, ["logs"] + args)
assert logs_result.exit_code == 0
assert "HI" in logs_result.output
# ... but not for -T reverse
logs_empty_result = runner.invoke(cli.cli, ["logs", "-T", "count_chars"])
assert logs_empty_result.exit_code == 0
assert "HI" not in logs_empty_result.output
# Start with a tool, use llm -c to reuse the same tool
result5 = runner.invoke(
cli.cli,
[
"prompt",
"-m",
"echo",
"--tool",
"upper",
json.dumps(
{"tool_calls": [{"name": "upper", "arguments": {"text": "one"}}]}
),
"--td",
],
)
assert result5.exit_code == 0
assert (
runner.invoke(
cli.cli,
[
"-c",
json.dumps(
{
"tool_calls": [
{"name": "upper", "arguments": {"text": "two"}}
]
}
),
],
).exit_code
== 0
)
# Now do it again with llm chat -c
assert (
runner.invoke(
cli.cli,
["chat", "-c"],
input=(
json.dumps(
{
"tool_calls": [
{"name": "upper", "arguments": {"text": "three"}}
]
}
)
+ "\nquit\n"
),
catch_exceptions=False,
).exit_code
== 0
)
# Should have logged those three tool uses in llm logs -c -n 0
log_rows = json.loads(
runner.invoke(cli.cli, ["logs", "-c", "-n", "0", "--json"]).output
)
# Workaround for bug in https://github.com/simonw/llm/issues/1073 and 1079
# We use a set and don't check the order, because (esp on Windows) datetime_utc
# may not be accurate enough to order them correctly.
log_rows.sort(key=lambda row: row["datetime_utc"])
results = {
(log_row["prompt"], json.dumps(log_row["tool_results"]))
for log_row in log_rows
}
assert results == {
(
"",
'[{"id": 4, "tool_id": 1, "name": "upper", "output": "THREE", "tool_call_id": null}]',
),
(
'{"tool_calls": [{"name": "upper", "arguments": {"text": "three"}}]}',
"[]",
),
(
"",
'[{"id": 3, "tool_id": 1, "name": "upper", "output": "TWO", "tool_call_id": null}]',
),
(
"",
'[{"id": 2, "tool_id": 1, "name": "upper", "output": "ONE", "tool_call_id": null}]',
),
('{"tool_calls": [{"name": "upper", "arguments": {"text": "one"}}]}', "[]"),
('{"tool_calls": [{"name": "upper", "arguments": {"text": "two"}}]}', "[]"),
}
finally:
plugins.pm.unregister(name="ToolsPlugin")
assert llm.get_tools() == {}
def test_plugins_command():
runner = CliRunner()
result = runner.invoke(cli.cli, ["plugins"])
assert result.exit_code == 0
expected = [
{"name": "EchoModelPlugin", "hooks": ["register_models"]},
{
"name": "MockModelsPlugin",
"hooks": ["register_embedding_models", "register_models"],
},
]
actual = json.loads(result.output)
actual.sort(key=lambda p: p["name"])
assert actual == expected
# Test the --hook option
result2 = runner.invoke(cli.cli, ["plugins", "--hook", "register_embedding_models"])
assert result2.exit_code == 0
assert json.loads(result2.output) == [
{
"name": "MockModelsPlugin",
"hooks": ["register_embedding_models", "register_models"],
},
]