# llm/tests/test_chat.py — tests for the interactive "llm chat" command.
from click.testing import CliRunner
from unittest.mock import ANY
import json
import llm.cli
import pytest
import sqlite_utils
import sys
import textwrap
@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_basic(mock_model, logs_db):
    """Two prompts in one chat session are answered and logged, then -c continues
    the same conversation and its response is logged against the same id."""
    runner = CliRunner()
    mock_model.enqueue(["one world"])
    mock_model.enqueue(["one again"])
    result = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock"],
        input="Hi\nHi two\nquit\n",
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    banner = [
        "Chatting with mock",
        "Type 'exit' or 'quit' to exit",
        "Type '!multi' to enter multiple lines, then '!end' to finish",
        "Type '!edit' to open your default editor and modify the prompt",
        "Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments",
    ]
    assert result.output == "\n".join(
        banner + ["> Hi", "one world", "> Hi two", "one again", "> quit", ""]
    )
    # Both exchanges should have been logged to the database
    conversations = list(logs_db["conversations"].rows)
    assert conversations[0] == {
        "id": ANY,
        "name": "Hi",
        "model": "mock",
    }
    conversation_id = conversations[0]["id"]
    responses = list(logs_db["responses"].rows)

    def expected_row(prompt, response, input_tokens):
        # One logged row in the responses table; id/timing columns are ANY
        return {
            "id": ANY,
            "model": "mock",
            "prompt": prompt,
            "system": None,
            "prompt_json": None,
            "options_json": "{}",
            "response": response,
            "response_json": None,
            "conversation_id": conversation_id,
            "duration_ms": ANY,
            "datetime_utc": ANY,
            "input_tokens": input_tokens,
            "output_tokens": 1,
            "token_details": None,
            "schema_id": None,
        }

    assert responses == [
        expected_row("Hi", "one world", 1),
        expected_row("Hi two", "one again", 2),
    ]
    # Now continue that conversation with -c
    mock_model.enqueue(["continued"])
    result2 = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock", "-c"],
        input="Continue\nquit\n",
        catch_exceptions=False,
    )
    assert result2.exit_code == 0
    assert result2.output == "\n".join(
        banner + ["> Continue", "continued", "> quit", ""]
    )
    # Only rows logged after the first session, same conversation id
    placeholders = ", ".join("?" for _ in responses)
    new_responses = list(
        logs_db.query(
            "select * from responses where id not in ({})".format(placeholders),
            [row["id"] for row in responses],
        )
    )
    assert new_responses == [expected_row("Continue", "continued", 1)]
@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_system(mock_model, logs_db):
    """A --system prompt is echoed in the model call and stored in the
    `system` column of the logged response."""
    runner = CliRunner()
    mock_model.enqueue(["I am mean"])
    result = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock", "--system", "You are mean"],
        input="Hi\nquit\n",
        # Consistent with test_chat_basic: let unexpected tracebacks surface
        # instead of being swallowed into result.exception
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert result.output == (
        "Chatting with mock"
        "\nType 'exit' or 'quit' to exit"
        "\nType '!multi' to enter multiple lines, then '!end' to finish"
        "\nType '!edit' to open your default editor and modify the prompt"
        "\nType '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments"
        "\n> Hi"
        "\nI am mean"
        "\n> quit"
        "\n"
    )
    responses = list(logs_db["responses"].rows)
    assert responses == [
        {
            "id": ANY,
            "model": "mock",
            "prompt": "Hi",
            "system": "You are mean",
            "prompt_json": None,
            "options_json": "{}",
            "response": "I am mean",
            "response_json": None,
            "conversation_id": ANY,
            "duration_ms": ANY,
            "datetime_utc": ANY,
            "input_tokens": 1,
            "output_tokens": 1,
            "token_details": None,
            "schema_id": None,
        }
    ]
@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_options(mock_model, logs_db, user_path):
    """Default options come from model_options.json; --option overrides them,
    and the effective options are logged in options_json."""
    (user_path / "model_options.json").write_text(
        json.dumps({"mock": {"max_tokens": "5"}}), "utf-8"
    )
    runner = CliRunner()
    mock_model.enqueue(["Default options response"])
    first = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock"],
        input="Hi\nquit\n",
    )
    assert first.exit_code == 0
    mock_model.enqueue(["Override options response"])
    second = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock", "--option", "max_tokens", "10"],
        input="Hi with override\nquit\n",
    )
    assert second.exit_code == 0

    def row(prompt, options_json, response, input_tokens):
        # Shared shape of a logged response row
        return {
            "id": ANY,
            "model": "mock",
            "prompt": prompt,
            "system": None,
            "prompt_json": None,
            "options_json": options_json,
            "response": response,
            "response_json": None,
            "conversation_id": ANY,
            "duration_ms": ANY,
            "datetime_utc": ANY,
            "input_tokens": input_tokens,
            "output_tokens": 1,
            "token_details": None,
            "schema_id": None,
        }

    assert list(logs_db["responses"].rows) == [
        row("Hi", '{"max_tokens": 5}', "Default options response", 1),
        row("Hi with override", '{"max_tokens": 10}', "Override options response", 3),
    ]
@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
@pytest.mark.parametrize(
    "input,expected",
    (
        (
            "Hi\n!multi\nthis is multiple lines\nuntil the !end\n!end\nquit\n",
            [
                {"prompt": "Hi", "response": "One\n"},
                {
                    "prompt": "this is multiple lines\nuntil the !end",
                    "response": "Two\n",
                },
            ],
        ),
        # quit should not work within !multi
        (
            "!multi\nthis is multiple lines\nquit\nuntil the !end\n!end\nquit\n",
            [
                {
                    "prompt": "this is multiple lines\nquit\nuntil the !end",
                    "response": "One\n",
                }
            ],
        ),
        # Try custom delimiter
        (
            "!multi abc\nCustom delimiter\n!end\n!end 123\n!end abc\nquit\n",
            [{"prompt": "Custom delimiter\n!end\n!end 123", "response": "One\n"}],
        ),
    ),
)
def test_chat_multi(mock_model, logs_db, input, expected):
    """!multi collects lines until !end (or a custom delimiter) into one prompt."""
    runner = CliRunner()
    for canned in ("One\n", "Two\n", "Three\n"):
        mock_model.enqueue([canned])
    result = runner.invoke(
        llm.cli.cli, ["chat", "-m", "mock", "--option", "max_tokens", "10"], input=input
    )
    assert result.exit_code == 0
    logged = list(logs_db["responses"].rows_where(select="prompt, response"))
    assert logged == expected
@pytest.mark.parametrize("custom_database_path", (False, True))
def test_llm_chat_creates_log_database(tmpdir, monkeypatch, custom_database_path):
    """`llm chat` creates logs.db under LLM_USER_PATH by default, or at the
    --database path when one is supplied."""
    user_path = tmpdir / "user"
    custom_db_path = tmpdir / "custom_log.db"
    monkeypatch.setenv("LLM_USER_PATH", str(user_path))
    args = ["chat", "-m", "mock"]
    if custom_database_path:
        args += ["--database", str(custom_db_path)]
    result = CliRunner().invoke(
        llm.cli.cli,
        args,
        catch_exceptions=False,
        input="Hi\nHi two\nquit\n",
    )
    assert result.exit_code == 0
    # The database file should exist at the expected location with both
    # responses recorded
    expected_db = custom_db_path if custom_database_path else user_path / "logs.db"
    assert expected_db.exists()
    assert sqlite_utils.Database(str(expected_db))["responses"].count == 2
@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_tools(logs_db):
    """--functions tools are executed mid-chat and their output is fed back
    to the model as tool_results on the follow-up call."""
    functions = textwrap.dedent(
        """
        def upper(text: str) -> str:
            "Convert text to upper case"
            return text.upper()
        """
    )
    # The echo model treats the prompt as JSON instructions, including which
    # tool calls to emit
    tool_call_prompt = json.dumps(
        {
            "prompt": "Convert hello to uppercase",
            "tool_calls": [{"name": "upper", "arguments": {"text": "hello"}}],
        }
    )
    result = CliRunner().invoke(
        llm.cli.cli,
        ["chat", "-m", "echo", "--functions", functions],
        input="\n".join([tool_call_prompt, "quit"]),
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    # NOTE(review): inner JSON indentation reconstructed as json indent=2 —
    # the scraped source had whitespace collapsed; verify against a real run
    assert result.output == (
        "Chatting with echo\n"
        "Type 'exit' or 'quit' to exit\n"
        "Type '!multi' to enter multiple lines, then '!end' to finish\n"
        "Type '!edit' to open your default editor and modify the prompt\n"
        "Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments\n"
        '> {"prompt": "Convert hello to uppercase", "tool_calls": [{"name": "upper", '
        '"arguments": {"text": "hello"}}]}\n'
        "{\n"
        '  "prompt": "Convert hello to uppercase",\n'
        '  "system": "",\n'
        '  "attachments": [],\n'
        '  "stream": true,\n'
        '  "previous": []\n'
        "}{\n"
        '  "prompt": "",\n'
        '  "system": "",\n'
        '  "attachments": [],\n'
        '  "stream": true,\n'
        '  "previous": [\n'
        "    {\n"
        '      "prompt": "{\\"prompt\\": \\"Convert hello to uppercase\\", '
        '\\"tool_calls\\": [{\\"name\\": \\"upper\\", \\"arguments\\": {\\"text\\": '
        '\\"hello\\"}}]}"\n'
        "    }\n"
        "  ],\n"
        '  "tool_results": [\n'
        "    {\n"
        '      "name": "upper",\n'
        '      "output": "HELLO",\n'
        '      "tool_call_id": null\n'
        "    }\n"
        "  ]\n"
        "}\n"
        "> quit\n"
    )
@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_fragments(tmpdir):
    """-f loads a fragment at startup; !fragment inserts one mid-conversation."""
    path1 = str(tmpdir / "frag1.txt")
    path2 = str(tmpdir / "frag2.txt")
    for path, content in ((path1, "one"), (path2, "two")):
        with open(path, "w") as fp:
            fp.write(content)
    output = CliRunner().invoke(
        llm.cli.cli,
        ["chat", "-m", "echo", "-f", path1],
        input="hi\n!fragment {}\nquit\n".format(path2),
    ).output
    # Startup fragment is prepended to the first prompt; !fragment becomes the
    # entire second prompt
    assert '"prompt": "one' in output
    assert '"prompt": "two"' in output