llm/tests/test_chat.py

from click.testing import CliRunner
import llm.cli
from unittest.mock import ANY
import pytest
import sys


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_basic(mock_model, logs_db):
    """Hold a two-turn chat, verify transcript and logs, then resume it with ``-c``.

    The first invocation sends two prompts and quits; both exchanges must be
    echoed in the output and logged against a single conversation. The second
    invocation passes ``-c`` and must log its response under that same
    conversation id.

    ``mock_model`` and ``logs_db`` are fixtures supplied outside this file
    (presumably conftest.py).
    """

    def logged_row(prompt, response, input_tokens, conversation_id):
        # Expected shape of one row in the "responses" table. Columns whose
        # value varies from run to run are matched with ANY.
        return {
            "id": ANY,
            "model": "mock",
            "prompt": prompt,
            "system": None,
            "prompt_json": None,
            "options_json": "{}",
            "response": response,
            "response_json": None,
            "conversation_id": conversation_id,
            "duration_ms": ANY,
            "datetime_utc": ANY,
            "input_tokens": input_tokens,
            "output_tokens": 1,
            "token_details": None,
            "schema_id": None,
        }

    # Text printed at the start of every chat session with the mock model.
    banner = (
        "Chatting with mock"
        "\nType 'exit' or 'quit' to exit"
        "\nType '!multi' to enter multiple lines, then '!end' to finish"
    )

    runner = CliRunner()
    for canned in ("one world", "one again"):
        mock_model.enqueue([canned])
    first_run = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock"],
        input="Hi\nHi two\nquit\n",
        catch_exceptions=False,
    )
    assert first_run.exit_code == 0
    assert first_run.output == (
        banner + "\n> Hi" + "\none world" + "\n> Hi two" + "\none again" + "\n> quit" + "\n"
    )

    # The conversation is named after its first prompt.
    first_conversation = list(logs_db["conversations"].rows)[0]
    assert first_conversation == {"id": ANY, "name": "Hi", "model": "mock"}
    conversation_id = first_conversation["id"]

    # Both exchanges should have been logged against that conversation.
    first_rows = list(logs_db["responses"].rows)
    assert first_rows == [
        logged_row("Hi", "one world", 1, conversation_id),
        logged_row("Hi two", "one again", 2, conversation_id),
    ]

    # Now continue that conversation
    mock_model.enqueue(["continued"])
    second_run = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock", "-c"],
        input="Continue\nquit\n",
        catch_exceptions=False,
    )
    assert second_run.exit_code == 0
    assert second_run.output == (banner + "\n> Continue" + "\ncontinued" + "\n> quit" + "\n")

    # Select only the rows written by the second run by excluding the ids
    # already seen; one "?" placeholder is generated per excluded id.
    seen_ids = [row["id"] for row in first_rows]
    placeholders = ", ".join("?" for _ in seen_ids)
    sql = "select * from responses where id not in ({})".format(placeholders)
    later_rows = list(logs_db.query(sql, seen_ids))
    assert later_rows == [logged_row("Continue", "continued", 1, conversation_id)]


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_system(mock_model, logs_db):
    """Check that ``llm chat --system`` logs the system prompt with the response.

    Sends a single prompt with ``--system "You are mean"``, then verifies the
    printed transcript and that the one logged row carries the system prompt
    in its ``system`` column.

    ``mock_model`` and ``logs_db`` are fixtures supplied outside this file
    (presumably conftest.py).
    """
    runner = CliRunner()
    mock_model.enqueue(["I am mean"])
    result = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock", "--system", "You are mean"],
        input="Hi\nquit\n",
        # Let unexpected exceptions propagate so a failure reports the real
        # traceback rather than only a non-zero exit code.
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert result.output == (
        "Chatting with mock"
        "\nType 'exit' or 'quit' to exit"
        "\nType '!multi' to enter multiple lines, then '!end' to finish"
        "\n> Hi"
        "\nI am mean"
        "\n> quit"
        "\n"
    )
    responses = list(logs_db["responses"].rows)
    # Columns whose value varies from run to run are matched with ANY.
    assert responses == [
        {
            "id": ANY,
            "model": "mock",
            "prompt": "Hi",
            "system": "You are mean",
            "prompt_json": None,
            "options_json": "{}",
            "response": "I am mean",
            "response_json": None,
            "conversation_id": ANY,
            "duration_ms": ANY,
            "datetime_utc": ANY,
            "input_tokens": 1,
            "output_tokens": 1,
            "token_details": None,
            "schema_id": None,
        }
    ]


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_options(mock_model, logs_db):
    """Check that ``llm chat --option`` values are logged in ``options_json``.

    Passes ``--option max_tokens 10`` and verifies that the logged row stores
    it as ``{"max_tokens": 10}``, i.e. with the value as a JSON number rather
    than the string given on the command line.

    ``mock_model`` and ``logs_db`` are fixtures supplied outside this file
    (presumably conftest.py).
    """
    runner = CliRunner()
    mock_model.enqueue(["Some text"])
    result = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock", "--option", "max_tokens", "10"],
        input="Hi\nquit\n",
        # Let unexpected exceptions propagate so a failure reports the real
        # traceback rather than only a non-zero exit code.
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    responses = list(logs_db["responses"].rows)
    # Columns whose value varies from run to run are matched with ANY.
    assert responses == [
        {
            "id": ANY,
            "model": "mock",
            "prompt": "Hi",
            "system": None,
            "prompt_json": None,
            "options_json": '{"max_tokens": 10}',
            "response": "Some text",
            "response_json": None,
            "conversation_id": ANY,
            "duration_ms": ANY,
            "datetime_utc": ANY,
            "input_tokens": 1,
            "output_tokens": 1,
            "token_details": None,
            "schema_id": None,
        }
    ]


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
@pytest.mark.parametrize(
    "input,expected",
    (
        # A single-line prompt followed by a !multi block ended by !end
        (
            "Hi\n!multi\nthis is multiple lines\nuntil the !end\n!end\nquit\n",
            [
                {"prompt": "Hi", "response": "One\n"},
                {
                    "prompt": "this is multiple lines\nuntil the !end",
                    "response": "Two\n",
                },
            ],
        ),
        # quit should not work within !multi
        (
            "!multi\nthis is multiple lines\nquit\nuntil the !end\n!end\nquit\n",
            [
                {
                    "prompt": "this is multiple lines\nquit\nuntil the !end",
                    "response": "One\n",
                }
            ],
        ),
        # Try custom delimiter
        (
            "!multi abc\nCustom delimiter\n!end\n!end 123\n!end abc\nquit\n",
            [{"prompt": "Custom delimiter\n!end\n!end 123", "response": "One\n"}],
        ),
    ),
)
def test_chat_multi(mock_model, logs_db, input, expected):
    """Check ``!multi`` / ``!end`` multi-line prompt entry in ``llm chat``.

    Each case feeds ``input`` to the chat session on stdin and compares the
    logged ``prompt`` and ``response`` columns with ``expected``. The cases
    cover a plain ``!multi`` block, a ``quit`` line inside a block being
    treated as prompt text, and ``!multi abc`` where only ``!end abc`` ends
    the block.

    ``mock_model`` and ``logs_db`` are fixtures supplied outside this file
    (presumably conftest.py).
    """
    runner = CliRunner()
    # Queue more canned responses than any case sends prompts (at most two).
    mock_model.enqueue(["One\n"])
    mock_model.enqueue(["Two\n"])
    mock_model.enqueue(["Three\n"])
    result = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock", "--option", "max_tokens", "10"],
        input=input,
        # Let unexpected exceptions propagate so a failure reports the real
        # traceback rather than only a non-zero exit code.
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    rows = list(logs_db["responses"].rows_where(select="prompt, response"))
    assert rows == expected
Initial tests for llm chat, refs #231 2023-09-05 06:08:38 +00:00			`from click.testing import CliRunner`
			`import llm.cli`
			`from unittest.mock import ANY`
llm chat !multi support, closes #267 2023-09-12 16:31:20 +00:00			`import pytest`
Windows readline fix, plus run CI against macOS and Windows * Run CI on Windows and macOS as well as Ubuntu, refs #407 * Use pyreadline3 on win32 * Back to fail-fast since we have a bigger matrix now * Mark some tests as xfail on windows 2024-01-27 00:24:58 +00:00			`import sys`
Initial tests for llm chat, refs #231 2023-09-05 06:08:38 +00:00

Windows readline fix, plus run CI against macOS and Windows * Run CI on Windows and macOS as well as Ubuntu, refs #407 * Use pyreadline3 on win32 * Back to fail-fast since we have a bigger matrix now * Mark some tests as xfail on windows 2024-01-27 00:24:58 +00:00			`@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")`
Initial tests for llm chat, refs #231 2023-09-05 06:08:38 +00:00			`def test_chat_basic(mock_model, logs_db):`
			`runner = CliRunner()`
			`mock_model.enqueue(["one world"])`
			`mock_model.enqueue(["one again"])`
			`result = runner.invoke(`
attachments= keyword argument, tests pass again - refs #587 2024-10-28 21:22:02 +00:00			`llm.cli.cli,`
			`["chat", "-m", "mock"],`
			`input="Hi\nHi two\nquit\n",`
			`catch_exceptions=False,`
Initial tests for llm chat, refs #231 2023-09-05 06:08:38 +00:00			`)`
			`assert result.exit_code == 0`
			`assert result.output == (`
			`"Chatting with mock"`
			`"\nType 'exit' or 'quit' to exit"`
Wrap !multi in single quotes, for consistency with exit/quit 2023-09-12 17:45:02 +00:00			`"\nType '!multi' to enter multiple lines, then '!end' to finish"`
Initial tests for llm chat, refs #231 2023-09-05 06:08:38 +00:00			`"\n> Hi"`
			`"\none world"`
			`"\n> Hi two"`
			`"\none again"`
			`"\n> quit"`
			`"\n"`
			`)`
			`# Should have logged`
			`conversations = list(logs_db["conversations"].rows)`
			`assert conversations[0] == {`
			`"id": ANY,`
			`"name": "Hi",`
			`"model": "mock",`
			`}`
			`conversation_id = conversations[0]["id"]`
			`responses = list(logs_db["responses"].rows)`
			`assert responses == [`
			`{`
			`"id": ANY,`
			`"model": "mock",`
			`"prompt": "Hi",`
			`"system": None,`
			`"prompt_json": None,`
			`"options_json": "{}",`
			`"response": "one world",`
			`"response_json": None,`
			`"conversation_id": conversation_id,`
			`"duration_ms": ANY,`
			`"datetime_utc": ANY,`
Log input tokens, output tokens and token details (#642) * Store input_tokens, output_tokens, token_details on Response, closes #610 * llm prompt -u/--usage option * llm logs -u/--usage option * Docs on tracking token usage in plugins * OpenAI default plugin logs usage 2024-11-20 04:21:59 +00:00			`"input_tokens": 1,`
			`"output_tokens": 1,`
			`"token_details": None,`
llm prompt --schema X option and model.prompt(..., schema=) parameter (#777) Refs #776 * Implemented new llm prompt --schema and model.prompt(schema=) * Log schema to responses.schema_id and schemas table * Include schema in llm logs Markdown output * Test for schema=pydantic_model * Initial --schema CLI documentation * Python docs for schema= * Advanced plugin docs on schemas 2025-02-27 00:58:28 +00:00			`"schema_id": None,`
Initial tests for llm chat, refs #231 2023-09-05 06:08:38 +00:00			`},`
			`{`
			`"id": ANY,`
			`"model": "mock",`
			`"prompt": "Hi two",`
			`"system": None,`
			`"prompt_json": None,`
			`"options_json": "{}",`
			`"response": "one again",`
			`"response_json": None,`
			`"conversation_id": conversation_id,`
			`"duration_ms": ANY,`
			`"datetime_utc": ANY,`
Log input tokens, output tokens and token details (#642) * Store input_tokens, output_tokens, token_details on Response, closes #610 * llm prompt -u/--usage option * llm logs -u/--usage option * Docs on tracking token usage in plugins * OpenAI default plugin logs usage 2024-11-20 04:21:59 +00:00			`"input_tokens": 2,`
			`"output_tokens": 1,`
			`"token_details": None,`
llm prompt --schema X option and model.prompt(..., schema=) parameter (#777) Refs #776 * Implemented new llm prompt --schema and model.prompt(schema=) * Log schema to responses.schema_id and schemas table * Include schema in llm logs Markdown output * Test for schema=pydantic_model * Initial --schema CLI documentation * Python docs for schema= * Advanced plugin docs on schemas 2025-02-27 00:58:28 +00:00			`"schema_id": None,`
Initial tests for llm chat, refs #231 2023-09-05 06:08:38 +00:00			`},`
			`]`
llm chat test continue conversation 2023-09-05 06:12:41 +00:00			`# Now continue that conversation`
			`mock_model.enqueue(["continued"])`
			`result2 = runner.invoke(`
llm.get_async_model(), llm.AsyncModel base class and OpenAI async models (#613) - https://github.com/simonw/llm/issues/507#issuecomment-2458639308 * register_model is now async aware Refs https://github.com/simonw/llm/issues/507#issuecomment-2458658134 * Refactor Chat and AsyncChat to use _Shared base class Refs https://github.com/simonw/llm/issues/507#issuecomment-2458692338 * fixed function name * Fix for infinite loop * Applied Black * Ran cog * Applied Black * Add Response.from_row() classmethod back again It does not matter that this is a blocking call, since it is a classmethod * Made mypy happy with llm/models.py * mypy fixes for openai_models.py I am unhappy with this, had to duplicate some code. * First test for AsyncModel * Still have not quite got this working * Fix for not loading plugins during tests, refs #626 * audio/wav not audio/wave, refs #603 * Black and mypy and ruff all happy * Refactor to avoid generics * Removed obsolete response() method * Support text = await async_mock_model.prompt("hello") * Initial docs for llm.get_async_model() and await model.prompt() Refs #507 * Initial async model plugin creation docs * duration_ms ANY to pass test * llm models --async option Refs https://github.com/simonw/llm/pull/613#issuecomment-2474724406 * Removed obsolete TypeVars * Expanded register_models() docs for async * await model.prompt() now returns AsyncResponse Refs https://github.com/simonw/llm/pull/613#issuecomment-2475157822 --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2024-11-14 01:51:00 +00:00			`llm.cli.cli,`
			`["chat", "-m", "mock", "-c"],`
			`input="Continue\nquit\n",`
			`catch_exceptions=False,`
llm chat test continue conversation 2023-09-05 06:12:41 +00:00			`)`
			`assert result2.exit_code == 0`
			`assert result2.output == (`
			`"Chatting with mock"`
			`"\nType 'exit' or 'quit' to exit"`
Wrap !multi in single quotes, for consistency with exit/quit 2023-09-12 17:45:02 +00:00			`"\nType '!multi' to enter multiple lines, then '!end' to finish"`
llm chat test continue conversation 2023-09-05 06:12:41 +00:00			`"\n> Continue"`
			`"\ncontinued"`
			`"\n> quit"`
			`"\n"`
			`)`
			`new_responses = list(`
			`logs_db.query(`
			`"select * from responses where id not in ({})".format(`
			`", ".join("?" for _ in responses)`
			`),`
			`[r["id"] for r in responses],`
			`)`
			`)`
			`assert new_responses == [`
			`{`
			`"id": ANY,`
			`"model": "mock",`
			`"prompt": "Continue",`
			`"system": None,`
			`"prompt_json": None,`
			`"options_json": "{}",`
			`"response": "continued",`
			`"response_json": None,`
			`"conversation_id": conversation_id,`
			`"duration_ms": ANY,`
			`"datetime_utc": ANY,`
Log input tokens, output tokens and token details (#642) * Store input_tokens, output_tokens, token_details on Response, closes #610 * llm prompt -u/--usage option * llm logs -u/--usage option * Docs on tracking token usage in plugins * OpenAI default plugin logs usage 2024-11-20 04:21:59 +00:00			`"input_tokens": 1,`
			`"output_tokens": 1,`
			`"token_details": None,`
llm prompt --schema X option and model.prompt(..., schema=) parameter (#777) Refs #776 * Implemented new llm prompt --schema and model.prompt(schema=) * Log schema to responses.schema_id and schemas table * Include schema in llm logs Markdown output * Test for schema=pydantic_model * Initial --schema CLI documentation * Python docs for schema= * Advanced plugin docs on schemas 2025-02-27 00:58:28 +00:00			`"schema_id": None,`
llm chat test continue conversation 2023-09-05 06:12:41 +00:00			`}`
			`]`
llm chat test system prompt 2023-09-05 06:15:26 +00:00

Windows readline fix, plus run CI against macOS and Windows * Run CI on Windows and macOS as well as Ubuntu, refs #407 * Use pyreadline3 on win32 * Back to fail-fast since we have a bigger matrix now * Mark some tests as xfail on windows 2024-01-27 00:24:58 +00:00			`@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")`
llm chat test system prompt 2023-09-05 06:15:26 +00:00			`def test_chat_system(mock_model, logs_db):`
			`runner = CliRunner()`
			`mock_model.enqueue(["I am mean"])`
			`result = runner.invoke(`
			`llm.cli.cli,`
			`["chat", "-m", "mock", "--system", "You are mean"],`
			`input="Hi\nquit\n",`
			`)`
			`assert result.exit_code == 0`
			`assert result.output == (`
			`"Chatting with mock"`
			`"\nType 'exit' or 'quit' to exit"`
Wrap !multi in single quotes, for consistency with exit/quit 2023-09-12 17:45:02 +00:00			`"\nType '!multi' to enter multiple lines, then '!end' to finish"`
llm chat test system prompt 2023-09-05 06:15:26 +00:00			`"\n> Hi"`
			`"\nI am mean"`
			`"\n> quit"`
			`"\n"`
			`)`
			`responses = list(logs_db["responses"].rows)`
			`assert responses == [`
			`{`
			`"id": ANY,`
			`"model": "mock",`
			`"prompt": "Hi",`
			`"system": "You are mean",`
			`"prompt_json": None,`
			`"options_json": "{}",`
			`"response": "I am mean",`
			`"response_json": None,`
			`"conversation_id": ANY,`
			`"duration_ms": ANY,`
			`"datetime_utc": ANY,`
Log input tokens, output tokens and token details (#642) * Store input_tokens, output_tokens, token_details on Response, closes #610 * llm prompt -u/--usage option * llm logs -u/--usage option * Docs on tracking token usage in plugins * OpenAI default plugin logs usage 2024-11-20 04:21:59 +00:00			`"input_tokens": 1,`
			`"output_tokens": 1,`
			`"token_details": None,`
llm prompt --schema X option and model.prompt(..., schema=) parameter (#777) Refs #776 * Implemented new llm prompt --schema and model.prompt(schema=) * Log schema to responses.schema_id and schemas table * Include schema in llm logs Markdown output * Test for schema=pydantic_model * Initial --schema CLI documentation * Python docs for schema= * Advanced plugin docs on schemas 2025-02-27 00:58:28 +00:00			`"schema_id": None,`
llm chat test system prompt 2023-09-05 06:15:26 +00:00			`}`
			`]`
llm chat -o/--option - refs #244 2023-09-10 18:14:28 +00:00

Windows readline fix, plus run CI against macOS and Windows * Run CI on Windows and macOS as well as Ubuntu, refs #407 * Use pyreadline3 on win32 * Back to fail-fast since we have a bigger matrix now * Mark some tests as xfail on windows 2024-01-27 00:24:58 +00:00			`@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")`
llm chat -o/--option - refs #244 2023-09-10 18:14:28 +00:00			`def test_chat_options(mock_model, logs_db):`
			`runner = CliRunner()`
			`mock_model.enqueue(["Some text"])`
			`result = runner.invoke(`
			`llm.cli.cli,`
			`["chat", "-m", "mock", "--option", "max_tokens", "10"],`
			`input="Hi\nquit\n",`
			`)`
			`assert result.exit_code == 0`
			`responses = list(logs_db["responses"].rows)`
			`assert responses == [`
			`{`
			`"id": ANY,`
			`"model": "mock",`
			`"prompt": "Hi",`
			`"system": None,`
			`"prompt_json": None,`
			`"options_json": '{"max_tokens": 10}',`
			`"response": "Some text",`
			`"response_json": None,`
			`"conversation_id": ANY,`
llm.get_async_model(), llm.AsyncModel base class and OpenAI async models (#613) - https://github.com/simonw/llm/issues/507#issuecomment-2458639308 * register_model is now async aware Refs https://github.com/simonw/llm/issues/507#issuecomment-2458658134 * Refactor Chat and AsyncChat to use _Shared base class Refs https://github.com/simonw/llm/issues/507#issuecomment-2458692338 * fixed function name * Fix for infinite loop * Applied Black * Ran cog * Applied Black * Add Response.from_row() classmethod back again It does not matter that this is a blocking call, since it is a classmethod * Made mypy happy with llm/models.py * mypy fixes for openai_models.py I am unhappy with this, had to duplicate some code. * First test for AsyncModel * Still have not quite got this working * Fix for not loading plugins during tests, refs #626 * audio/wav not audio/wave, refs #603 * Black and mypy and ruff all happy * Refactor to avoid generics * Removed obsolete response() method * Support text = await async_mock_model.prompt("hello") * Initial docs for llm.get_async_model() and await model.prompt() Refs #507 * Initial async model plugin creation docs * duration_ms ANY to pass test * llm models --async option Refs https://github.com/simonw/llm/pull/613#issuecomment-2474724406 * Removed obsolete TypeVars * Expanded register_models() docs for async * await model.prompt() now returns AsyncResponse Refs https://github.com/simonw/llm/pull/613#issuecomment-2475157822 --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2024-11-14 01:51:00 +00:00			`"duration_ms": ANY,`
llm chat -o/--option - refs #244 2023-09-10 18:14:28 +00:00			`"datetime_utc": ANY,`
Log input tokens, output tokens and token details (#642) * Store input_tokens, output_tokens, token_details on Response, closes #610 * llm prompt -u/--usage option * llm logs -u/--usage option * Docs on tracking token usage in plugins * OpenAI default plugin logs usage 2024-11-20 04:21:59 +00:00			`"input_tokens": 1,`
			`"output_tokens": 1,`
			`"token_details": None,`
llm prompt --schema X option and model.prompt(..., schema=) parameter (#777) Refs #776 * Implemented new llm prompt --schema and model.prompt(schema=) * Log schema to responses.schema_id and schemas table * Include schema in llm logs Markdown output * Test for schema=pydantic_model * Initial --schema CLI documentation * Python docs for schema= * Advanced plugin docs on schemas 2025-02-27 00:58:28 +00:00			`"schema_id": None,`
llm chat -o/--option - refs #244 2023-09-10 18:14:28 +00:00			`}`
			`]`
llm chat !multi support, closes #267 2023-09-12 16:31:20 +00:00

Windows readline fix, plus run CI against macOS and Windows * Run CI on Windows and macOS as well as Ubuntu, refs #407 * Use pyreadline3 on win32 * Back to fail-fast since we have a bigger matrix now * Mark some tests as xfail on windows 2024-01-27 00:24:58 +00:00			`@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")`
llm chat !multi support, closes #267 2023-09-12 16:31:20 +00:00			`@pytest.mark.parametrize(`
			`"input,expected",`
			`(`
			`(`
			`"Hi\n!multi\nthis is multiple lines\nuntil the !end\n!end\nquit\n",`
			`[`
			`{"prompt": "Hi", "response": "One\n"},`
			`{`
			`"prompt": "this is multiple lines\nuntil the !end",`
			`"response": "Two\n",`
			`},`
			`],`
			`),`
			`# quit should not work within !multi`
			`(`
			`"!multi\nthis is multiple lines\nquit\nuntil the !end\n!end\nquit\n",`
			`[`
			`{`
			`"prompt": "this is multiple lines\nquit\nuntil the !end",`
			`"response": "One\n",`
			`}`
			`],`
			`),`
			`# Try custom delimiter`
			`(`
			`"!multi abc\nCustom delimiter\n!end\n!end 123\n!end abc\nquit\n",`
			`[{"prompt": "Custom delimiter\n!end\n!end 123", "response": "One\n"}],`
			`),`
			`),`
			`)`
			`def test_chat_multi(mock_model, logs_db, input, expected):`
			`runner = CliRunner()`
			`mock_model.enqueue(["One\n"])`
			`mock_model.enqueue(["Two\n"])`
			`mock_model.enqueue(["Three\n"])`
			`result = runner.invoke(`
			`llm.cli.cli, ["chat", "-m", "mock", "--option", "max_tokens", "10"], input=input`
			`)`
			`assert result.exit_code == 0`
			`rows = list(logs_db["responses"].rows_where(select="prompt, response"))`
			`assert rows == expected`