"""Tests for LLM tool execution.

Covers: a basic sync tool round-trip (including SQLite logging), chained
tool calls, async tool functions (run sequentially and in parallel),
async Toolbox classes, conversations with tools, the built-in default
tools (llm_version / llm_time) via the CLI, and error output for a
missing tool.
"""

import asyncio
import json
import os
import time
from importlib.metadata import version

from click.testing import CliRunner
import pytest
import sqlite_utils

import llm
from llm import cli
from llm.migrations import migrate
from llm.tools import llm_time

# Real key when recording VCR cassettes; any non-empty value replays them.
API_KEY = os.environ.get("PYTEST_OPENAI_API_KEY", None) or "badkey"


@pytest.mark.vcr
def test_tool_use_basic(vcr):
    """A single tool call should round-trip through the model and log to the DB."""
    model = llm.get_model("gpt-4o-mini")

    def multiply(a: int, b: int) -> int:
        """Multiply two numbers."""
        return a * b

    chain_response = model.chain("What is 1231 * 2331?", tools=[multiply], key=API_KEY)
    output = "".join(chain_response)
    assert output == "The result of \\( 1231 \\times 2331 \\) is \\( 2,869,461 \\)."
    first, second = chain_response._responses
    assert first.prompt.prompt == "What is 1231 * 2331?"
    assert first.prompt.tools[0].name == "multiply"
    assert len(second.prompt.tool_results) == 1
    assert second.prompt.tool_results[0].name == "multiply"
    assert second.prompt.tool_results[0].output == "2869461"
    # Test writing to the database
    db = sqlite_utils.Database(memory=True)
    migrate(db)
    chain_response.log_to_db(db)
    assert set(db.table_names()).issuperset(
        {"tools", "tool_responses", "tool_calls", "tool_results"}
    )
    responses = list(db["responses"].rows)
    assert len(responses) == 2
    first_response, second_response = responses
    tools = list(db["tools"].rows)
    assert len(tools) == 1
    assert tools[0]["name"] == "multiply"
    assert tools[0]["description"] == "Multiply two numbers."
    assert tools[0]["plugin"] is None
    tool_results = list(db["tool_results"].rows)
    tool_calls = list(db["tool_calls"].rows)
    # The tool call is recorded against the first response, its result
    # against the second, linked by tool_call_id.
    assert len(tool_calls) == 1
    assert tool_calls[0]["response_id"] == first_response["id"]
    assert tool_calls[0]["name"] == "multiply"
    assert tool_calls[0]["arguments"] == '{"a": 1231, "b": 2331}'
    assert len(tool_results) == 1
    assert tool_results[0]["response_id"] == second_response["id"]
    assert tool_results[0]["output"] == "2869461"
    assert tool_results[0]["tool_call_id"] == tool_calls[0]["tool_call_id"]


@pytest.mark.vcr
def test_tool_use_chain_of_two_calls(vcr):
    """Two dependent tool calls should produce a three-response chain."""
    model = llm.get_model("gpt-4o-mini")

    def lookup_population(country: str) -> int:
        "Returns the current population of the specified fictional country"
        return 123124

    def can_have_dragons(population: int) -> bool:
        "Returns True if the specified population can have dragons, False otherwise"
        return population > 10000

    chain_response = model.chain(
        "Can the country of Crumpet have dragons? Answer with only YES or NO",
        tools=[lookup_population, can_have_dragons],
        stream=False,
        key=API_KEY,
    )
    output = chain_response.text()
    assert output == "YES"
    assert len(chain_response._responses) == 3
    first, second, third = chain_response._responses
    # Response 1 calls lookup_population, response 2 feeds its result in
    # and calls can_have_dragons, response 3 is the final answer.
    assert first.tool_calls()[0].arguments == {"country": "Crumpet"}
    assert first.prompt.tool_results == []
    assert second.prompt.tool_results[0].output == "123124"
    assert second.tool_calls()[0].arguments == {"population": 123124}
    assert third.prompt.tool_results[0].output == "true"
    assert third.tool_calls() == []


def test_tool_use_async_tool_function():
    """A sync model should be able to execute an async tool function."""

    async def hello():
        return "world"

    model = llm.get_model("echo")
    chain_response = model.chain(
        json.dumps({"tool_calls": [{"name": "hello"}]}), tools=[hello]
    )
    output = chain_response.text()
    # That's two JSON objects separated by '\n}{\n'
    bits = output.split("\n}{\n")
    assert len(bits) == 2
    objects = [json.loads(bits[0] + "}"), json.loads("{" + bits[1])]
    assert objects == [
        {"prompt": "", "system": "", "attachments": [], "stream": True, "previous": []},
        {
            "prompt": "",
            "system": "",
            "attachments": [],
            "stream": True,
            "previous": [{"prompt": '{"tool_calls": [{"name": "hello"}]}'}],
            "tool_results": [
                {"name": "hello", "output": "world", "tool_call_id": None}
            ],
        },
    ]


@pytest.mark.asyncio
async def test_async_tools_run_tools_in_parallel():
    """Two async tools requested together should execute concurrently."""
    start_timestamps = []
    start_ns = time.monotonic_ns()

    async def hello():
        start_timestamps.append(("hello", time.monotonic_ns() - start_ns))
        await asyncio.sleep(0.2)
        return "world"

    async def hello2():
        start_timestamps.append(("hello2", time.monotonic_ns() - start_ns))
        await asyncio.sleep(0.2)
        return "world2"

    model = llm.get_async_model("echo")
    chain_response = model.chain(
        json.dumps({"tool_calls": [{"name": "hello"}, {"name": "hello2"}]}),
        tools=[hello, hello2],
    )
    output = await chain_response.text()
    # That's two JSON objects separated by '\n}{\n'
    bits = output.split("\n}{\n")
    assert len(bits) == 2
    objects = [json.loads(bits[0] + "}"), json.loads("{" + bits[1])]
    assert objects == [
        {"prompt": "", "system": "", "attachments": [], "stream": True, "previous": []},
        {
            "prompt": "",
            "system": "",
            "attachments": [],
            "stream": True,
            "previous": [
                {"prompt": '{"tool_calls": [{"name": "hello"}, {"name": "hello2"}]}'}
            ],
            "tool_results": [
                {"name": "hello", "output": "world", "tool_call_id": None},
                {"name": "hello2", "output": "world2", "tool_call_id": None},
            ],
        },
    ]
    delta_ns = start_timestamps[1][1] - start_timestamps[0][1]
    # They should have started in parallel, so the gap between their start
    # times must be well under the 0.2s sleep: 100_000_000 * 0.2 ns = 20ms.
    assert delta_ns < (100_000_000 * 0.2)


@pytest.mark.asyncio
async def test_async_toolbox():
    """An async method on a Toolbox class should work as a tool."""

    class Tools(llm.Toolbox):
        async def go(self):
            return "This was async"

    model = llm.get_async_model("echo")
    chain_response = model.chain(
        json.dumps({"tool_calls": [{"name": "Tools_go"}]}),
        tools=[Tools()],
    )
    output = await chain_response.text()
    assert '"output": "This was async"' in output


@pytest.mark.vcr
def test_conversation_with_tools(vcr):
    """Tools registered on a conversation should persist across chain() calls."""

    def add(a: int, b: int) -> int:
        return a + b

    def multiply(a: int, b: int) -> int:
        return a * b

    model = llm.get_model("echo")
    conversation = model.conversation(tools=[add, multiply])
    output1 = conversation.chain(
        json.dumps(
            {"tool_calls": [{"name": "multiply", "arguments": {"a": 5324, "b": 23233}}]}
        )
    ).text()
    assert "123692492" in output1
    output2 = conversation.chain(
        json.dumps(
            {
                "tool_calls": [
                    {"name": "add", "arguments": {"a": 841758375, "b": 123123}}
                ]
            }
        )
    ).text()
    assert "841881498" in output2


def test_default_tool_llm_version():
    """The built-in llm_version tool should report the installed version."""
    runner = CliRunner()
    result = runner.invoke(
        cli.cli,
        [
            "-m",
            "echo",
            "-T",
            "llm_version",
            json.dumps({"tool_calls": [{"name": "llm_version"}]}),
        ],
    )
    assert result.exit_code == 0
    assert '"output": "{}"'.format(version("llm")) in result.output


def test_default_tool_llm_time():
    """The built-in llm_time tool should return time info via CLI and direct call."""
    runner = CliRunner()
    result = runner.invoke(
        cli.cli,
        [
            "-m",
            "echo",
            "-T",
            "llm_time",
            json.dumps({"tool_calls": [{"name": "llm_time"}]}),
        ],
    )
    assert result.exit_code == 0
    assert "timezone_offset" in result.output
    # Test it by calling it directly
    info = llm_time()
    assert set(info.keys()) == {
        "timezone_offset",
        "utc_time_iso",
        "local_time",
        "local_timezone",
        "utc_time",
        "is_dst",
    }


def test_incorrect_tool_usage():
    """Calling a tool that was not registered should produce an error result."""
    model = llm.get_model("echo")

    def simple(name: str):
        return name

    chain_response = model.chain(
        json.dumps({"tool_calls": [{"name": "bad_tool"}]}),
        tools=[simple],
    )
    output = chain_response.text()
    assert 'Error: tool \\"bad_tool\\" does not exist' in output