diff --git a/docs/index.md b/docs/index.md
index 275f349..0a2e4b6 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -49,6 +49,7 @@ maxdepth: 3
 ---
 setup
 usage
+other-models
 python-api
 templates
 logging
diff --git a/docs/other-models.md b/docs/other-models.md
new file mode 100644
index 0000000..53c9604
--- /dev/null
+++ b/docs/other-models.md
@@ -0,0 +1,86 @@
+(other-models)=
+# Other models
+
+LLM supports OpenAI models by default. You can install {ref}`plugins` to add support for other models. You can also add additional OpenAI-compatible models {ref}`using a configuration file <openai-extra-models>`.
+
+## Installing and using a local model
+
+{ref}`LLM plugins <plugins>` can provide local models that run on your machine.
+
+To install **[llm-gpt4all](https://github.com/simonw/llm-gpt4all)**, providing 17 models from the [GPT4All](https://gpt4all.io/) project, run this:
+
+```bash
+llm install llm-gpt4all
+```
+Run `llm models list` to see the expanded list of available models.
+
+To run a prompt through one of the models from GPT4All, specify it using `-m/--model`:
+```bash
+llm -m ggml-vicuna-7b-1 'What is the capital of France?'
+```
+The model will be downloaded and cached the first time you use it.
+
+Check the **[llm-plugins](https://github.com/simonw/llm-plugins)** repository for the latest list of available plugins for other models.
+
+(openai-extra-models)=
+
+## Adding more OpenAI models
+
+OpenAI occasionally release new models with new names. LLM aims to ship new releases to support these, but you can also configure them directly by adding them to an `extra-openai-models.yaml` configuration file.
+
+Run this command to find the directory in which this file should be created:
+
+```bash
+dirname "$(llm logs path)"
+```
+On my Mac laptop I get this:
+```
+~/Library/Application Support/io.datasette.llm
+```
+Create a file in that directory called `extra-openai-models.yaml`.
+
+Let's say OpenAI have just released the `gpt-3.5-turbo-0613` model and you want to use it, despite LLM not yet shipping support. You could configure that by adding this to the file:
+
+```yaml
+- model_id: gpt-3.5-turbo-0613
+  aliases: ["0613"]
+```
+The `model_id` is the identifier that will be recorded in the LLM logs. You can use this to specify the model, and you can optionally include a list of aliases for that model.
+
+With this configuration in place, the following command should run a prompt against the new model:
+
+```bash
+llm -m 0613 'What is the capital of France?'
+```
+Run `llm models list` to confirm that the new model is now available:
+```bash
+llm models list
+```
+Example output:
+```
+OpenAI Chat: gpt-3.5-turbo (aliases: 3.5, chatgpt)
+OpenAI Chat: gpt-3.5-turbo-16k (aliases: chatgpt-16k, 3.5-16k)
+OpenAI Chat: gpt-4 (aliases: 4, gpt4)
+OpenAI Chat: gpt-4-32k (aliases: 4-32k)
+OpenAI Chat: gpt-3.5-turbo-0613 (aliases: 0613)
+```
+Running `llm logs -n 1` should confirm that the prompt and response have been correctly logged to the database.
+
+## OpenAI-compatible models
+
+Projects such as [LocalAI](https://localai.io/) offer a REST API that imitates the OpenAI API but can be used to run other models, including models that can be installed on your own machine. These can be added using the same configuration mechanism.
+
+The `model_id` is the name LLM will use for the model. The `model_name` is the name which needs to be passed to the API - this might differ from the `model_id`, especially if the `model_id` could potentially clash with other installed models.
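+
+For example - with invented names, purely for illustration - a backend model with a long internal name could be exposed under a shorter, unambiguous identifier like this:
+
+```yaml
+- model_id: vicuna-13b
+  model_name: vicuna-13b-v1.3.ggmlv3
+  aliases: ["vicuna"]
+```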
+
+The `api_base` key can be used to point the OpenAI client library at a different API endpoint.
+
+To add the `orca-mini-3b` model hosted by a local installation of [LocalAI](https://localai.io/), add this to your `extra-openai-models.yaml` file:
+
+```yaml
+- model_id: orca-openai-compat
+  model_name: orca-mini-3b.ggmlv3
+  api_base: "http://localhost:8080"
+```
+If the `api_base` is set, the existing configured `openai` API key will not be sent by default.
+
+You can set `api_key_name` to the name of a key stored using the {ref}`api-keys` feature.
diff --git a/docs/setup.md b/docs/setup.md
index df33a27..ac8cbb4 100644
--- a/docs/setup.md
+++ b/docs/setup.md
@@ -39,7 +39,9 @@ For example, the [llm-gpt4all](https://github.com/simonw/llm-gpt4all) plugin adds
 ```bash
 llm install llm-gpt4all
 ```
-## Authentication
+
+(api-keys)=
+## API key management
 
 Many LLM models require an API key. These API keys can be provided to this tool using several different mechanisms.
diff --git a/docs/usage.md b/docs/usage.md
index 3a663c8..67c71ec 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -30,23 +30,6 @@ Some models support options. You can pass these using `-o/--option name value` - for example:
 llm 'Ten names for cheesecakes' -o temperature 1.5
 ```
-## Installing and using a local model
-
-{ref}`LLM plugins <plugins>` can provide local models that run on your machine.
-
-To install [llm-gpt4all](https://github.com/simonw/llm-gpt4all), providing 17 models from the [GPT4All](https://gpt4all.io/) project, run this:
-
-```bash
-llm install llm-gpt4all
-```
-Run `llm models list` to see the expanded list of available models.
-
-To run a prompt through one of the models from GPT4All specify it using `-m/--model`:
-```bash
-llm -m ggml-vicuna-7b-1 'What is the capital of France?'
-```
-The model will be downloaded and cached the first time you use it.
-
 ## Continuing a conversation
 
 By default, the tool will start a new conversation each time you run it.
@@ -107,10 +90,12 @@ llm models list --options
 Output:
 ```
diff --git a/llm/default_plugins/openai_models.py b/llm/default_plugins/openai_models.py
index c6d74c5..0172413 100644
--- a/llm/default_plugins/openai_models.py
+++ b/llm/default_plugins/openai_models.py
@@ -8,6 +8,7 @@
 from pydantic import field_validator, Field
 import requests
 from typing import List, Optional, Union
 import json
+import yaml
 
 @hookimpl
@@ -16,6 +17,26 @@ def register_models(register):
     register(Chat("gpt-3.5-turbo-16k"), aliases=("chatgpt-16k", "3.5-16k"))
     register(Chat("gpt-4"), aliases=("4", "gpt4"))
     register(Chat("gpt-4-32k"), aliases=("4-32k",))
+    # Load extra models
+    extra_path = llm.user_dir() / "extra-openai-models.yaml"
+    if not extra_path.exists():
+        return
+    with open(extra_path) as f:
+        extra_models = yaml.safe_load(f)
+    for model in extra_models:
+        model_id = model["model_id"]
+        aliases = model.get("aliases", [])
+        # model_name is optional - entries that only override the model_id
+        # (like the gpt-3.5-turbo-0613 example in the docs) can omit it
+        model_name = model.get("model_name")
+        api_base = model.get("api_base")
+        chat_model = Chat(model_id, model_name=model_name, api_base=api_base)
+        if api_base:
+            chat_model.needs_key = None
+        if model.get("api_key_name"):
+            chat_model.needs_key = model["api_key_name"]
+        register(
+            chat_model,
+            aliases=aliases,
+        )
 
 
 @hookimpl
@@ -141,9 +162,11 @@
         return validated_logit_bias
 
-    def __init__(self, model_id, key=None):
+    def __init__(self, model_id, key=None, model_name=None, api_base=None):
         self.model_id = model_id
         self.key = key
+        self.model_name = model_name
+        self.api_base = api_base
 
     def __str__(self):
         return "OpenAI Chat: {}".format(self.model_id)
@@ -169,13 +192,22 @@
             messages.append({"role": "system", "content": prompt.system})
         messages.append({"role": "user", "content": prompt.prompt})
         response._prompt_json = {"messages": messages}
+        kwargs = dict(not_nulls(prompt.options))
+        if self.api_base:
+            kwargs["api_base"] = self.api_base
+        if self.needs_key:
+            if self.key:
+                kwargs["api_key"] = self.key
+        else:
+            # OpenAI-compatible models don't need a key, but the
+            # openai client library requires one
+            kwargs["api_key"] = "DUMMY_KEY"
         if stream:
             completion = openai.ChatCompletion.create(
-                model=prompt.model.model_id,
+                model=self.model_name or self.model_id,
                 messages=messages,
                 stream=True,
-                api_key=self.key,
-                **not_nulls(prompt.options),
+                **kwargs,
             )
             chunks = []
             for chunk in completion:
@@ -186,10 +218,10 @@
             response.response_json = combine_chunks(chunks)
         else:
             completion = openai.ChatCompletion.create(
-                model=prompt.model.model_id,
+                model=self.model_name or self.model_id,
                 messages=messages,
-                api_key=self.key,
                 stream=False,
+                **kwargs,
             )
             response.response_json = completion.to_dict_recursive()
             yield completion.choices[0].message.content
@@ -202,6 +234,7 @@ def not_nulls(data) -> dict:
 def combine_chunks(chunks: List[dict]) -> dict:
     content = ""
     role = None
+    finish_reason = None
 
     for item in chunks:
         for choice in item["choices"]:
@@ -209,16 +242,17 @@
                 role = choice["delta"]["role"]
             if "content" in choice["delta"]:
                 content += choice["delta"]["content"]
-            if choice["finish_reason"] is not None:
+            if choice.get("finish_reason") is not None:
                 finish_reason = choice["finish_reason"]
-    return {
-        "id": chunks[0]["id"],
-        "object": chunks[0]["object"],
-        "model": chunks[0]["model"],
-        "created": chunks[0]["created"],
-        "index": chunks[0]["choices"][0]["index"],
-        "role": role,
+    # Imitations of the OpenAI API may be missing some of these fields
+    combined = {
         "content": content,
+        "role": role,
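+        # content and role are rebuilt from the streamed deltas above, so
+        # they are always present even when the server is not OpenAI itself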
"finish_reason": finish_reason, } + for key in ("id", "object", "model", "created", "index"): + if key in chunks[0]: + combined[key] = chunks[0][key] + + return combined diff --git a/tests/conftest.py b/tests/conftest.py index 06f0f6e..f8830fc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -37,3 +37,16 @@ def mocked_openai(requests_mock): }, headers={"Content-Type": "application/json"}, ) + + +@pytest.fixture +def mocked_localai(requests_mock): + return requests_mock.post( + "http://localai.localhost/chat/completions", + json={ + "model": "orca", + "usage": {}, + "choices": [{"message": {"content": "Bob, Alice, Eve"}}], + }, + headers={"Content-Type": "application/json"}, + ) diff --git a/tests/test_llm.py b/tests/test_llm.py index 1dec322..34c849b 100644 --- a/tests/test_llm.py +++ b/tests/test_llm.py @@ -163,3 +163,29 @@ def test_llm_default_prompt( }, }.items() ) + + +EXTRA_MODELS_YAML = """ +- model_id: orca + model_name: orca-mini-3b + api_base: "http://localai.localhost" +""" + + +def test_openai_localai_configuration(mocked_localai, user_path): + log_path = user_path / "logs.db" + sqlite_utils.Database(str(log_path)) + # Write the configuration file + config_path = user_path / "extra-openai-models.yaml" + config_path.write_text(EXTRA_MODELS_YAML, "utf-8") + # Run the prompt + runner = CliRunner() + prompt = "three names for a pet pelican" + result = runner.invoke(cli, ["--no-stream", "--model", "orca", prompt]) + assert result.exit_code == 0 + assert result.output == "Bob, Alice, Eve\n" + assert json.loads(mocked_localai.last_request.text) == { + "model": "orca-mini-3b", + "messages": [{"role": "user", "content": "three names for a pet pelican"}], + "stream": False, + }