completion: true to register completion models, refs #284

This commit is contained in:
Simon Willison 2023-09-18 22:17:26 -07:00
parent 9c7792dce5
commit fcff36c6bc
4 changed files with 33 additions and 2 deletions

View file

@ -47,6 +47,8 @@ Let's say OpenAI have just released the `gpt-3.5-turbo-0613` model and you want
```
The `model_id` is the identifier that will be recorded in the LLM logs. You can use this to specify the model, or you can optionally include a list of aliases for that model.
If the model is a completion model (such as `gpt-3.5-turbo-instruct`) add `completion: true` to the configuration.
With this configuration in place, the following command should run a prompt against the new model:
```bash
@ -87,6 +89,8 @@ If the `api_base` is set, the existing configured `openai` API key will not be s
You can set `api_key_name` to the name of a key stored using the {ref}`api-keys` feature.
Add `completion: true` if the model is a completion model that uses a `/completions` as opposed to a `/chat/completions` endpoint.
Having configured the model like this, run `llm models` to check that it installed correctly. You can then run prompts against it like so:
```bash

View file

@ -51,7 +51,11 @@ def register_models(register):
api_version = extra_model.get("api_version")
api_engine = extra_model.get("api_engine")
headers = extra_model.get("headers")
chat_model = Chat(
if extra_model.get("completion"):
klass = Completion
else:
klass = Chat
chat_model = klass(
model_id,
model_name=model_name,
api_base=api_base,

View file

@ -318,7 +318,7 @@ def mocked_openai_completion_logprobs(requests_mock):
@pytest.fixture
def mocked_localai(requests_mock):
return requests_mock.post(
requests_mock.post(
"http://localai.localhost/chat/completions",
json={
"model": "orca",
@ -327,6 +327,16 @@ def mocked_localai(requests_mock):
},
headers={"Content-Type": "application/json"},
)
requests_mock.post(
"http://localai.localhost/completions",
json={
"model": "completion-babbage",
"usage": {},
"choices": [{"text": "Hello"}],
},
headers={"Content-Type": "application/json"},
)
return requests_mock
@pytest.fixture

View file

@ -438,6 +438,10 @@ EXTRA_MODELS_YAML = """
- model_id: orca
model_name: orca-mini-3b
api_base: "http://localai.localhost"
- model_id: completion-babbage
model_name: babbage
api_base: "http://localai.localhost"
completion: 1
"""
@ -458,6 +462,15 @@ def test_openai_localai_configuration(mocked_localai, user_path):
"messages": [{"role": "user", "content": "three names \nfor a pet pelican"}],
"stream": False,
}
# And check the completion model too
result2 = runner.invoke(cli, ["--no-stream", "--model", "completion-babbage", "hi"])
assert result2.exit_code == 0
assert result2.output == "Hello\n"
assert json.loads(mocked_localai.last_request.text) == {
"model": "babbage",
"prompt": "hi",
"stream": False,
}
EXPECTED_OPTIONS = """