Fix for duplicated prompts in llm chat with templates, closes #1240

Also includes a bug fix for system fragments, see https://github.com/simonw/llm/issues/1240#issuecomment-3177684518
This commit is contained in:
Simon Willison 2025-08-11 21:52:54 -07:00
parent c6e158071a
commit 2f206d0e26
2 changed files with 72 additions and 7 deletions

View file

@ -1197,16 +1197,19 @@ def chat(
continue
if template_obj:
try:
template_prompt, template_system = template_obj.evaluate(prompt, params)
# Mirror prompt() logic: only pass input if template uses it
uses_input = "input" in template_obj.vars()
input_ = prompt if uses_input else ""
template_prompt, template_system = template_obj.evaluate(input_, params)
except Template.MissingVariables as ex:
raise click.ClickException(str(ex))
if template_system and not system:
system = template_system
if template_prompt:
new_prompt = template_prompt
if prompt:
new_prompt += "\n" + prompt
prompt = new_prompt
if prompt and not uses_input:
prompt = f"{template_prompt}\n{prompt}"
else:
prompt = template_prompt
if prompt.strip() in ("exit", "quit"):
break
@ -1221,9 +1224,9 @@ def chat(
**kwargs,
)
# System prompt only sent for the first message:
# System prompt and system fragments only sent for the first message
system = None
system_fragments = []
argument_system_fragments = []
for chunk in response:
print(chunk, end="")
sys.stdout.flush()

View file

@ -0,0 +1,62 @@
from click.testing import CliRunner
import sys
import llm.cli
import pytest
@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_template_system_only_no_duplicate_prompt(
    mock_model, logs_db, templates_path
):
    """A template that sets only a system prompt must not duplicate the
    user's typed input in the logged prompt (regression for simonw/llm#1240).
    """
    # Template with a system prompt and no user-prompt portion
    (templates_path / "wild-french.yaml").write_text(
        "system: Speak in French\n", "utf-8"
    )
    mock_model.enqueue(["Bonjour !"])
    result = CliRunner().invoke(
        llm.cli.cli,
        ["chat", "-m", "mock", "-t", "wild-french"],
        input="hi\nquit\n",
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    logged = list(logs_db["responses"].rows)
    # Exactly one exchange was logged, and the prompt is "hi", not "hi\nhi"
    assert len(logged) == 1
    first = logged[0]
    assert first["prompt"] == "hi"
    assert first["system"] == "Speak in French"
@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_system_fragments_only_first_turn(tmpdir, mock_model, logs_db):
    """System fragments given on the command line should be attached to the
    first chat turn only, never to later turns in the same session.
    """
    # Write a system fragment to disk for --system-fragment to pick up
    fragment_path = str(tmpdir / "sys.txt")
    with open(fragment_path, "w", encoding="utf-8") as fh:
        fh.write("System fragment content")
    # Queue one canned reply per turn
    for reply in (["first"], ["second"]):
        mock_model.enqueue(reply)
    result = CliRunner().invoke(
        llm.cli.cli,
        ["chat", "-m", "mock", "--system-fragment", fragment_path],
        input="Hi\nHi two\nquit\n",
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    logged = list(logs_db["responses"].rows)
    assert len(logged) == 2
    first_response_id = logged[0]["id"]
    second_response_id = logged[1]["id"]
    fragment_rows = list(logs_db["system_fragments"].rows)
    # Only one fragment row exists, and it belongs to the first response
    assert len(fragment_rows) == 1
    assert fragment_rows[0]["response_id"] == first_response_id
    assert fragment_rows[0]["response_id"] != second_response_id