Support attachments without prompts, closes #611

2026-04-17 19:51:13 +00:00 · 2024-11-05 21:27:18 -08:00 · 2024-11-05 21:27:18 -08:00 · 0cc4072bcd
commit 0cc4072bcd
parent 41cb5c3387
3 changed files with 48 additions and 25 deletions
--- a/docs/plugins/advanced-model-plugins.md
+++ b/docs/plugins/advanced-model-plugins.md
@@ -53,25 +53,42 @@ You should usually access the type and the content through one of these methods:

 An `id()` method returns a database ID for this content, which is either a SHA256 hash of the binary content or, in the case of attachments hosted at an external URL, a hash of `{"url": url}` instead. This is an implementation detail which you should not need to access directly.

-Here's how the OpenAI plugin handles attachments:
+Note that it's possible for a prompt with attachments to not include a text prompt at all, in which case `prompt.prompt` will be `None`.
+
+Here's how the OpenAI plugin handles attachments, including the case where no `prompt.prompt` was provided:

 ```python
-messages = []
 if not prompt.attachments:
    messages.append({"role": "user", "content": prompt.prompt})
 else:
-    attachment_message = [{"type": "text", "text": prompt.prompt}]
+    attachment_message = []
+    if prompt.prompt:
+        attachment_message.append({"type": "text", "text": prompt.prompt})
    for attachment in prompt.attachments:
-        url = attachment.url
-        if not url:
-            base64_image = attachment.base64_content()
-            url = f"data:{attachment.resolve_type()};base64,{base64_image}"
-        attachment_message.append(
-            {"type": "image_url", "image_url": {"url": url}}
-        )
+        attachment_message.append(_attachment(attachment))
    messages.append({"role": "user", "content": attachment_message})
+
+
+# And the code for creating the attachment message
+def _attachment(attachment):
+    url = attachment.url
+    base64_content = ""
+    if not url or attachment.resolve_type().startswith("audio/"):
+        base64_content = attachment.base64_content()
+        url = f"data:{attachment.resolve_type()};base64,{base64_content}"
+    if attachment.resolve_type().startswith("image/"):
+        return {"type": "image_url", "image_url": {"url": url}}
+    else:
+        format_ = "wav" if attachment.resolve_type() == "audio/wave" else "mp3"
+        return {
+            "type": "input_audio",
+            "input_audio": {
+                "data": base64_content,
+                "format": format_,
+            },
+        }
 ```
-As you can see, it uses `attachment.url` if that is available and otherwise falls back to using the `base64_content()` method to embed the image directly in the JSON sent to the API.
+As you can see, it uses `attachment.url` if that is available and otherwise falls back to using the `base64_content()` method to embed the image directly in the JSON sent to the API. For the OpenAI API, audio attachments are always included as base64-encoded strings.

 ### Attachments from previous conversations

@@ -82,17 +99,13 @@ Here's how the OpenAI plugin does that:
 ```python
 for prev_response in conversation.responses:
    if prev_response.attachments:
-        attachment_message = [
-            {"type": "text", "text": prev_response.prompt.prompt}
-        ]
-        for attachment in prev_response.attachments:
-            url = attachment.url
-            if not url:
-                base64_image = attachment.base64_content()
-                url = f"data:{attachment.resolve_type()};base64,{base64_image}"
+        attachment_message = []
+        if prev_response.prompt.prompt:
            attachment_message.append(
-                {"type": "image_url", "image_url": {"url": url}}
+                {"type": "text", "text": prev_response.prompt.prompt}
            )
+        for attachment in prev_response.attachments:
+            attachment_message.append(_attachment(attachment))
        messages.append({"role": "user", "content": attachment_message})
    else:
        messages.append(
--- a/llm/cli.py
+++ b/llm/cli.py
@@ -250,7 +250,13 @@ def prompt(
                bits.append(prompt)
            prompt = " ".join(bits)

-        if prompt is None and not save and sys.stdin.isatty():
+        if (
+            prompt is None
+            and not save
+            and sys.stdin.isatty()
+            and not attachments
+            and not attachment_types
+        ):
            # Hang waiting for input to stdin (unless --save)
            prompt = sys.stdin.read()
        return prompt
--- a/llm/default_plugins/openai_models.py
+++ b/llm/default_plugins/openai_models.py
@@ -346,9 +346,11 @@ class Chat(Model):
                    )
                    current_system = prev_response.prompt.system
                if prev_response.attachments:
-                    attachment_message = [
-                        {"type": "text", "text": prev_response.prompt.prompt}
-                    ]
+                    attachment_message = []
+                    if prev_response.prompt.prompt:
+                        attachment_message.append(
+                            {"type": "text", "text": prev_response.prompt.prompt}
+                        )
                    for attachment in prev_response.attachments:
                        attachment_message.append(_attachment(attachment))
                    messages.append({"role": "user", "content": attachment_message})
@@ -362,7 +364,9 @@ class Chat(Model):
        if not prompt.attachments:
            messages.append({"role": "user", "content": prompt.prompt})
        else:
-            attachment_message = [{"type": "text", "text": prompt.prompt}]
+            attachment_message = []
+            if prompt.prompt:
+                attachment_message.append({"type": "text", "text": prompt.prompt})
            for attachment in prompt.attachments:
                attachment_message.append(_attachment(attachment))
            messages.append({"role": "user", "content": attachment_message})