diff --git a/docs/plugins/advanced-model-plugins.md b/docs/plugins/advanced-model-plugins.md new file mode 100644 index 0000000..fdbdc23 --- /dev/null +++ b/docs/plugins/advanced-model-plugins.md @@ -0,0 +1,102 @@ +(advanced-model-plugins)= +# Advanced model plugins + +The {ref}`model plugin tutorial <tutorial-model-plugin>` covers the basics of developing a plugin that adds support for a new model. + +This document covers more advanced topics. + +(advanced-model-plugins-attachments)= +## Attachments for multi-modal models + +Models such as GPT-4o, Claude 3.5 Sonnet and Google's Gemini 1.5 are multi-modal: they accept input in the form of images and maybe even audio, video and other formats. + +LLM calls these **attachments**. Models can specify the types of attachments they accept and then implement special code in the `.execute()` method to handle them. + +### Specifying attachment types + +A `Model` subclass can list the types of attachments it accepts by defining an `attachment_types` class attribute: + +```python +class NewModel(llm.Model): + model_id = "new-model" + attachment_types = { + "image/png", + "image/jpeg", + "image/webp", + "image/gif", + } +``` +These content types are detected when an attachment is passed to LLM using `llm -a filename`, or can be specified by the user using the `--attachment-type filename image/png` option. + +**Note:** MP3 files will have their attachment type detected as `audio/mpeg`, not `audio/mp3`. + +LLM will use the `attachment_types` attribute to validate that provided attachments should be accepted before passing them to the model. + +### Handling attachments + +The `prompt` object passed to the `execute()` method will have an `attachments` attribute containing a list of `Attachment` objects provided by the user. 
+ +An `Attachment` instance has the following properties: + +- `url (str)`: The URL of the attachment, if it was provided as a URL +- `path (str)`: The resolved file path of the attachment, if it was provided as a file +- `type (str)`: The content type of the attachment, if it was provided +- `content (bytes)`: The binary content of the attachment, if it was provided + +Generally only one of `url`, `path` or `content` will be set. + +You should usually access the type and the content through one of these methods: + +- `attachment.resolve_type() -> str`: Returns the `type` if it is available, otherwise attempts to guess the type by looking at the first few bytes of content +- `attachment.content_bytes() -> bytes`: Returns the binary content, which it may need to read from a file or fetch from a URL +- `attachment.base64_content() -> str`: Returns that content as a base64-encoded string + +An `id()` method returns a database ID for this content, which is either a SHA256 hash of the binary content or, in the case of attachments hosted at an external URL, a hash of `{"url": url}` instead. This is an implementation detail which you should not need to access directly. + +Here's how the OpenAI plugin handles attachments: + +```python +messages = [] +if not prompt.attachments: + messages.append({"role": "user", "content": prompt.prompt}) +else: + attachment_message = [{"type": "text", "text": prompt.prompt}] + for attachment in prompt.attachments: + url = attachment.url + if not url: + base64_image = attachment.base64_content() + url = f"data:{attachment.resolve_type()};base64,{base64_image}" + attachment_message.append( + {"type": "image_url", "image_url": {"url": url}} + ) + messages.append({"role": "user", "content": attachment_message}) +``` +As you can see, it uses `attachment.url` if that is available and otherwise falls back to using the `base64_content()` method to embed the image directly in the JSON sent to the API. 
+ +### Attachments from previous conversations + +Models that implement the ability to continue a conversation can reconstruct the previous message JSON using the `response.attachments` attribute. + +Here's how the OpenAI plugin does that: + +```python +for prev_response in conversation.responses: + if prev_response.attachments: + attachment_message = [ + {"type": "text", "text": prev_response.prompt.prompt} + ] + for attachment in prev_response.attachments: + url = attachment.url + if not url: + base64_image = attachment.base64_content() + url = f"data:{attachment.resolve_type()};base64,{base64_image}" + attachment_message.append( + {"type": "image_url", "image_url": {"url": url}} + ) + messages.append({"role": "user", "content": attachment_message}) + else: + messages.append( + {"role": "user", "content": prev_response.prompt.prompt} + ) + messages.append({"role": "assistant", "content": prev_response.text()}) +``` diff --git a/docs/plugins/index.md b/docs/plugins/index.md index 96ae62f..2a08844 100644 --- a/docs/plugins/index.md +++ b/docs/plugins/index.md @@ -17,5 +17,6 @@ installing-plugins directory plugin-hooks tutorial-model-plugin +advanced-model-plugins plugin-utilities ``` diff --git a/docs/plugins/tutorial-model-plugin.md b/docs/plugins/tutorial-model-plugin.md index ff9c17f..6f1bcbb 100644 --- a/docs/plugins/tutorial-model-plugin.md +++ b/docs/plugins/tutorial-model-plugin.md @@ -1,5 +1,5 @@ (tutorial-model-plugin)= -# Writing a plugin to support a new model +# Model plugin tutorial This tutorial will walk you through developing a new plugin for LLM that adds support for a new Large Language Model. 
diff --git a/llm/models.py b/llm/models.py index fc58a07..06440ee 100644 --- a/llm/models.py +++ b/llm/models.py @@ -50,7 +50,7 @@ class Attachment: return puremagic.from_string(self.content, mime=True) raise ValueError("Attachment has no type and no content to derive it from") - def base64_content(self): + def content_bytes(self): content = self.content if not content: if self.path: @@ -59,7 +59,10 @@ class Attachment: response = httpx.get(self.url) response.raise_for_status() content = response.content - return base64.b64encode(content).decode("utf-8") + return content + + def base64_content(self): + return base64.b64encode(self.content_bytes()).decode("utf-8") @classmethod def from_row(cls, row):