diff --git a/docs/plugins/llm-markov/llm_markov.py b/docs/plugins/llm-markov/llm_markov.py
new file mode 100644
index 0000000..3ba4d9d
--- /dev/null
+++ b/docs/plugins/llm-markov/llm_markov.py
@@ -0,0 +1,68 @@
+import llm
+import random
+import time
+from typing import Optional
+from pydantic import field_validator, Field
+
+
+@llm.hookimpl
+def register_models(register):
+    register(Markov())
+
+
+def build_markov_table(text):
+    words = text.split()
+    transitions = {}
+    # Loop through all but the last word
+    for i in range(len(words) - 1):
+        word = words[i]
+        next_word = words[i + 1]
+        transitions.setdefault(word, []).append(next_word)
+    return transitions
+
+
+def generate(transitions, length, start_word=None):
+    all_words = list(transitions.keys())
+    next_word = start_word or random.choice(all_words)
+    for i in range(length):
+        yield next_word
+        options = transitions.get(next_word) or all_words
+        next_word = random.choice(options)
+
+
+class Markov(llm.Model):
+    model_id = "markov"
+    can_stream = True
+
+    class Options(llm.Options):
+        length: Optional[int] = Field(
+            description="Number of words to generate", default=None
+        )
+        delay: Optional[float] = Field(
+            description="Seconds to delay between each token", default=None
+        )
+
+        @field_validator("length")
+        def validate_length(cls, length):
+            if length is None:
+                return None
+            if length < 2:
+                raise ValueError("length must be >= 2")
+            return length
+
+        @field_validator("delay")
+        def validate_delay(cls, delay):
+            if delay is None:
+                return None
+            if not 0 <= delay <= 10:
+                raise ValueError("delay must be between 0 and 10")
+            return delay
+
+    def execute(self, prompt, stream, response, conversation):
+        text = prompt.prompt
+        transitions = build_markov_table(text)
+        length = prompt.options.length or 20
+        for word in generate(transitions, length):
+            yield word + " "
+            if prompt.options.delay:
+                time.sleep(prompt.options.delay)
diff --git a/docs/plugins/llm-markov/pyproject.toml b/docs/plugins/llm-markov/pyproject.toml
new file mode 100644
index 0000000..f544ce2
--- /dev/null
+++ b/docs/plugins/llm-markov/pyproject.toml
@@ -0,0 +1,6 @@
+[project]
+name = "llm-markov"
+version = "0.1"
+
+[project.entry-points.llm]
+markov = "llm_markov"
\ No newline at end of file
diff --git a/docs/plugins/tutorial-model-plugin.md b/docs/plugins/tutorial-model-plugin.md
index e6fd836..66c270e 100644
--- a/docs/plugins/tutorial-model-plugin.md
+++ b/docs/plugins/tutorial-model-plugin.md
@@ -24,7 +24,7 @@ def register_models(register):
 class Markov(llm.Model):
     model_id = "markov"
 
-    def execute(self, prompt, stream, response):
+    def execute(self, prompt, stream, response, conversation):
         return ["hello world"]
 ```
 
@@ -204,7 +204,7 @@ def generate(transitions, length, start_word=None):
 class Markov(llm.Model):
     model_id = "markov"
 
-    def execute(self, prompt, stream, response):
+    def execute(self, prompt, stream, response, conversation):
         text = prompt.prompt
         transitions = build_markov_table(text)
         for word in generate(transitions, 20):
@@ -220,6 +220,33 @@ llm -m markov "the cat sat on the mat"
 the mat the cat sat on the cat sat on the mat cat sat on the mat cat sat on
 ```
 
+## Understanding execute()
+
+The full signature of the `execute()` method is:
+```python
+def execute(self, prompt, stream, response, conversation):
+```
+The `prompt` argument is a `Prompt` object that contains the text that the user provided, the system prompt, and the provided options.
+
+`stream` is a boolean that indicates whether the model is being run in streaming mode.
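+
+A model can ignore `stream` entirely (the Markov model in this tutorial always yields words one at a time), but a model that calls a remote API might use it to choose between a streaming and a blocking request. Here is a minimal, hypothetical sketch of that pattern, not part of the plugin we are building:
+```python
+    def execute(self, prompt, stream, response, conversation):
+        words = ["hello", "world"]  # stand-in for real model output
+        if stream:
+            # Streaming mode: yield individual tokens as they are produced
+            for word in words:
+                yield word + " "
+        else:
+            # Non-streaming mode: yield the whole response in one chunk
+            yield " ".join(words)
+```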
+
+`response` is the `Response` object that is being created by the model. This is provided so you can write additional information to `response.response_json`, which may be logged to the database.
+
+`conversation` is the `Conversation` that the prompt is part of, or `None` if no conversation was provided. Some models may use `conversation.responses` to access previous prompts and responses in the conversation and use them to construct a call to the LLM that includes previous context.
+
 ## Prompts and responses are logged to the database
 
 The prompt and the response will be logged to a SQLite database automatically by LLM. You can see the single most recent addition to the logs using:
@@ -230,32 +244,32 @@ The output should look something like this:
 ```json
 [
   {
-    "id": 621,
+    "id": "01h52s4yez2bd1qk2deq49wk8h",
     "model": "markov",
     "prompt": "the cat sat on the mat",
     "system": null,
     "prompt_json": null,
     "options_json": {},
-    "response": "on the cat sat on the cat sat on the mat on the mat sat on the mat on the ",
+    "response": "on the cat sat on the cat sat on the mat cat sat on the cat sat on the cat ",
     "response_json": null,
-    "reply_to_id": null,
-    "chat_id": null,
+    "conversation_id": "01h52s4yey7zc5rjmczy3ft75g",
     "duration_ms": 0,
-    "datetime_utc": "2023-07-06T01:31:48.074373"
+    "datetime_utc": "2023-07-11T15:29:34.685868",
+    "conversation_name": "the cat sat on the mat",
+    "conversation_model": "markov"
   }
 ]
 ```
-
-Plugins can log additional information to the database by assigning a dictionary to the `response._response_json` property during the `execute()` method.
+Plugins can log additional information to the database by assigning a dictionary to the `response.response_json` property during the `execute()` method.
 
 Here's how to include that full `transitions` table in the `response_json` in the log:
 ```python
-    def execute(self, prompt, stream, response):
-        text = self.prompt.prompt
+    def execute(self, prompt, stream, response, conversation):
+        text = prompt.prompt
         transitions = build_markov_table(text)
         for word in generate(transitions, 20):
             yield word + ' '
-        response._response_json = {"transitions": transitions}
+        response.response_json = {"transitions": transitions}
 ```
 
 Now when you run the logs command you'll see that too:
@@ -383,10 +397,10 @@ import time
 ```
 Then replace the `execute()` method with this one:
 ```python
-    def execute(self, prompt, stream, response):
+    def execute(self, prompt, stream, response, conversation):
         text = prompt.prompt
         transitions = build_markov_table(text)
-        length = prompt.options.length or 10
+        length = prompt.options.length or 20
         for word in generate(transitions, length):
             yield word + ' '
         if prompt.options.delay:
@@ -395,74 +409,18 @@ Then replace the `execute()` method with this one:
-Add `can_stream = True` to the top of the `Markov` model class, on the line below `model_id = "markov". This tells LLM that the model is able to stream content to the console.
+Add `can_stream = True` to the top of the `Markov` model class, on the line below `model_id = "markov"`. This tells LLM that the model is able to stream content to the console.
 
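+For reference, the top of the class should now look like this:
+```python
+class Markov(llm.Model):
+    model_id = "markov"
+    can_stream = True
+```
+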
The full `llm_markov.py` file should now look like this: -```python -import llm -import random -import time -from typing import Optional -from pydantic import field_validator, Field -@llm.hookimpl -def register_models(register): - register(Markov()) - -def build_markov_table(text): - words = text.split() - transitions = {} - # Loop through all but the last word - for i in range(len(words) - 1): - word = words[i] - next_word = words[i + 1] - transitions.setdefault(word, []).append(next_word) - return transitions - -def generate(transitions, length, start_word=None): - all_words = list(transitions.keys()) - next_word = start_word or random.choice(all_words) - for i in range(length): - yield next_word - options = transitions.get(next_word) or all_words - next_word = random.choice(options) - -class Markov(llm.Model): - model_id = "markov" - can_stream = True - - class Options(llm.Options): - length: Optional[int] = Field( - description="Number of words to generate", - default=None - ) - delay: Optional[float] = Field( - description="Seconds to delay between each token", - default=None - ) - - @field_validator("length") - def validate_length(cls, length): - if length is None: - return None - if length < 2: - raise ValueError("length must be >= 2") - return length - - @field_validator("delay") - def validate_delay(cls, delay): - if delay is None: - return None - if not 0 <= delay <= 10: - raise ValueError("delay must be between 0 and 10") - return delay - - def execute(self, prompt, stream, response): - text = prompt.prompt - transitions = build_markov_table(text) - length = prompt.options.length or 10 - for word in generate(transitions, length): - yield word + ' ' - if prompt.options.delay: - time.sleep(prompt.options.delay) +```{literalinclude} llm-markov/llm_markov.py +:language: python ``` + Now we can request a 20 word completion with a 0.1s delay between tokens like this: ```bash llm -m markov "the cat sat on the mat" \