llm models list --options - closes #82

This commit is contained in:
Simon Willison 2023-07-10 13:40:27 -07:00
parent a2a97b6c7d
commit 8f7c3a9797
4 changed files with 155 additions and 22 deletions

View file

@ -190,7 +190,8 @@ Usage: llm models list [OPTIONS]
List available models
Options:
--help Show this message and exit.
--options Show options for each model, if available
--help Show this message and exit.
```
#### llm models default --help
```

View file

@ -5,24 +5,30 @@ The default command for this is `llm prompt` - you can use `llm` instead if you
## Executing a prompt
To run a prompt, streaming tokens as they come in:
llm 'Ten names for cheesecakes'
```bash
llm 'Ten names for cheesecakes'
```
To disable streaming and only return the response once it has completed:
llm 'Ten names for cheesecakes' --no-stream
```bash
llm 'Ten names for cheesecakes' --no-stream
```
To switch from ChatGPT 3.5 (the default) to GPT-4 if you have access:
llm 'Ten names for cheesecakes' -m gpt4
```bash
llm 'Ten names for cheesecakes' -m gpt4
```
You can use `-m 4` as an even shorter shortcut.
Pass `--model <model name>` to use a different model.
You can also send a prompt to standard input, for example:
```bash
echo 'Ten names for cheesecakes' | llm
```
Some models support options. You can pass these using `-o/--option name value` - for example, to set the temperature to 1.5 run this:
echo 'Ten names for cheesecakes' | llm
```bash
llm 'Ten names for cheesecakes' -o temperature 1.5
```
## Continuing a conversation
@ -77,14 +83,80 @@ OpenAI Chat: gpt-4 (aliases: 4, gpt4)
OpenAI Chat: gpt-4-32k (aliases: 4-32k)
PaLM 2: chat-bison-001 (aliases: palm, palm2)
```
You can use pass the full model name or any of the aliases to the `-m/--model` option:
Add `--options` to also see documentation for the options supported by each model:
```bash
llm models list --options
```
Output:
<!-- [[[cog
from click.testing import CliRunner
import sys
sys._called_from_test = True
from llm.cli import cli
result = CliRunner().invoke(cli, ["models", "list", "--options"])
cog.out("```\n{}\n```".format(result.output))
]]] -->
```
OpenAI Chat: gpt-3.5-turbo (aliases: 3.5, chatgpt)
temperature: float
What sampling temperature to use, between 0 and 2. Higher values like
0.8 will make the output more random, while lower values like 0.2 will
make it more focused and deterministic.
max_tokens: int
Maximum number of tokens to generate
top_p: float
An alternative to sampling with temperature, called nucleus sampling,
where the model considers the results of the tokens with top_p
probability mass. So 0.1 means only the tokens comprising the top 10%
probability mass are considered. Recommended to use top_p or
temperature but not both.
frequency_penalty: float
Number between -2.0 and 2.0. Positive values penalize new tokens based
on their existing frequency in the text so far, decreasing the model's
likelihood to repeat the same line verbatim.
presence_penalty: float
Number between -2.0 and 2.0. Positive values penalize new tokens based
on whether they appear in the text so far, increasing the model's
likelihood to talk about new topics.
stop: str
A string where the API will stop generating further tokens.
logit_bias: Union[dict, str, NoneType]
Modify the likelihood of specified tokens appearing in the completion.
OpenAI Chat: gpt-3.5-turbo-16k (aliases: chatgpt-16k, 3.5-16k)
temperature: float
max_tokens: int
top_p: float
frequency_penalty: float
presence_penalty: float
stop: str
logit_bias: Union[dict, str, NoneType]
OpenAI Chat: gpt-4 (aliases: 4, gpt4)
temperature: float
max_tokens: int
top_p: float
frequency_penalty: float
presence_penalty: float
stop: str
logit_bias: Union[dict, str, NoneType]
OpenAI Chat: gpt-4-32k (aliases: 4-32k)
temperature: float
max_tokens: int
top_p: float
frequency_penalty: float
presence_penalty: float
stop: str
logit_bias: Union[dict, str, NoneType]
```
<!-- [[[end]]] -->
When running a prompt you can pass the full model name or any of the aliases to the `-m/--model` option:
```bash
llm -m chatgpt-16k 'As many names for cheesecakes as you can think of, with detailed descriptions'
```
Models that have been installed using plugins will be shown here as well.
## Setting a custom model
## Setting a custom default model
The model used when calling `llm` without the `-m/--model` option defaults to `gpt-3.5-turbo` - the fastest and least expensive OpenAI model, and the same model family that powers ChatGPT.

View file

@ -19,6 +19,7 @@ from runpy import run_module
import shutil
import sqlite_utils
import sys
import textwrap
import warnings
import yaml
@ -365,13 +366,32 @@ def models():
# NOTE(review): this span is a rendered diff hunk with the +/- markers stripped.
# Both the pre-change signature (`def models_list():`) and the post-change
# signature (`def models_list(options):` plus its @click.option decorator)
# appear below; in the actual post-commit file only the latter exists.
@models.command(name="list")
def models_list():
@click.option(
"--options", is_flag=True, help="Show options for each model, if available"
)
def models_list(options):
"List available models"
# Descriptions are printed only the first time a given model *class* is
# seen; later models of the same class list option names/types only
# (matches the documented output above, where gpt-3.5-turbo shows full
# descriptions and gpt-4 / the -16k variants do not).
models_that_have_shown_options = set()
for model_with_aliases in get_models_with_aliases():
extra = ""
if model_with_aliases.aliases:
extra = " (aliases: {})".format(", ".join(model_with_aliases.aliases))
output = str(model_with_aliases.model) + extra
# `.Options.model_fields` is the pydantic v2 field registry on the
# model's nested Options class; skipped entirely unless --options given.
if options and model_with_aliases.model.Options.model_fields:
for name, field in model_with_aliases.model.Options.model_fields.items():
# Render the annotation compactly: drop the "typing." prefix and
# unwrap "Optional[...]" (9 == len("Optional[")).
type_info = str(field.annotation).replace("typing.", "")
if type_info.startswith("Optional["):
type_info = type_info[9:-1]
bits = ["\n  ", name, ": ", type_info]
if field.description and (
model_with_aliases.model.__class__
not in models_that_have_shown_options
):
# Wrap long descriptions at 70 columns, indenting continuation
# lines by four spaces.
wrapped = textwrap.wrap(field.description, 70)
bits.append("\n    ")
# NOTE(review): extend() with a str adds it character by
# character; the subsequent "".join(bits) re-joins them, so the
# output is unchanged — but append() would be clearer here.
bits.extend("\n    ".join(wrapped))
output += "".join(bits)
models_that_have_shown_options.add(model_with_aliases.model.__class__)
click.echo(output)

View file

@ -4,7 +4,7 @@ from llm.utils import dicts_to_table_string
import click
import datetime
import openai
from pydantic import field_validator
from pydantic import field_validator, Field
import requests
from typing import List, Optional, Union
import json
@ -67,13 +67,53 @@ class Chat(Model):
can_stream: bool = True
# NOTE(review): rendered diff hunk with +/- markers stripped. The seven bare
# `field: Optional[...] = None` lines immediately below are the *removed*
# pre-change declarations; the `Field(description=..., default=None)`
# versions that follow are the *added* post-change declarations. In the
# actual post-commit file each field appears exactly once, as a Field(...).
# Option settings accepted by the OpenAI Chat models; every field defaults
# to None, meaning "omit from the API request".
class Options(llm.Options):
temperature: Optional[float] = None
max_tokens: Optional[int] = None
top_p: Optional[float] = None
frequency_penalty: Optional[float] = None
presence_penalty: Optional[float] = None
stop: Optional[str] = None
logit_bias: Optional[Union[dict, str]] = None
# The `description=` strings below are what `llm models list --options`
# prints (via FieldInfo.description in models_list).
temperature: Optional[float] = Field(
description=(
"What sampling temperature to use, between 0 and 2. Higher values like "
"0.8 will make the output more random, while lower values like 0.2 will "
"make it more focused and deterministic."
),
default=None,
)
max_tokens: Optional[int] = Field(
description="Maximum number of tokens to generate", default=None
)
top_p: Optional[float] = Field(
description=(
"An alternative to sampling with temperature, called nucleus sampling, "
"where the model considers the results of the tokens with top_p "
"probability mass. So 0.1 means only the tokens comprising the top "
"10% probability mass are considered. Recommended to use top_p or "
"temperature but not both."
),
default=None,
)
frequency_penalty: Optional[float] = Field(
description=(
"Number between -2.0 and 2.0. Positive values penalize new tokens based "
"on their existing frequency in the text so far, decreasing the model's "
"likelihood to repeat the same line verbatim."
),
default=None,
)
presence_penalty: Optional[float] = Field(
description=(
"Number between -2.0 and 2.0. Positive values penalize new tokens based "
"on whether they appear in the text so far, increasing the model's "
"likelihood to talk about new topics."
),
default=None,
)
stop: Optional[str] = Field(
description=("A string where the API will stop generating further tokens."),
default=None,
)
# Accepts either a dict or a str here; the (truncated) @field_validator
# below this span presumably normalizes the str form — TODO confirm.
logit_bias: Optional[Union[dict, str]] = Field(
description=(
"Modify the likelihood of specified tokens appearing in the completion."
),
default=None,
)
@field_validator("logit_bias")
def validate_logit_bias(cls, logit_bias):