mirror of
https://github.com/Hopiu/llm.git
synced 2026-04-25 07:24:46 +00:00
llm models list --options - closes #82
This commit is contained in:
parent
a2a97b6c7d
commit
8f7c3a9797
4 changed files with 155 additions and 22 deletions
|
|
@ -190,7 +190,8 @@ Usage: llm models list [OPTIONS]
|
|||
List available models
|
||||
|
||||
Options:
|
||||
--help Show this message and exit.
|
||||
--options Show options for each model, if available
|
||||
--help Show this message and exit.
|
||||
```
|
||||
#### llm models default --help
|
||||
```
|
||||
|
|
|
|||
|
|
@ -5,24 +5,30 @@ The default command for this is `llm prompt` - you can use `llm` instead if you
|
|||
## Executing a prompt
|
||||
|
||||
To run a prompt, streaming tokens as they come in:
|
||||
|
||||
llm 'Ten names for cheesecakes'
|
||||
|
||||
```bash
|
||||
llm 'Ten names for cheesecakes'
|
||||
```
|
||||
To disable streaming and only return the response once it has completed:
|
||||
|
||||
llm 'Ten names for cheesecakes' --no-stream
|
||||
|
||||
```bash
|
||||
llm 'Ten names for cheesecakes' --no-stream
|
||||
```
|
||||
To switch from ChatGPT 3.5 (the default) to GPT-4 if you have access:
|
||||
|
||||
llm 'Ten names for cheesecakes' -m gpt4
|
||||
|
||||
```bash
|
||||
llm 'Ten names for cheesecakes' -m gpt4
|
||||
```
|
||||
You can use `-m 4` as an even shorter shortcut.
|
||||
|
||||
Pass `--model <model name>` to use a different model.
|
||||
|
||||
You can also send a prompt to standard input, for example:
|
||||
```bash
|
||||
echo 'Ten names for cheesecakes' | llm
|
||||
```
|
||||
Some models support options. You can pass these using `-o/--option name value` - for example, to set the temperature to 1.5 run this:
|
||||
|
||||
echo 'Ten names for cheesecakes' | llm
|
||||
```bash
|
||||
llm 'Ten names for cheesecakes' -o temperature 1.5
|
||||
```
|
||||
|
||||
## Continuing a conversation
|
||||
|
||||
|
|
@ -77,14 +83,80 @@ OpenAI Chat: gpt-4 (aliases: 4, gpt4)
|
|||
OpenAI Chat: gpt-4-32k (aliases: 4-32k)
|
||||
PaLM 2: chat-bison-001 (aliases: palm, palm2)
|
||||
```
|
||||
You can pass the full model name or any of the aliases to the `-m/--model` option:
|
||||
Add `--options` to also see documentation for the options supported by each model:
|
||||
```bash
|
||||
llm models list --options
|
||||
```
|
||||
Output:
|
||||
<!-- [[[cog
|
||||
from click.testing import CliRunner
|
||||
import sys
|
||||
sys._called_from_test = True
|
||||
from llm.cli import cli
|
||||
result = CliRunner().invoke(cli, ["models", "list", "--options"])
|
||||
cog.out("```\n{}\n```".format(result.output))
|
||||
]]] -->
|
||||
```
|
||||
OpenAI Chat: gpt-3.5-turbo (aliases: 3.5, chatgpt)
|
||||
temperature: float
|
||||
What sampling temperature to use, between 0 and 2. Higher values like
|
||||
0.8 will make the output more random, while lower values like 0.2 will
|
||||
make it more focused and deterministic.
|
||||
max_tokens: int
|
||||
Maximum number of tokens to generate
|
||||
top_p: float
|
||||
An alternative to sampling with temperature, called nucleus sampling,
|
||||
where the model considers the results of the tokens with top_p
|
||||
probability mass. So 0.1 means only the tokens comprising the top 10%
|
||||
probability mass are considered. Recommended to use top_p or
|
||||
temperature but not both.
|
||||
frequency_penalty: float
|
||||
Number between -2.0 and 2.0. Positive values penalize new tokens based
|
||||
on their existing frequency in the text so far, decreasing the model's
|
||||
likelihood to repeat the same line verbatim.
|
||||
presence_penalty: float
|
||||
Number between -2.0 and 2.0. Positive values penalize new tokens based
|
||||
on whether they appear in the text so far, increasing the model's
|
||||
likelihood to talk about new topics.
|
||||
stop: str
|
||||
A string where the API will stop generating further tokens.
|
||||
logit_bias: Union[dict, str, NoneType]
|
||||
Modify the likelihood of specified tokens appearing in the completion.
|
||||
OpenAI Chat: gpt-3.5-turbo-16k (aliases: chatgpt-16k, 3.5-16k)
|
||||
temperature: float
|
||||
max_tokens: int
|
||||
top_p: float
|
||||
frequency_penalty: float
|
||||
presence_penalty: float
|
||||
stop: str
|
||||
logit_bias: Union[dict, str, NoneType]
|
||||
OpenAI Chat: gpt-4 (aliases: 4, gpt4)
|
||||
temperature: float
|
||||
max_tokens: int
|
||||
top_p: float
|
||||
frequency_penalty: float
|
||||
presence_penalty: float
|
||||
stop: str
|
||||
logit_bias: Union[dict, str, NoneType]
|
||||
OpenAI Chat: gpt-4-32k (aliases: 4-32k)
|
||||
temperature: float
|
||||
max_tokens: int
|
||||
top_p: float
|
||||
frequency_penalty: float
|
||||
presence_penalty: float
|
||||
stop: str
|
||||
logit_bias: Union[dict, str, NoneType]
|
||||
|
||||
```
|
||||
<!-- [[[end]]] -->
|
||||
|
||||
When running a prompt you can pass the full model name or any of the aliases to the `-m/--model` option:
|
||||
```bash
|
||||
llm -m chatgpt-16k 'As many names for cheesecakes as you can think of, with detailed descriptions'
|
||||
```
|
||||
Models that have been installed using plugins will be shown here as well.
|
||||
|
||||
## Setting a custom model
|
||||
## Setting a custom default model
|
||||
|
||||
The model used when calling `llm` without the `-m/--model` option defaults to `gpt-3.5-turbo` - the fastest and least expensive OpenAI model, and the same model family that powers ChatGPT.
|
||||
|
||||
|
|
|
|||
22
llm/cli.py
22
llm/cli.py
|
|
@ -19,6 +19,7 @@ from runpy import run_module
|
|||
import shutil
|
||||
import sqlite_utils
|
||||
import sys
|
||||
import textwrap
|
||||
import warnings
|
||||
import yaml
|
||||
|
||||
|
|
@ -365,13 +366,32 @@ def models():
|
|||
|
||||
|
||||
@models.command(name="list")
@click.option(
    "--options", is_flag=True, help="Show options for each model, if available"
)
def models_list(options):
    "List available models"
    # Model classes whose option descriptions have already been printed once;
    # subsequent models of the same class list option names/types only, to
    # avoid repeating identical description text for every alias/variant.
    models_that_have_shown_options = set()
    for model_with_aliases in get_models_with_aliases():
        extra = ""
        if model_with_aliases.aliases:
            extra = " (aliases: {})".format(", ".join(model_with_aliases.aliases))
        output = str(model_with_aliases.model) + extra
        if options and model_with_aliases.model.Options.model_fields:
            for name, field in model_with_aliases.model.Options.model_fields.items():
                # Render the pydantic annotation compactly, e.g.
                # "Optional[float]" -> "float"
                type_info = str(field.annotation).replace("typing.", "")
                if type_info.startswith("Optional["):
                    type_info = type_info[9:-1]
                bits = ["\n  ", name, ": ", type_info]
                if field.description and (
                    model_with_aliases.model.__class__
                    not in models_that_have_shown_options
                ):
                    wrapped = textwrap.wrap(field.description, 70)
                    bits.append("\n    ")
                    # append, not extend: extend() on a str would add it
                    # character-by-character (same joined result, wrong idiom)
                    bits.append("\n    ".join(wrapped))
                output += "".join(bits)
            models_that_have_shown_options.add(model_with_aliases.model.__class__)
        click.echo(output)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from llm.utils import dicts_to_table_string
|
|||
import click
|
||||
import datetime
|
||||
import openai
|
||||
from pydantic import field_validator
|
||||
from pydantic import field_validator, Field
|
||||
import requests
|
||||
from typing import List, Optional, Union
|
||||
import json
|
||||
|
|
@ -67,13 +67,53 @@ class Chat(Model):
|
|||
can_stream: bool = True
|
||||
|
||||
class Options(llm.Options):
|
||||
temperature: Optional[float] = None
|
||||
max_tokens: Optional[int] = None
|
||||
top_p: Optional[float] = None
|
||||
frequency_penalty: Optional[float] = None
|
||||
presence_penalty: Optional[float] = None
|
||||
stop: Optional[str] = None
|
||||
logit_bias: Optional[Union[dict, str]] = None
|
||||
temperature: Optional[float] = Field(
|
||||
description=(
|
||||
"What sampling temperature to use, between 0 and 2. Higher values like "
|
||||
"0.8 will make the output more random, while lower values like 0.2 will "
|
||||
"make it more focused and deterministic."
|
||||
),
|
||||
default=None,
|
||||
)
|
||||
max_tokens: Optional[int] = Field(
|
||||
description="Maximum number of tokens to generate", default=None
|
||||
)
|
||||
top_p: Optional[float] = Field(
|
||||
description=(
|
||||
"An alternative to sampling with temperature, called nucleus sampling, "
|
||||
"where the model considers the results of the tokens with top_p "
|
||||
"probability mass. So 0.1 means only the tokens comprising the top "
|
||||
"10% probability mass are considered. Recommended to use top_p or "
|
||||
"temperature but not both."
|
||||
),
|
||||
default=None,
|
||||
)
|
||||
frequency_penalty: Optional[float] = Field(
|
||||
description=(
|
||||
"Number between -2.0 and 2.0. Positive values penalize new tokens based "
|
||||
"on their existing frequency in the text so far, decreasing the model's "
|
||||
"likelihood to repeat the same line verbatim."
|
||||
),
|
||||
default=None,
|
||||
)
|
||||
presence_penalty: Optional[float] = Field(
|
||||
description=(
|
||||
"Number between -2.0 and 2.0. Positive values penalize new tokens based "
|
||||
"on whether they appear in the text so far, increasing the model's "
|
||||
"likelihood to talk about new topics."
|
||||
),
|
||||
default=None,
|
||||
)
|
||||
stop: Optional[str] = Field(
|
||||
description=("A string where the API will stop generating further tokens."),
|
||||
default=None,
|
||||
)
|
||||
logit_bias: Optional[Union[dict, str]] = Field(
|
||||
description=(
|
||||
"Modify the likelihood of specified tokens appearing in the completion."
|
||||
),
|
||||
default=None,
|
||||
)
|
||||
|
||||
@field_validator("logit_bias")
|
||||
def validate_logit_bias(cls, logit_bias):
|
||||
|
|
|
|||
Loading…
Reference in a new issue