llm similar --binary, closes #269

This commit is contained in:
Simon Willison 2023-09-12 11:22:21 -07:00
parent 506de80f69
commit 4952a8d119
3 changed files with 17 additions and 6 deletions

View file

@@ -342,6 +342,10 @@ Or feed text to standard input using `-i -`:
```bash
echo 'computer science' | llm similar quotations -i -
```
When using a model like CLIP, you can find images similar to an input image using `-i filename` with `--binary`:
```bash
llm similar photos -i image.jpg --binary
```
(embeddings-cli-embed-models)=
## llm embed-models

View file

@@ -539,8 +539,9 @@ Usage: llm similar [OPTIONS] COLLECTION [ID]
llm similar my-collection 1234
Options:
-i, --input FILENAME File to embed for comparison
-i, --input PATH File to embed for comparison
-c, --content TEXT Content to embed for comparison
--binary Treat input as binary data
-n, --number INTEGER Number of results to return
-d, --database FILE
--help Show this message and exit.

View file

@@ -1334,10 +1334,11 @@ def embed_multi(
@click.option(
"-i",
"--input",
type=click.File("r"),
type=click.Path(exists=True, readable=True, allow_dash=True),
help="File to embed for comparison",
)
@click.option("-c", "--content", help="Content to embed for comparison")
@click.option("--binary", is_flag=True, help="Treat input as binary data")
@click.option(
"-n", "--number", type=int, default=10, help="Number of results to return"
)
@@ -1347,7 +1348,7 @@ def embed_multi(
type=click.Path(file_okay=True, allow_dash=False, dir_okay=False, writable=True),
envvar="LLM_EMBEDDINGS_DB",
)
def similar(collection, id, input, content, number, database):
def similar(collection, id, input, content, binary, number, database):
"""
Return top N similar IDs from a collection
@@ -1383,11 +1384,16 @@ def similar(collection, id, input, content, number, database):
except Collection.DoesNotExist:
raise click.ClickException("ID not found in collection")
else:
# Resolve input text
if not content:
if not input:
if not input or input == "-":
# Read from stdin
input = sys.stdin
content = input.read()
input_source = sys.stdin.buffer if binary else sys.stdin
content = input_source.read()
else:
mode = "rb" if binary else "r"
with open(input, mode) as f:
content = f.read()
if not content:
raise click.ClickException("No content provided")
results = collection_obj.similar(content, number)