Store updated timestamp on embeddings, closes #211

This commit is contained in:
Simon Willison 2023-09-02 20:40:33 -07:00
parent 51488c579b
commit 73a9043108
5 changed files with 23 additions and 1 deletion

View file

@ -5,6 +5,7 @@ from itertools import islice
import json
from sqlite_utils import Database
from sqlite_utils.db import Table
import time
from typing import cast, Any, Dict, Iterable, List, Optional, Tuple
@ -133,6 +134,7 @@ class Collection:
"embedding": encode(embedding),
"content": text if store else None,
"metadata": json.dumps(metadata) if metadata else None,
"updated": int(time.time()),
},
replace=True,
)
@ -184,6 +186,7 @@ class Collection:
"embedding": llm.encode(embedding),
"content": text if store else None,
"metadata": json.dumps(metadata) if metadata else None,
"updated": int(time.time()),
}
for (embedding, (id, text, metadata)) in zip(embeddings, batch)
),

View file

@ -1,4 +1,5 @@
from sqlite_migrate import Migrations
import time
embeddings_migrations = Migrations("llm.embeddings")
@ -22,3 +23,14 @@ def m001_create_tables(db):
@embeddings_migrations()
def m002_foreign_key(db):
    """Make embeddings.collection_id a foreign key to collections.id."""
    embeddings_table = db["embeddings"]
    embeddings_table.add_foreign_key("collection_id", "collections", "id")
@embeddings_migrations()
def m003_add_updated(db):
    """Add an integer ``updated`` column to ``embeddings`` and backfill it.

    Rows that already exist have no recorded update time, so they are
    stamped with the Unix time at which this migration runs.
    """
    db["embeddings"].add_column("updated", int)
    # Pretty-print the schema
    db["embeddings"].transform()
    # Assume anything existing was last updated right now.
    # db.execute() is used instead of db.query(): query() is a generator
    # intended for SELECT statements, so calling it without iterating the
    # result would never actually run the UPDATE.
    db.execute(
        "update embeddings set updated = ? where updated is null", [int(time.time())]
    )

View file

@ -1,8 +1,9 @@
import json
import llm
from llm.embeddings import Entry
import sqlite_utils
import pytest
import sqlite_utils
from unittest.mock import ANY
@pytest.fixture
@ -65,6 +66,7 @@ def test_collection(collection):
"embedding": llm.encode([5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
"content": None,
"metadata": None,
"updated": ANY,
},
{
"collection_id": 1,
@ -72,8 +74,10 @@ def test_collection(collection):
"embedding": llm.encode([7, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
"content": None,
"metadata": None,
"updated": ANY,
},
]
assert isinstance(rows[0]["updated"], int) and rows[0]["updated"] > 0
def test_similar(collection):

View file

@ -3,6 +3,7 @@ from llm.cli import cli
import json
import pytest
import sqlite_utils
from unittest.mock import ANY
@pytest.mark.parametrize(
@ -98,6 +99,7 @@ def test_embed_store(user_path):
),
"content": None,
"metadata": None,
"updated": ANY,
}
]
# Should show up in 'llm embed-db collections'

View file

@ -91,6 +91,7 @@ def test_migrations_for_embeddings():
"embedding": bytes,
"content": str,
"metadata": str,
"updated": int,
}
assert db["embeddings"].foreign_keys[0].column == "collection_id"
assert db["embeddings"].foreign_keys[0].other_table == "collections"