mirror of
https://github.com/Hopiu/llm.git
synced 2026-04-25 07:24:46 +00:00
Store updated timestamp on embeddings, closes #211
This commit is contained in:
parent
51488c579b
commit
73a9043108
5 changed files with 23 additions and 1 deletions
|
|
@ -5,6 +5,7 @@ from itertools import islice
|
|||
import json
|
||||
from sqlite_utils import Database
|
||||
from sqlite_utils.db import Table
|
||||
import time
|
||||
from typing import cast, Any, Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
|
||||
|
|
@ -133,6 +134,7 @@ class Collection:
|
|||
"embedding": encode(embedding),
|
||||
"content": text if store else None,
|
||||
"metadata": json.dumps(metadata) if metadata else None,
|
||||
"updated": int(time.time()),
|
||||
},
|
||||
replace=True,
|
||||
)
|
||||
|
|
@ -184,6 +186,7 @@ class Collection:
|
|||
"embedding": llm.encode(embedding),
|
||||
"content": text if store else None,
|
||||
"metadata": json.dumps(metadata) if metadata else None,
|
||||
"updated": int(time.time()),
|
||||
}
|
||||
for (embedding, (id, text, metadata)) in zip(embeddings, batch)
|
||||
),
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
from sqlite_migrate import Migrations
|
||||
import time
|
||||
|
||||
embeddings_migrations = Migrations("llm.embeddings")
|
||||
|
||||
|
|
@ -22,3 +23,14 @@ def m001_create_tables(db):
|
|||
@embeddings_migrations()
def m002_foreign_key(db):
    """Add a foreign key from embeddings.collection_id to collections.id."""
    embeddings_table = db["embeddings"]
    embeddings_table.add_foreign_key("collection_id", "collections", "id")
|
||||
|
||||
|
||||
@embeddings_migrations()
def m003_add_updated(db):
    """Add an integer ``updated`` column to ``embeddings`` and backfill it.

    Rows that already exist when this migration runs have no recorded
    update time, so they are stamped with the current Unix timestamp
    (whole seconds).

    Args:
        db: The sqlite-utils ``Database`` holding the ``embeddings`` table.
    """
    db["embeddings"].add_column("updated", int)
    # Pretty-print the schema
    db["embeddings"].transform()
    # Assume anything existing was last updated right now.
    # NOTE: db.query() is a generator function, so calling it without
    # iterating the result never runs the SQL (and it cannot be iterated for
    # an UPDATE, which returns no columns). db.execute() runs the statement
    # immediately; the connection context manager commits it.
    with db.conn:
        db.execute(
            "update embeddings set updated = ? where updated is null",
            [int(time.time())],
        )
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
import json
|
||||
import llm
|
||||
from llm.embeddings import Entry
|
||||
import sqlite_utils
|
||||
import pytest
|
||||
import sqlite_utils
|
||||
from unittest.mock import ANY
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
|
@ -65,6 +66,7 @@ def test_collection(collection):
|
|||
"embedding": llm.encode([5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
|
||||
"content": None,
|
||||
"metadata": None,
|
||||
"updated": ANY,
|
||||
},
|
||||
{
|
||||
"collection_id": 1,
|
||||
|
|
@ -72,8 +74,10 @@ def test_collection(collection):
|
|||
"embedding": llm.encode([7, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
|
||||
"content": None,
|
||||
"metadata": None,
|
||||
"updated": ANY,
|
||||
},
|
||||
]
|
||||
assert isinstance(rows[0]["updated"], int) and rows[0]["updated"] > 0
|
||||
|
||||
|
||||
def test_similar(collection):
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ from llm.cli import cli
|
|||
import json
|
||||
import pytest
|
||||
import sqlite_utils
|
||||
from unittest.mock import ANY
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
|
@ -98,6 +99,7 @@ def test_embed_store(user_path):
|
|||
),
|
||||
"content": None,
|
||||
"metadata": None,
|
||||
"updated": ANY,
|
||||
}
|
||||
]
|
||||
# Should show up in 'llm embed-db collections'
|
||||
|
|
|
|||
|
|
@ -91,6 +91,7 @@ def test_migrations_for_embeddings():
|
|||
"embedding": bytes,
|
||||
"content": str,
|
||||
"metadata": str,
|
||||
"updated": int,
|
||||
}
|
||||
assert db["embeddings"].foreign_keys[0].column == "collection_id"
|
||||
assert db["embeddings"].foreign_keys[0].other_table == "collections"
|
||||
|
|
|
|||
Loading…
Reference in a new issue