Populate content_hash with embed_multi, refs #217

This commit is contained in:
Simon Willison 2023-09-03 14:43:29 -07:00
parent 53fc9cbab7
commit 156bed7c65
2 changed files with 3 additions and 0 deletions

View file

@@ -187,6 +187,7 @@ class Collection:
"id": id,
"embedding": llm.encode(embedding),
"content": text if store else None,
"content_hash": self.content_hash(text),
"metadata": json.dumps(metadata) if metadata else None,
"updated": int(time.time()),
}

View file

@@ -110,6 +110,8 @@ def test_embed_multi(with_metadata):
else:
assert len(rows_with_metadata) == 0
assert len(rows_with_content) == 1000
# Every row should have content_hash set
assert all(row["content_hash"] is not None for row in rows)
def test_collection_delete(collection):