mirror of
https://github.com/Hopiu/wagtail.git
synced 2026-05-17 11:41:11 +00:00
Merge pull request #345 from kaedroho/searchchanges/elasticsearchmapping
Search Changes 4 - Build ElasticSearch documents in ElasticSearch backend
This commit is contained in:
commit
28f67a4726
2 changed files with 77 additions and 55 deletions
|
|
@ -12,6 +12,58 @@ from wagtail.wagtailsearch.indexed import Indexed
|
|||
from wagtail.wagtailsearch.utils import normalise_query_string
|
||||
|
||||
|
||||
class ElasticSearchMapping(object):
    """Translate an indexed model into ElasticSearch mapping and documents.

    The wrapped ``model`` is expected to provide the ``indexed_*`` hooks
    called below (``indexed_get_content_type``,
    ``indexed_get_indexed_fields`` and, on instances,
    ``indexed_get_toplevel_content_type``).
    """

    # Sentinel that tells "attribute is absent" apart from an attribute
    # whose value happens to be None.
    _MISSING = object()

    def __init__(self, model):
        """Remember the model class this mapping describes."""
        self.model = model

    def get_document_type(self):
        """Return the model's content type, used as the document type name."""
        return self.model.indexed_get_content_type()

    def get_mapping(self):
        """Return the mapping body: ``{document_type: {'properties': fields}}``.

        ``fields`` always contains ``pk`` and ``content_type`` and is then
        updated with whatever ``indexed_get_indexed_fields()`` returns, so a
        model-declared field of the same name replaces the built-in entry.
        """
        # Get type name
        content_type = self.get_document_type()

        # Get indexed fields
        indexed_fields = self.model.indexed_get_indexed_fields()

        # Make field list
        fields = {
            'pk': dict(type='string', index='not_analyzed', store='yes'),
            'content_type': dict(type='string'),
        }
        fields.update(indexed_fields)

        return {
            content_type: {
                'properties': fields,
            }
        }

    def get_document_id(self, obj):
        """Return ``'<toplevel content type>:<pk>'`` for ``obj``."""
        return obj.indexed_get_toplevel_content_type() + ':' + str(obj.pk)

    def get_document(self, obj):
        """Build the document dict that represents ``obj`` in the index.

        The result always holds ``pk`` (as a string) and ``content_type``.
        Each indexed field name that exists as an attribute on ``obj`` is
        added as well; callable attributes are called with no arguments and
        their return value is stored. Field names with no matching attribute
        are left out.
        """
        # Get content type and indexed fields
        content_type = obj.indexed_get_content_type()
        indexed_fields = obj.indexed_get_indexed_fields()

        # Build document
        doc = dict(pk=str(obj.pk), content_type=content_type)
        for field in indexed_fields:
            # A single getattr avoids evaluating property-backed fields
            # twice, which a hasattr() + getattr() pair would do.
            value = getattr(obj, field, self._MISSING)
            if value is self._MISSING:
                continue

            # Methods are stored by their result, not as bound methods
            if callable(value):
                value = value()

            doc[field] = value

        return doc

    def __repr__(self):
        return '<ElasticSearchMapping: %s>' % (self.model.__name__, )
|
||||
|
||||
|
||||
class ElasticSearchQuery(object):
|
||||
def __init__(self, model, query_string, fields=None, filters={}):
|
||||
self.model = model
|
||||
|
|
@ -330,25 +382,11 @@ class ElasticSearch(BaseSearch):
|
|||
self.es.indices.create(self.es_index, INDEX_SETTINGS)
|
||||
|
||||
def add_type(self, model):
|
||||
# Get type name
|
||||
content_type = model.indexed_get_content_type()
|
||||
|
||||
# Get indexed fields
|
||||
indexed_fields = model.indexed_get_indexed_fields()
|
||||
|
||||
# Make field list
|
||||
fields = {
|
||||
"pk": dict(type="string", index="not_analyzed", store="yes"),
|
||||
"content_type": dict(type="string"),
|
||||
}
|
||||
fields.update(indexed_fields)
|
||||
# Get mapping
|
||||
mapping = ElasticSearchMapping(model)
|
||||
|
||||
# Put mapping
|
||||
self.es.indices.put_mapping(index=self.es_index, doc_type=content_type, body={
|
||||
content_type: {
|
||||
"properties": fields,
|
||||
}
|
||||
})
|
||||
self.es.indices.put_mapping(index=self.es_index, doc_type=mapping.get_document_type(), body=mapping.get_mapping())
|
||||
|
||||
def refresh_index(self):
|
||||
self.es.indices.refresh(self.es_index)
|
||||
|
|
@ -358,11 +396,11 @@ class ElasticSearch(BaseSearch):
|
|||
if not self.object_can_be_indexed(obj):
|
||||
return
|
||||
|
||||
# Build document
|
||||
doc = obj.indexed_build_document()
|
||||
# Get mapping
|
||||
mapping = ElasticSearchMapping(obj.__class__)
|
||||
|
||||
# Add to index
|
||||
self.es.index(self.es_index, obj.indexed_get_content_type(), doc, id=doc["id"])
|
||||
# Add document to index
|
||||
self.es.index(self.es_index, mapping.get_document_type(), mapping.get_document(obj), id=mapping.get_document_id(obj))
|
||||
|
||||
def add_bulk(self, obj_list):
|
||||
# Group all objects by their type
|
||||
|
|
@ -372,27 +410,30 @@ class ElasticSearch(BaseSearch):
|
|||
if not self.object_can_be_indexed(obj):
|
||||
continue
|
||||
|
||||
# Get object type
|
||||
obj_type = obj.indexed_get_content_type()
|
||||
# Get mapping
|
||||
mapping = ElasticSearchMapping(obj.__class__)
|
||||
|
||||
# Get document type
|
||||
doc_type = mapping.get_document_type()
|
||||
|
||||
# If type is currently not in set, add it
|
||||
if obj_type not in type_set:
|
||||
type_set[obj_type] = []
|
||||
if doc_type not in type_set:
|
||||
type_set[doc_type] = []
|
||||
|
||||
# Add object to set
|
||||
type_set[obj_type].append(obj.indexed_build_document())
|
||||
# Add document to set
|
||||
type_set[doc_type].append((mapping.get_document_id(obj), mapping.get_document(obj)))
|
||||
|
||||
# Loop through each type and bulk add them
|
||||
for type_name, type_objects in type_set.items():
|
||||
for type_name, type_documents in type_set.items():
|
||||
# Get list of actions
|
||||
actions = []
|
||||
for obj in type_objects:
|
||||
for doc_id, doc in type_documents:
|
||||
action = {
|
||||
'_index': self.es_index,
|
||||
'_type': type_name,
|
||||
'_id': obj['id'],
|
||||
'_id': doc_id,
|
||||
}
|
||||
action.update(obj)
|
||||
action.update(doc)
|
||||
actions.append(action)
|
||||
|
||||
bulk(self.es, actions)
|
||||
|
|
@ -402,12 +443,15 @@ class ElasticSearch(BaseSearch):
|
|||
if not isinstance(obj, Indexed) or not isinstance(obj, models.Model):
|
||||
return
|
||||
|
||||
# Get mapping
|
||||
mapping = ElasticSearchMapping(obj.__class__)
|
||||
|
||||
# Delete document
|
||||
try:
|
||||
self.es.delete(
|
||||
self.es_index,
|
||||
obj.indexed_get_content_type(),
|
||||
obj.indexed_get_document_id(),
|
||||
mapping.get_document_type(),
|
||||
mapping.get_document_id(obj),
|
||||
)
|
||||
except NotFoundError:
|
||||
pass # Document doesn't exist, ignore this exception
|
||||
|
|
|
|||
|
|
@ -65,28 +65,6 @@ class Indexed(object):
|
|||
indexed_fields = parent_indexed_fields
|
||||
return indexed_fields
|
||||
|
||||
def indexed_get_document_id(self):
|
||||
return self.indexed_get_toplevel_content_type() + ":" + str(self.pk)
|
||||
|
||||
def indexed_build_document(self):
|
||||
# Get content type, indexed fields and id
|
||||
content_type = self.indexed_get_content_type()
|
||||
indexed_fields = self.indexed_get_indexed_fields()
|
||||
doc_id = self.indexed_get_document_id()
|
||||
|
||||
# Build document
|
||||
doc = dict(pk=str(self.pk), content_type=content_type, id=doc_id)
|
||||
for field in indexed_fields.keys():
|
||||
if hasattr(self, field):
|
||||
doc[field] = getattr(self, field)
|
||||
|
||||
# Check if this field is callable
|
||||
if hasattr(doc[field], "__call__"):
|
||||
# Call it
|
||||
doc[field] = doc[field]()
|
||||
|
||||
return doc
|
||||
|
||||
indexed_fields = ()
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue