From 5c64172d02fc2fded5f4ed990f62fa836c5302c3 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 20 Jun 2014 13:44:54 +0100 Subject: [PATCH 1/3] Created ElasticSearchMapping class --- .../wagtailsearch/backends/elasticsearch.py | 92 ++++++++++++------- 1 file changed, 59 insertions(+), 33 deletions(-) diff --git a/wagtail/wagtailsearch/backends/elasticsearch.py b/wagtail/wagtailsearch/backends/elasticsearch.py index 91536d744..82e76d5fc 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch.py +++ b/wagtail/wagtailsearch/backends/elasticsearch.py @@ -12,6 +12,40 @@ from wagtail.wagtailsearch.indexed import Indexed from wagtail.wagtailsearch.utils import normalise_query_string +class ElasticSearchMapping(object): + def __init__(self, model): + self.model = model + + def get_document_type(self): + return self.model.indexed_get_content_type() + + def get_mapping(self): + # Get type name + content_type = self.get_document_type() + + # Get indexed fields + indexed_fields = self.model.indexed_get_indexed_fields() + + # Make field list + fields = { + 'pk': dict(type='string', index='not_analyzed', store='yes'), + 'content_type': dict(type='string'), + } + fields.update(indexed_fields) + + return { + content_type: { + 'properties': fields, + } + } + + def get_document_id(self, obj): + return obj.indexed_build_document()['id'] + + def get_document(self, obj): + return obj.indexed_build_document() + + class ElasticSearchQuery(object): def __init__(self, model, query_string, fields=None, filters={}): self.model = model @@ -330,25 +364,11 @@ class ElasticSearch(BaseSearch): self.es.indices.create(self.es_index, INDEX_SETTINGS) def add_type(self, model): - # Get type name - content_type = model.indexed_get_content_type() - - # Get indexed fields - indexed_fields = model.indexed_get_indexed_fields() - - # Make field list - fields = { - "pk": dict(type="string", index="not_analyzed", store="yes"), - "content_type": dict(type="string"), - } - fields.update(indexed_fields) + # Get mapping + mapping = ElasticSearchMapping(model) # Put mapping - self.es.indices.put_mapping(index=self.es_index, doc_type=content_type, body={ - content_type: { - "properties": fields, - } - }) + self.es.indices.put_mapping(index=self.es_index, doc_type=mapping.get_document_type(), body=mapping.get_mapping()) def refresh_index(self): self.es.indices.refresh(self.es_index) @@ -358,11 +378,11 @@ class ElasticSearch(BaseSearch): if not self.object_can_be_indexed(obj): return - # Build document - doc = obj.indexed_build_document() + # Get mapping + mapping = ElasticSearchMapping(obj.__class__) - # Add to index - self.es.index(self.es_index, obj.indexed_get_content_type(), doc, id=doc["id"]) + # Add document to index + self.es.index(self.es_index, mapping.get_document_type(), mapping.get_document(obj), id=mapping.get_document_id(obj)) def add_bulk(self, obj_list): # Group all objects by their type @@ -372,27 +392,30 @@ class ElasticSearch(BaseSearch): if not self.object_can_be_indexed(obj): continue - # Get object type - obj_type = obj.indexed_get_content_type() + # Get mapping + mapping = ElasticSearchMapping(obj.__class__) + + # Get document type + doc_type = mapping.get_document_type() # If type is currently not in set, add it - if obj_type not in type_set: - type_set[obj_type] = [] + if doc_type not in type_set: + type_set[doc_type] = [] - # Add object to set - type_set[obj_type].append(obj.indexed_build_document()) + # Add document to set + type_set[doc_type].append((mapping.get_document_id(obj), mapping.get_document(obj))) # Loop through each type and bulk add them - for type_name, type_objects in type_set.items(): + for type_name, type_documents in type_set.items(): # Get list of actions actions = [] - for obj in type_objects: + for doc_id, doc in type_documents: action = { '_index': self.es_index, '_type': type_name, - '_id': obj['id'], + '_id': doc_id, } - action.update(obj) + action.update(doc) actions.append(action) bulk(self.es, actions) @@ -402,12 +425,15 @@ class ElasticSearch(BaseSearch): if not isinstance(obj, Indexed) or not isinstance(obj, models.Model): return + # Get mapping + mapping = ElasticSearchMapping(obj.__class__) + # Delete document try: self.es.delete( self.es_index, - obj.indexed_get_content_type(), - obj.indexed_get_document_id(), + mapping.get_document_type(), + mapping.get_document_id(obj), ) except NotFoundError: pass # Document doesn't exist, ignore this exception From fefa8c79ef7660d789f109a444ebe2b9fc8b43f8 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 20 Jun 2014 13:51:35 +0100 Subject: [PATCH 2/3] Added repr to ElasticSearchMapping --- wagtail/wagtailsearch/backends/elasticsearch.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/wagtail/wagtailsearch/backends/elasticsearch.py b/wagtail/wagtailsearch/backends/elasticsearch.py index 82e76d5fc..9ad1f288a 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch.py +++ b/wagtail/wagtailsearch/backends/elasticsearch.py @@ -45,6 +45,9 @@ class ElasticSearchMapping(object): def get_document(self, obj): return obj.indexed_build_document() + def __repr__(self): + return '' % (self.model.__name__, ) + class ElasticSearchQuery(object): def __init__(self, model, query_string, fields=None, filters={}): From da9b7c2408474583d66bf1be274e9a0831f36a40 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 20 Jun 2014 13:55:09 +0100 Subject: [PATCH 3/3] Moved document building methods from indexed into ElasticSearchMapping --- .../wagtailsearch/backends/elasticsearch.py | 19 ++++++++++++++-- wagtail/wagtailsearch/indexed.py | 22 ------------------- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/wagtail/wagtailsearch/backends/elasticsearch.py b/wagtail/wagtailsearch/backends/elasticsearch.py index 9ad1f288a..bbdbf0ecd 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch.py +++ b/wagtail/wagtailsearch/backends/elasticsearch.py @@ -40,10 +40,25 @@ class ElasticSearchMapping(object): } def get_document_id(self, obj): - return obj.indexed_build_document()['id'] + return obj.indexed_get_toplevel_content_type() + ':' + str(obj.pk) def get_document(self, obj): - return obj.indexed_build_document() + # Get content type, indexed fields and id + content_type = obj.indexed_get_content_type() + indexed_fields = obj.indexed_get_indexed_fields() + + # Build document + doc = dict(pk=str(obj.pk), content_type=content_type) + for field in indexed_fields.keys(): + if hasattr(obj, field): + doc[field] = getattr(obj, field) + + # Check if this field is callable + if hasattr(doc[field], "__call__"): + # Call it + doc[field] = doc[field]() + + return doc def __repr__(self): return '' % (self.model.__name__, ) diff --git a/wagtail/wagtailsearch/indexed.py b/wagtail/wagtailsearch/indexed.py index e2817905d..929bd0dbe 100644 --- a/wagtail/wagtailsearch/indexed.py +++ b/wagtail/wagtailsearch/indexed.py @@ -65,28 +65,6 @@ class Indexed(object): indexed_fields = parent_indexed_fields return indexed_fields - def indexed_get_document_id(self): - return self.indexed_get_toplevel_content_type() + ":" + str(self.pk) - - def indexed_build_document(self): - # Get content type, indexed fields and id - content_type = self.indexed_get_content_type() - indexed_fields = self.indexed_get_indexed_fields() - doc_id = self.indexed_get_document_id() - - # Build document - doc = dict(pk=str(self.pk), content_type=content_type, id=doc_id) - for field in indexed_fields.keys(): - if hasattr(self, field): - doc[field] = getattr(self, field) - - # Check if this field is callable - if hasattr(doc[field], "__call__"): - # Call it - doc[field] = doc[field]() - - return doc - indexed_fields = ()