From 5e76a54b2b4046b4a6a177c04c3ad8fc3bc345fe Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 20 Jun 2014 15:36:10 +0100 Subject: [PATCH 1/8] Remove any duplicate search fields of the same type --- wagtail/wagtailsearch/indexed.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/wagtail/wagtailsearch/indexed.py b/wagtail/wagtailsearch/indexed.py index 347e41c0c..aba3003f7 100644 --- a/wagtail/wagtailsearch/indexed.py +++ b/wagtail/wagtailsearch/indexed.py @@ -101,6 +101,13 @@ class Indexed(object): # Add the field search_fields.append(SearchField(field_name, boost=boost, partial_match=partial_match, es_extra=config)) + # Remove any duplicate entries into search fields + # We need to take into account that fields can be indexed as both a SearchField and as a FilterField + search_fields_dict = {} + for field in search_fields: + search_fields_dict[(field.field_name, type(field))] = field + search_fields = search_fields_dict.values() + return search_fields @classmethod From 6d21727b034a1f0147c1be987aab27e4728a5199 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 20 Jun 2014 15:48:51 +0100 Subject: [PATCH 2/8] Cleaned up database backend search method --- wagtail/wagtailsearch/backends/db.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/wagtail/wagtailsearch/backends/db.py b/wagtail/wagtailsearch/backends/db.py index a7dca12e9..5ed8e07df 100644 --- a/wagtail/wagtailsearch/backends/db.py +++ b/wagtail/wagtailsearch/backends/db.py @@ -49,7 +49,7 @@ class DBSearch(BaseSearch): # Filter by terms for term in terms: - term_query = None + term_query = models.Q() for field_name in fields: # Check if the field exists (this will filter out indexed callables) try: @@ -58,11 +58,8 @@ class DBSearch(BaseSearch): continue # Filter on this field - field_filter = {'%s__icontains' % field_name: term} - if term_query is None: - term_query = models.Q(**field_filter) - else: - term_query |= models.Q(**field_filter) + term_query |= 
models.Q(**{'%s__icontains' % field_name: term}) + query = query.filter(term_query) # Distinct From 67ed563dd74602a1cfb79b2d1e274c3c8aa8cf2d Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 20 Jun 2014 15:52:06 +0100 Subject: [PATCH 3/8] Made quotes more consistent --- .../wagtailsearch/backends/elasticsearch.py | 62 +++++++++---------- wagtail/wagtailsearch/indexed.py | 4 +- .../management/commands/update_index.py | 6 +- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/wagtail/wagtailsearch/backends/elasticsearch.py b/wagtail/wagtailsearch/backends/elasticsearch.py index 88542f624..4890b5e52 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch.py +++ b/wagtail/wagtailsearch/backends/elasticsearch.py @@ -65,7 +65,7 @@ class ElasticSearchMapping(object): doc[field] = getattr(obj, field) # Check if this field is callable - if hasattr(doc[field], "__call__"): + if hasattr(doc[field], '__call__'): # Call it doc[field] = doc[field]() @@ -346,43 +346,43 @@ class ElasticSearch(BaseSearch): # Settings INDEX_SETTINGS = { - "settings": { - "analysis": { - "analyzer": { - "ngram_analyzer": { - "type": "custom", - "tokenizer": "lowercase", - "filter": ["ngram"] + 'settings': { + 'analysis': { + 'analyzer': { + 'ngram_analyzer': { + 'type': 'custom', + 'tokenizer': 'lowercase', + 'filter': ['ngram'] }, - "edgengram_analyzer": { - "type": "custom", - "tokenizer": "lowercase", - "filter": ["edgengram"] + 'edgengram_analyzer': { + 'type': 'custom', + 'tokenizer': 'lowercase', + 'filter': ['edgengram'] } }, - "tokenizer": { - "ngram_tokenizer": { - "type": "nGram", - "min_gram": 3, - "max_gram": 15, + 'tokenizer': { + 'ngram_tokenizer': { + 'type': 'nGram', + 'min_gram': 3, + 'max_gram': 15, }, - "edgengram_tokenizer": { - "type": "edgeNGram", - "min_gram": 2, - "max_gram": 15, - "side": "front" + 'edgengram_tokenizer': { + 'type': 'edgeNGram', + 'min_gram': 2, + 'max_gram': 15, + 'side': 'front' } }, - "filter": { - "ngram": { - "type": "nGram", - 
"min_gram": 3, - "max_gram": 15 + 'filter': { + 'ngram': { + 'type': 'nGram', + 'min_gram': 3, + 'max_gram': 15 }, - "edgengram": { - "type": "edgeNGram", - "min_gram": 1, - "max_gram": 15 + 'edgengram': { + 'type': 'edgeNGram', + 'min_gram': 1, + 'max_gram': 15 } } } diff --git a/wagtail/wagtailsearch/indexed.py b/wagtail/wagtailsearch/indexed.py index aba3003f7..b4626914f 100644 --- a/wagtail/wagtailsearch/indexed.py +++ b/wagtail/wagtailsearch/indexed.py @@ -49,7 +49,7 @@ class Indexed(object): if isinstance(indexed_fields, string_types): indexed_fields = [indexed_fields] if isinstance(indexed_fields, list): - indexed_fields = dict((field, dict(type="string")) for field in indexed_fields) + indexed_fields = dict((field, dict(type='string')) for field in indexed_fields) if not isinstance(indexed_fields, dict): raise ValueError() @@ -139,7 +139,7 @@ class BaseField(object): return self.get_attname(cls) + self.suffix def __repr__(self): - return "<%s: %s>" % (self.__class__.__name__, self.field_name) + return '<%s: %s>' % (self.__class__.__name__, self.field_name) class SearchField(BaseField): diff --git a/wagtail/wagtailsearch/management/commands/update_index.py b/wagtail/wagtailsearch/management/commands/update_index.py index 8f1605b24..ee85da8ff 100644 --- a/wagtail/wagtailsearch/management/commands/update_index.py +++ b/wagtail/wagtailsearch/management/commands/update_index.py @@ -25,13 +25,13 @@ class Command(BaseCommand): # Loop through objects for obj in model.objects.all(): - # Check if this object has an "object_indexed" function - if hasattr(obj, "object_indexed"): + # Check if this object has an 'object_indexed' function + if hasattr(obj, 'object_indexed'): if obj.object_indexed() is False: continue # Get key for this object - key = toplevel_content_type + ":" + str(obj.pk) + key = toplevel_content_type + ':' + str(obj.pk) # Check if this key already exists if key in object_set: From f2d1c803783a4e69e84d46b5f87dd58b41cfc770 Mon Sep 17 00:00:00 2001 From: 
Karl Hobley Date: Sun, 6 Apr 2014 14:21:40 +0100 Subject: [PATCH 4/8] Don't run tests with wagtailsearch signal handlers enabled --- wagtail/tests/urls.py | 4 ---- wagtail/wagtailsearch/tests/test_backends.py | 10 +++++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/wagtail/tests/urls.py b/wagtail/tests/urls.py index 83e12adfb..385d0e6cf 100644 --- a/wagtail/tests/urls.py +++ b/wagtail/tests/urls.py @@ -6,10 +6,6 @@ from wagtail.wagtaildocs import urls as wagtaildocs_urls from wagtail.wagtailsearch.urls import frontend as wagtailsearch_frontend_urls from wagtail.contrib.wagtailsitemaps.views import sitemap -# Signal handlers -from wagtail.wagtailsearch import register_signal_handlers as wagtailsearch_register_signal_handlers -wagtailsearch_register_signal_handlers() - urlpatterns = patterns('', url(r'^admin/', include(wagtailadmin_urls)), diff --git a/wagtail/wagtailsearch/tests/test_backends.py b/wagtail/wagtailsearch/tests/test_backends.py index 8fab1d152..86a67f21a 100644 --- a/wagtail/wagtailsearch/tests/test_backends.py +++ b/wagtail/wagtailsearch/tests/test_backends.py @@ -11,11 +11,6 @@ from wagtail.wagtailsearch.backends.db import DBSearch from wagtail.wagtailsearch.backends import InvalidSearchBackendError -# Register wagtailsearch signal handlers -from wagtail.wagtailsearch import register_signal_handlers -register_signal_handlers() - - class BackendTests(object): # To test a specific backend, subclass BackendTests and define self.backend_path. 
@@ -41,21 +36,25 @@ class BackendTests(object): testa = models.SearchTest() testa.title = "Hello World" testa.save() + self.backend.add(testa) self.testa = testa testb = models.SearchTest() testb.title = "Hello" testb.live = True testb.save() + self.backend.add(testb) testc = models.SearchTestChild() testc.title = "Hello" testc.live = True testc.save() + self.backend.add(testc) testd = models.SearchTestChild() testd.title = "World" testd.save() + self.backend.add(testd) # Refresh the index self.backend.refresh_index() @@ -130,6 +129,7 @@ class BackendTests(object): def test_delete(self): # Delete one of the objects + self.backend.delete(self.testa) self.testa.delete() # Refresh index From 6847109bb994580e67ca33ba93f1c382ab81ec6e Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Tue, 29 Apr 2014 11:54:02 +0100 Subject: [PATCH 5/8] Removed 'object_indexed' check from update_index command Conflicts: wagtail/wagtailsearch/management/commands/update_index.py --- wagtail/wagtailsearch/management/commands/update_index.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/wagtail/wagtailsearch/management/commands/update_index.py b/wagtail/wagtailsearch/management/commands/update_index.py index ee85da8ff..0b4cb695f 100644 --- a/wagtail/wagtailsearch/management/commands/update_index.py +++ b/wagtail/wagtailsearch/management/commands/update_index.py @@ -25,11 +25,6 @@ class Command(BaseCommand): # Loop through objects for obj in model.objects.all(): - # Check if this object has an 'object_indexed' function - if hasattr(obj, 'object_indexed'): - if obj.object_indexed() is False: - continue - # Get key for this object key = toplevel_content_type + ':' + str(obj.pk) From b1fb9dc2e249024961323d6f34ce4ee970a698fe Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 20 Jun 2014 15:59:39 +0100 Subject: [PATCH 6/8] Fixed a few more double quotes --- wagtail/wagtailsearch/indexed.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/wagtail/wagtailsearch/indexed.py b/wagtail/wagtailsearch/indexed.py index b4626914f..4b636497b 100644 --- a/wagtail/wagtailsearch/indexed.py +++ b/wagtail/wagtailsearch/indexed.py @@ -15,13 +15,13 @@ class Indexed(object): @classmethod def indexed_get_content_type(cls): # Work out content type - content_type = (cls._meta.app_label + "_" + cls.__name__).lower() + content_type = (cls._meta.app_label + '_' + cls.__name__).lower() # Get parent content type parent = cls.indexed_get_parent() if parent: parent_content_type = parent.indexed_get_content_type() - return parent_content_type + "_" + content_type + return parent_content_type + '_' + content_type else: return content_type @@ -33,7 +33,7 @@ class Indexed(object): return parent.indexed_get_content_type() else: # At toplevel, return this content type - return (cls._meta.app_label + "_" + cls.__name__).lower() + return (cls._meta.app_label + '_' + cls.__name__).lower() @classmethod def indexed_get_indexed_fields(cls): From 4a70a4251b7ec5140351f7d2f255d3ccfc81894b Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Sun, 6 Apr 2014 14:41:40 +0100 Subject: [PATCH 7/8] Give some feedback from the add_bulk command --- wagtail/wagtailsearch/backends/db.py | 4 ++-- wagtail/wagtailsearch/backends/elasticsearch.py | 1 + wagtail/wagtailsearch/management/commands/update_index.py | 6 ++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/wagtail/wagtailsearch/backends/db.py b/wagtail/wagtailsearch/backends/db.py index 5ed8e07df..4c05b19df 100644 --- a/wagtail/wagtailsearch/backends/db.py +++ b/wagtail/wagtailsearch/backends/db.py @@ -22,7 +22,7 @@ class DBSearch(BaseSearch): pass # Not needed def add_bulk(self, obj_list): - pass # Not needed + return [] # Not needed def delete(self, obj): pass # Not needed @@ -70,4 +70,4 @@ class DBSearch(BaseSearch): for prefetch in prefetch_related: query = query.prefetch_related(prefetch) - return query \ No newline at end of file + return query diff --git 
a/wagtail/wagtailsearch/backends/elasticsearch.py b/wagtail/wagtailsearch/backends/elasticsearch.py index 4890b5e52..dbe755ff1 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch.py +++ b/wagtail/wagtailsearch/backends/elasticsearch.py @@ -447,6 +447,7 @@ class ElasticSearch(BaseSearch): action.update(doc) actions.append(action) + yield type_name, len(type_documents) bulk(self.es, actions) def delete(self, obj): diff --git a/wagtail/wagtailsearch/management/commands/update_index.py b/wagtail/wagtailsearch/management/commands/update_index.py index 0b4cb695f..2c1593635 100644 --- a/wagtail/wagtailsearch/management/commands/update_index.py +++ b/wagtail/wagtailsearch/management/commands/update_index.py @@ -57,10 +57,8 @@ class Command(BaseCommand): # Add objects to index self.stdout.write("Adding objects") - results = s.add_bulk(object_set.values()) - if results: - for result in results: - self.stdout.write(result[0] + ' ' + str(result[1])) + for result in s.add_bulk(object_set.values()): + self.stdout.write(result[0] + ' ' + str(result[1])) # Refresh index self.stdout.write("Refreshing index") From 673da4ab021eb7f2fa14d9e194d12689373ba9c5 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 20 Jun 2014 17:19:32 +0100 Subject: [PATCH 8/8] Set index:'not_analyzed' setting on content_type field --- wagtail/wagtailsearch/backends/elasticsearch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wagtail/wagtailsearch/backends/elasticsearch.py b/wagtail/wagtailsearch/backends/elasticsearch.py index dbe755ff1..8fb706bba 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch.py +++ b/wagtail/wagtailsearch/backends/elasticsearch.py @@ -41,7 +41,7 @@ class ElasticSearchMapping(object): # Make field list fields = { 'pk': dict(type='string', index='not_analyzed', store='yes'), - 'content_type': dict(type='string'), + 'content_type': dict(type='string', index='not_analyzed'), } fields.update(dict(