From 2ba532d745cfbc271574e4dd42a9b2d3c5563d36 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 15:56:41 +0100 Subject: [PATCH 01/40] Adds search query API & implements PlainText & MatchAll. --- wagtail/contrib/postgres_search/backend.py | 13 +++- wagtail/wagtailsearch/backends/base.py | 15 +++- wagtail/wagtailsearch/backends/db.py | 54 ++++++++------ .../wagtailsearch/backends/elasticsearch2.py | 54 +++++++------- wagtail/wagtailsearch/query.py | 74 +++++++++++++++++++ wagtail/wagtailsearch/tests/test_backends.py | 50 ++++++------- 6 files changed, 178 insertions(+), 82 deletions(-) create mode 100644 wagtail/wagtailsearch/query.py diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py index 7288ea139..df8d79df5 100644 --- a/wagtail/contrib/postgres_search/backend.py +++ b/wagtail/contrib/postgres_search/backend.py @@ -12,6 +12,7 @@ from django.utils.encoding import force_text from wagtail.wagtailsearch.backends.base import ( BaseSearchBackend, BaseSearchQuery, BaseSearchResults) from wagtail.wagtailsearch.index import RelatedFields, SearchField +from wagtail.wagtailsearch.query import MatchAll, PlainText from .models import IndexEntry from .utils import ( @@ -174,8 +175,8 @@ class PostgresSearchQuery(BaseSearchQuery): self.search_fields = self.queryset.model.get_search_fields() def get_search_query(self, config): - combine = OR if self.operator == 'or' else AND - search_terms = keyword_split(unidecode(self.query_string)) + combine = OR if self.query.operator == 'or' else AND + search_terms = keyword_split(unidecode(self.query.query_string)) if not search_terms: return SearchQuery('') return combine(SearchQuery(q, config=config) for q in search_terms) @@ -197,8 +198,14 @@ class PostgresSearchQuery(BaseSearchQuery): return field.boost def search(self, config, start, stop): - if self.query_string is None: + if isinstance(self.query, MatchAll): return self.queryset[start:stop] + + if not 
isinstance(self.query, PlainText): + raise NotImplementedError( + '%s is not supported by the PostgreSQL search backend.' + % self.query.__class__) + search_query = self.get_search_query(config=config) queryset = self.queryset query = queryset.query diff --git a/wagtail/wagtailsearch/backends/base.py b/wagtail/wagtailsearch/backends/base.py index c0fc4cc8a..5b6592360 100644 --- a/wagtail/wagtailsearch/backends/base.py +++ b/wagtail/wagtailsearch/backends/base.py @@ -1,11 +1,14 @@ from __future__ import absolute_import, unicode_literals +from warnings import warn + from django.db.models.lookups import Lookup from django.db.models.query import QuerySet from django.db.models.sql.where import SubqueryConstraint, WhereNode from wagtail.wagtailsearch.index import class_is_indexed +from wagtail.wagtailsearch.query import MATCH_ALL, PlainText class FilterError(Exception): @@ -19,11 +22,17 @@ class FieldError(Exception): class BaseSearchQuery(object): DEFAULT_OPERATOR = 'or' - def __init__(self, queryset, query_string, fields=None, operator=None, order_by_relevance=True): + def __init__(self, queryset, query, fields=None, operator=None, order_by_relevance=True): self.queryset = queryset - self.query_string = query_string + if query is None: + warn('Querying `None` is deprecated, use `MATCH_ALL` instead.', + DeprecationWarning) + query = MATCH_ALL + elif isinstance(query, str): + query = PlainText(query, + operator=operator or self.DEFAULT_OPERATOR) + self.query = query self.fields = fields - self.operator = operator or self.DEFAULT_OPERATOR self.order_by_relevance = order_by_relevance def _get_filterable_field(self, field_attname): diff --git a/wagtail/wagtailsearch/backends/db.py b/wagtail/wagtailsearch/backends/db.py index 90539fad0..1dbcdd325 100644 --- a/wagtail/wagtailsearch/backends/db.py +++ b/wagtail/wagtailsearch/backends/db.py @@ -5,6 +5,7 @@ from django.db.models.expressions import Value from wagtail.wagtailsearch.backends.base import ( BaseSearchBackend, 
BaseSearchQuery, BaseSearchResults) +from wagtail.wagtailsearch.query import MatchAll, PlainText class DatabaseSearchQuery(BaseSearchQuery): @@ -36,32 +37,41 @@ class DatabaseSearchQuery(BaseSearchQuery): q = models.Q() model = self.queryset.model - if self.query_string is not None: - # Get fields - fields = self.fields or [field.field_name for field in model.get_searchable_search_fields()] + if isinstance(self.query, MatchAll): + return q - # Get terms - terms = self.query_string.split() - if not terms: - return model.objects.none() + if not isinstance(self.query, PlainText): + raise NotImplementedError( + '%s is not supported by the database search backend.' + % self.query.__class__) - # Filter by terms - for term in terms: - term_query = models.Q() - for field_name in fields: - # Check if the field exists (this will filter out indexed callables) - try: - model._meta.get_field(field_name) - except models.fields.FieldDoesNotExist: - continue + # Get fields + fields = self.fields or [field.field_name for field in model.get_searchable_search_fields()] - # Filter on this field - term_query |= models.Q(**{'%s__icontains' % field_name: term}) + # Get terms + terms = self.query.query_string.split() + if not terms: + return model.objects.none() - if self.operator == 'or': - q |= term_query - elif self.operator == 'and': - q &= term_query + # Filter by terms + for term in terms: + term_query = models.Q() + for field_name in fields: + # Check if the field exists (this will filter out indexed callables) + try: + model._meta.get_field(field_name) + except models.fields.FieldDoesNotExist: + continue + + # Filter on this field + term_query |= models.Q(**{'%s__icontains' % field_name: term}) + + operator = self.query.operator + + if operator == 'or': + q |= term_query + elif operator == 'and': + q &= term_query return q diff --git a/wagtail/wagtailsearch/backends/elasticsearch2.py b/wagtail/wagtailsearch/backends/elasticsearch2.py index 207d8155e..35e1af0d1 100644 --- 
a/wagtail/wagtailsearch/backends/elasticsearch2.py +++ b/wagtail/wagtailsearch/backends/elasticsearch2.py @@ -16,6 +16,7 @@ from wagtail.wagtailsearch.backends.base import ( BaseSearchBackend, BaseSearchQuery, BaseSearchResults) from wagtail.wagtailsearch.index import ( FilterField, Indexed, RelatedFields, SearchField, class_is_indexed) +from wagtail.wagtailsearch.query import MatchAll, PlainText def get_model_root(model): @@ -371,40 +372,35 @@ class Elasticsearch2SearchQuery(BaseSearchQuery): return filter_out def get_inner_query(self): - if self.query_string is not None: - fields = self.fields or ['_all', '_partials'] + if isinstance(self.query, MatchAll): + return {'match_all': {}} - if len(fields) == 1: - if self.operator == 'or': - query = { - 'match': { - fields[0]: self.query_string, - } - } - else: - query = { - 'match': { - fields[0]: { - 'query': self.query_string, - 'operator': self.operator, - } - } - } - else: - query = { - 'multi_match': { - 'query': self.query_string, - 'fields': fields, - } + if not isinstance(self.query, PlainText): + raise NotImplementedError( + '%s is not supported by the Elasticsearch search backend.' 
+ % self.query.__class__) + + fields = self.fields or ['_all', '_partials'] + operator = self.query.operator + + if len(fields) == 1: + field = fields[0] + query = { + 'match': { + field: self.query.query_string, } - - if self.operator != 'or': - query['multi_match']['operator'] = self.operator + } + if operator != 'or': + query['match'][field]['operator'] = operator else: query = { - 'match_all': {} + 'multi_match': { + 'query': self.query.query_string, + 'fields': fields, + } } - + if operator != 'or': + query['multi_match']['operator'] = operator return query def get_content_type_filter(self): diff --git a/wagtail/wagtailsearch/query.py b/wagtail/wagtailsearch/query.py new file mode 100644 index 000000000..fb72b22e1 --- /dev/null +++ b/wagtail/wagtailsearch/query.py @@ -0,0 +1,74 @@ +class SearchQuery: + def __and__(self, other): + return And([self, other]) + + def __or__(self, other): + return Or([self, other]) + + def __invert__(self): + return Not(self) + + +class SearchQueryOperator(SearchQuery): + pass + + +class And(SearchQueryOperator): + def __init__(self, subqueries): + self.subqueries = subqueries + + +class Or(SearchQueryOperator): + def __init__(self, subqueries): + self.subqueries = subqueries + + +class Not(SearchQueryOperator): + def __init__(self, subquery): + self.subquery = subquery + + +class MatchAll(SearchQuery): + pass + + +class PlainText(SearchQuery): + def __init__(self, query_string, operator=None, boost=1.0): + self.query_string = query_string + self.operator = operator + self.boost = boost + + +class Term(SearchQuery): + def __init__(self, term, boost=1.0): + self.term = term + self.boost = boost + + +class Prefix(SearchQuery): + def __init__(self, prefix, boost=1.0): + self.prefix = prefix + self.boost = boost + + +class Fuzzy(SearchQuery): + def __init__(self, term, max_distance=3, boost=1.0): + self.term = term + self.max_distance = max_distance + self.boost = boost + + +class Boost(SearchQuery): + def __init__(self, query, boost): 
+ self.query = query + self.boost = boost + + +class Filter(SearchQuery): + def __init__(self, query, include=None, exclude=None): + self.query = query + self.include = include + self.exclude = exclude + + +MATCH_ALL = MatchAll() diff --git a/wagtail/wagtailsearch/tests/test_backends.py b/wagtail/wagtailsearch/tests/test_backends.py index 7ed7ad056..8e79b4876 100644 --- a/wagtail/wagtailsearch/tests/test_backends.py +++ b/wagtail/wagtailsearch/tests/test_backends.py @@ -17,6 +17,7 @@ from wagtail.wagtailsearch.backends import ( InvalidSearchBackendError, get_search_backend, get_search_backends) from wagtail.wagtailsearch.backends.base import FieldError from wagtail.wagtailsearch.backends.db import DatabaseSearchBackend +from wagtail.wagtailsearch.query import MATCH_ALL class BackendTests(WagtailTestUtils): @@ -65,8 +66,7 @@ class BackendTests(WagtailTestUtils): self.assertSetEqual(set(results), set()) def test_search_all(self): - # Searches on None should return everything in the index - results = self.backend.search(None, models.Book) + results = self.backend.search(MATCH_ALL, models.Book) self.assertSetEqual(set(results), set(models.Book.objects.all())) def test_ranking(self): @@ -90,7 +90,7 @@ class BackendTests(WagtailTestUtils): def test_search_on_child_class(self): # Searches on a child class should only return results that have the child class as well # and all results should be instances of the child class - results = self.backend.search(None, models.Novel) + results = self.backend.search(MATCH_ALL, models.Novel) self.assertSetEqual(set(results), set(models.Novel.objects.all())) def test_search_child_class_field_from_parent(self): @@ -162,7 +162,7 @@ class BackendTests(WagtailTestUtils): # FILTERING TESTS def test_filter_exact_value(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages=440)) self.assertUnsortedListEqual([r.title for 
r in results], [ "The Return of the King", @@ -170,14 +170,14 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_exact_value_on_parent_model_field(self): - results = self.backend.search(None, models.Novel.objects.filter(number_of_pages=440)) + results = self.backend.search(MATCH_ALL, models.Novel.objects.filter(number_of_pages=440)) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King" ]) def test_filter_lt(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__lt=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__lt=440)) self.assertUnsortedListEqual([r.title for r in results], [ "The Hobbit", @@ -188,7 +188,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_lte(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__lte=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__lte=440)) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -201,7 +201,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_gt(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__gt=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__gt=440)) self.assertUnsortedListEqual([r.title for r in results], [ "JavaScript: The Definitive Guide", @@ -213,7 +213,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_gte(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__gte=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__gte=440)) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -227,7 +227,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_in_list(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__in=[440, 
1160])) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__in=[440, 1160])) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -236,7 +236,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_in_iterable(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__in=iter([440, 1160]))) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__in=iter([440, 1160]))) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -246,7 +246,7 @@ class BackendTests(WagtailTestUtils): def test_filter_in_values_list_subquery(self): values = models.Book.objects.filter(number_of_pages__lt=440).values_list('number_of_pages', flat=True) - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__in=values)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__in=values)) self.assertUnsortedListEqual([r.title for r in results], [ "The Hobbit", @@ -258,7 +258,7 @@ class BackendTests(WagtailTestUtils): def test_filter_isnull_true(self): # Note: We don't know the birth dates of any of the programming guide authors - results = self.backend.search(None, models.Author.objects.filter(date_of_birth__isnull=True)) + results = self.backend.search(MATCH_ALL, models.Author.objects.filter(date_of_birth__isnull=True)) self.assertUnsortedListEqual([r.name for r in results], [ "David Ascher", @@ -273,7 +273,7 @@ class BackendTests(WagtailTestUtils): def test_filter_isnull_false(self): # Note: We know the birth dates of all of the novel authors - results = self.backend.search(None, models.Author.objects.filter(date_of_birth__isnull=False)) + results = self.backend.search(MATCH_ALL, models.Author.objects.filter(date_of_birth__isnull=False)) self.assertUnsortedListEqual([r.name for r in results], [ "Isaac Asimov", @@ -282,7 +282,7 @@ class BackendTests(WagtailTestUtils): ]) 
def test_filter_prefix(self): - results = self.backend.search(None, models.Book.objects.filter(title__startswith="Th")) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(title__startswith="Th")) self.assertUnsortedListEqual([r.title for r in results], [ "The Hobbit", @@ -294,14 +294,14 @@ class BackendTests(WagtailTestUtils): def test_filter_and_operator(self): results = self.backend.search( - None, models.Book.objects.filter(number_of_pages=440) & models.Book.objects.filter(publication_date=date(1955, 10, 20))) + MATCH_ALL, models.Book.objects.filter(number_of_pages=440) & models.Book.objects.filter(publication_date=date(1955, 10, 20))) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King" ]) def test_filter_or_operator(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages=440) | models.Book.objects.filter(number_of_pages=1160)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages=440) | models.Book.objects.filter(number_of_pages=1160)) self.assertUnsortedListEqual([r.title for r in results], [ "Learning Python", @@ -311,12 +311,12 @@ class BackendTests(WagtailTestUtils): def test_filter_on_non_filterable_field(self): with self.assertRaises(FieldError): - list(self.backend.search(None, models.Author.objects.filter(name__startswith="Issac"))) + list(self.backend.search(MATCH_ALL, models.Author.objects.filter(name__startswith="Issac"))) # ORDER BY RELEVANCE def test_order_by_relevance(self): - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Ordering should be set to "number_of_pages" self.assertEqual([r.title for r in results], [ @@ -332,19 +332,19 @@ class BackendTests(WagtailTestUtils): def test_order_by_non_filterable_field(self): with self.assertRaises(FieldError): 
- list(self.backend.search(None, models.Author.objects.order_by('name'), order_by_relevance=False)) + list(self.backend.search(MATCH_ALL, models.Author.objects.order_by('name'), order_by_relevance=False)) # SLICING TESTS def test_single_result(self): - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) self.assertEqual(results[0].title, "Foundation") self.assertEqual(results[1].title, "The Hobbit") def test_limit(self): # Note: we need consistent ordering for this test - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Limit the results results = results[:3] @@ -357,7 +357,7 @@ class BackendTests(WagtailTestUtils): def test_offset(self): # Note: we need consistent ordering for this test - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Offset the results results = results[3:] @@ -372,7 +372,7 @@ class BackendTests(WagtailTestUtils): def test_offset_and_limit(self): # Note: we need consistent ordering for this test - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Offset the results results = results[3:6] @@ -419,7 +419,7 @@ class BackendTests(WagtailTestUtils): # instead of three). 
# Note: we need consistent ordering for this test - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Limit the results results = results[:3] From 70393cb131dd21ecb654d5d8f55bfa48e2c405b2 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 16:18:31 +0100 Subject: [PATCH 02/40] Use MATCH_ALL in Elasticsearch tests. --- .../wagtailsearch/tests/elasticsearch_common_tests.py | 11 ++++++----- .../tests/test_elasticsearch2_backend.py | 5 +++-- .../tests/test_elasticsearch5_backend.py | 5 +++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/wagtail/wagtailsearch/tests/elasticsearch_common_tests.py b/wagtail/wagtailsearch/tests/elasticsearch_common_tests.py index bb0c1a208..903cbb937 100644 --- a/wagtail/wagtailsearch/tests/elasticsearch_common_tests.py +++ b/wagtail/wagtailsearch/tests/elasticsearch_common_tests.py @@ -6,6 +6,7 @@ from io import StringIO from django.core import management from wagtail.tests.search import models +from wagtail.wagtailsearch.query import MATCH_ALL class ElasticsearchCommonSearchBackendTests(object): @@ -111,7 +112,7 @@ class ElasticsearchCommonSearchBackendTests(object): ) # This should not give any results - results = self.backend.search(None, models.Book) + results = self.backend.search(MATCH_ALL, models.Book) self.assertSetEqual(set(results), set()) def test_annotate_score(self): @@ -129,7 +130,7 @@ class ElasticsearchCommonSearchBackendTests(object): def test_more_than_ten_results(self): # #3431 reported that Elasticsearch only sends back 10 results if the results set is not sliced - results = self.backend.search(None, models.Book) + results = self.backend.search(MATCH_ALL, models.Book) self.assertEqual(len(results), 13) @@ -143,7 +144,7 @@ class ElasticsearchCommonSearchBackendTests(object): index.add_items(models.Book, 
books) index.refresh() - results = self.backend.search(None, models.Book) + results = self.backend.search(MATCH_ALL, models.Book) self.assertEqual(len(results), 163) def test_slice_more_than_one_hundred_results(self): @@ -155,7 +156,7 @@ class ElasticsearchCommonSearchBackendTests(object): index.add_items(models.Book, books) index.refresh() - results = self.backend.search(None, models.Book)[10:120] + results = self.backend.search(MATCH_ALL, models.Book)[10:120] self.assertEqual(len(results), 110) def test_slice_to_next_page(self): @@ -169,5 +170,5 @@ class ElasticsearchCommonSearchBackendTests(object): index.add_items(models.Book, books) index.refresh() - results = self.backend.search(None, models.Book)[110:] + results = self.backend.search(MATCH_ALL, models.Book)[110:] self.assertEqual(len(results), 53) diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py index 1508e9a6e..4f1f853aa 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py @@ -13,6 +13,7 @@ from elasticsearch.serializer import JSONSerializer from wagtail.tests.search import models from wagtail.wagtailsearch.backends.elasticsearch2 import ( Elasticsearch2SearchBackend, get_model_root) +from wagtail.wagtailsearch.query import MATCH_ALL from .elasticsearch_common_tests import ElasticsearchCommonSearchBackendTests from .test_backends import BackendTests @@ -57,9 +58,9 @@ class TestElasticsearch2SearchQuery(TestCase): }} self.assertDictEqual(query.get_query(), expected_result) - def test_none_query_string(self): + def test_match_all(self): # Create a query - query = self.query_class(models.Book.objects.all(), None) + query = self.query_class(models.Book.objects.all(), MATCH_ALL) # Check it expected_result = {'filtered': { diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py 
b/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py index 8382a140a..1ad14a993 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py @@ -12,6 +12,7 @@ from elasticsearch.serializer import JSONSerializer from wagtail.tests.search import models from wagtail.wagtailsearch.backends.elasticsearch5 import Elasticsearch5SearchBackend +from wagtail.wagtailsearch.query import MATCH_ALL from .elasticsearch_common_tests import ElasticsearchCommonSearchBackendTests from .test_backends import BackendTests @@ -61,9 +62,9 @@ class TestElasticsearch5SearchQuery(TestCase): }} self.assertDictEqual(query.get_query(), expected_result) - def test_none_query_string(self): + def test_match_all(self): # Create a query - query = self.query_class(models.Book.objects.all(), None) + query = self.query_class(models.Book.objects.all(), MATCH_ALL) # Check it expected_result = {'bool': { From 39da11cce681bf93c08b596c767508006b1e1a94 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 16:23:48 +0100 Subject: [PATCH 03/40] Fixes Elasticsearch querying. 
--- .../wagtailsearch/backends/elasticsearch2.py | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/wagtail/wagtailsearch/backends/elasticsearch2.py b/wagtail/wagtailsearch/backends/elasticsearch2.py index 35e1af0d1..e65e9f912 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch2.py +++ b/wagtail/wagtailsearch/backends/elasticsearch2.py @@ -384,23 +384,29 @@ class Elasticsearch2SearchQuery(BaseSearchQuery): operator = self.query.operator if len(fields) == 1: - field = fields[0] - query = { + if operator == 'or': + return { + 'match': { + fields[0]: self.query.query_string, + } + } + return { 'match': { - field: self.query.query_string, + fields[0]: { + 'query': self.query.query_string, + 'operator': operator, + } } } - if operator != 'or': - query['match'][field]['operator'] = operator - else: - query = { - 'multi_match': { - 'query': self.query.query_string, - 'fields': fields, - } + + query = { + 'multi_match': { + 'query': self.query.query_string, + 'fields': fields, } - if operator != 'or': - query['multi_match']['operator'] = operator + } + if operator != 'or': + query['multi_match']['operator'] = operator return query def get_content_type_filter(self): From 69789d7c35a9d9addce03bbef3cb72a4c24d4a89 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 16:30:13 +0100 Subject: [PATCH 04/40] =?UTF-8?q?BaseSearchQuery=20=E2=86=92=20SearchQuery?= =?UTF-8?q?Compiler.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wagtail/contrib/postgres_search/backend.py | 14 +++++++------- wagtail/wagtailsearch/backends/base.py | 13 +++++++------ wagtail/wagtailsearch/backends/db.py | 10 +++++----- .../wagtailsearch/backends/elasticsearch2.py | 18 +++++++++--------- .../wagtailsearch/backends/elasticsearch5.py | 6 +++--- 5 files changed, 31 insertions(+), 30 deletions(-) diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py 
index df8d79df5..83c2ad16d 100644 --- a/wagtail/contrib/postgres_search/backend.py +++ b/wagtail/contrib/postgres_search/backend.py @@ -10,7 +10,7 @@ from django.db.models.functions import Cast from django.utils.encoding import force_text from wagtail.wagtailsearch.backends.base import ( - BaseSearchBackend, BaseSearchQuery, BaseSearchResults) + BaseSearchBackend, SearchQueryCompiler, BaseSearchResults) from wagtail.wagtailsearch.index import RelatedFields, SearchField from wagtail.wagtailsearch.query import MatchAll, PlainText @@ -167,11 +167,11 @@ class Index(object): return self.name -class PostgresSearchQuery(BaseSearchQuery): +class PostgresSearchQueryCompiler(SearchQueryCompiler): DEFAULT_OPERATOR = 'and' def __init__(self, *args, **kwargs): - super(PostgresSearchQuery, self).__init__(*args, **kwargs) + super(PostgresSearchQueryCompiler, self).__init__(*args, **kwargs) self.search_fields = self.queryset.model.get_search_fields() def get_search_query(self, config): @@ -235,11 +235,11 @@ class PostgresSearchQuery(BaseSearchQuery): class PostgresSearchResults(BaseSearchResults): def _do_search(self): - return list(self.query.search(self.backend.get_config(), - self.start, self.stop)) + return list(self.query_compiler.search(self.backend.get_config(), + self.start, self.stop)) def _do_count(self): - return self.query.search(self.backend.get_config(), None, None).count() + return self.query_compiler.search(self.backend.get_config(), None, None).count() class PostgresSearchRebuilder: @@ -277,7 +277,7 @@ class PostgresSearchAtomicRebuilder(PostgresSearchRebuilder): class PostgresSearchBackend(BaseSearchBackend): - query_class = PostgresSearchQuery + query_compiler_class = PostgresSearchQueryCompiler results_class = PostgresSearchResults rebuilder_class = PostgresSearchRebuilder atomic_rebuilder_class = PostgresSearchAtomicRebuilder diff --git a/wagtail/wagtailsearch/backends/base.py b/wagtail/wagtailsearch/backends/base.py index 5b6592360..55d7b3919 100644 --- 
a/wagtail/wagtailsearch/backends/base.py +++ b/wagtail/wagtailsearch/backends/base.py @@ -19,7 +19,7 @@ class FieldError(Exception): pass -class BaseSearchQuery(object): +class SearchQueryCompiler(object): DEFAULT_OPERATOR = 'or' def __init__(self, queryset, query, fields=None, operator=None, order_by_relevance=True): @@ -104,9 +104,9 @@ class BaseSearchQuery(object): class BaseSearchResults(object): - def __init__(self, backend, query, prefetch_related=None): + def __init__(self, backend, query_compiler, prefetch_related=None): self.backend = backend - self.query = query + self.query_compiler = query_compiler self.prefetch_related = prefetch_related self.start = 0 self.stop = None @@ -129,7 +129,8 @@ class BaseSearchResults(object): def _clone(self): klass = self.__class__ - new = klass(self.backend, self.query, prefetch_related=self.prefetch_related) + new = klass(self.backend, self.query_compiler, + prefetch_related=self.prefetch_related) new.start = self.start new.stop = self.stop new._score_field = self._score_field @@ -209,7 +210,7 @@ class EmptySearchResults(BaseSearchResults): class BaseSearchBackend(object): - query_class = None + query_compiler_class = None results_class = None rebuilder_class = None @@ -285,7 +286,7 @@ class BaseSearchBackend(object): raise ValueError("operator must be either 'or' or 'and'") # Search - search_query = self.query_class( + search_query = self.query_compiler_class( queryset, query_string, fields=fields, operator=operator, order_by_relevance=order_by_relevance ) return self.results_class(self, search_query) diff --git a/wagtail/wagtailsearch/backends/db.py b/wagtail/wagtailsearch/backends/db.py index 1dbcdd325..4aa9f40d5 100644 --- a/wagtail/wagtailsearch/backends/db.py +++ b/wagtail/wagtailsearch/backends/db.py @@ -4,11 +4,11 @@ from django.db import models from django.db.models.expressions import Value from wagtail.wagtailsearch.backends.base import ( - BaseSearchBackend, BaseSearchQuery, BaseSearchResults) + 
BaseSearchBackend, SearchQueryCompiler, BaseSearchResults) from wagtail.wagtailsearch.query import MatchAll, PlainText -class DatabaseSearchQuery(BaseSearchQuery): +class DatabaseSearchQueryCompiler(SearchQueryCompiler): DEFAULT_OPERATOR = 'and' def _process_lookup(self, field, lookup, value): @@ -78,8 +78,8 @@ class DatabaseSearchQuery(BaseSearchQuery): class DatabaseSearchResults(BaseSearchResults): def get_queryset(self): - queryset = self.query.queryset - q = self.query.get_extra_q() + queryset = self.query_compiler.queryset + q = self.query_compiler.get_extra_q() return queryset.filter(q).distinct()[self.start:self.stop] @@ -96,7 +96,7 @@ class DatabaseSearchResults(BaseSearchResults): class DatabaseSearchBackend(BaseSearchBackend): - query_class = DatabaseSearchQuery + query_compiler_class = DatabaseSearchQueryCompiler results_class = DatabaseSearchResults def reset_index(self): diff --git a/wagtail/wagtailsearch/backends/elasticsearch2.py b/wagtail/wagtailsearch/backends/elasticsearch2.py index e65e9f912..3e59f2c58 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch2.py +++ b/wagtail/wagtailsearch/backends/elasticsearch2.py @@ -13,7 +13,7 @@ from elasticsearch.helpers import bulk from wagtail.utils.utils import deep_update from wagtail.wagtailsearch.backends.base import ( - BaseSearchBackend, BaseSearchQuery, BaseSearchResults) + BaseSearchBackend, SearchQueryCompiler, BaseSearchResults) from wagtail.wagtailsearch.index import ( FilterField, Indexed, RelatedFields, SearchField, class_is_indexed) from wagtail.wagtailsearch.query import MatchAll, PlainText @@ -261,12 +261,12 @@ class Elasticsearch2Mapping(object): return '' % (self.model.__name__, ) -class Elasticsearch2SearchQuery(BaseSearchQuery): +class Elasticsearch2SearchQueryCompiler(SearchQueryCompiler): mapping_class = Elasticsearch2Mapping DEFAULT_OPERATOR = 'or' def __init__(self, *args, **kwargs): - super(Elasticsearch2SearchQuery, self).__init__(*args, **kwargs) + 
super(Elasticsearch2SearchQueryCompiler, self).__init__(*args, **kwargs) self.mapping = self.mapping_class(self.queryset.model) # Convert field names into index column names @@ -496,11 +496,11 @@ class Elasticsearch2SearchResults(BaseSearchResults): def _get_es_body(self, for_count=False): body = { - 'query': self.query.get_query() + 'query': self.query_compiler.get_query() } if not for_count: - sort = self.query.get_sort() + sort = self.query_compiler.get_sort() if sort is not None: body['sort'] = sort @@ -519,7 +519,7 @@ class Elasticsearch2SearchResults(BaseSearchResults): results = {str(pk): None for pk in pks} # Find objects in database and add them to dict - for obj in self.query.queryset.filter(pk__in=pks): + for obj in self.query_compiler.queryset.filter(pk__in=pks): results[str(obj.pk)] = obj if self._score_field: @@ -542,7 +542,7 @@ class Elasticsearch2SearchResults(BaseSearchResults): use_scroll = limit is None or limit > PAGE_SIZE params = { - 'index': self.backend.get_index_for_model(self.query.queryset.model).name, + 'index': self.backend.get_index_for_model(self.query_compiler.queryset.model).name, 'body': self._get_es_body(), '_source': False, self.fields_param_name: 'pk', @@ -611,7 +611,7 @@ class Elasticsearch2SearchResults(BaseSearchResults): def _do_count(self): # Get count hit_count = self.backend.es.count( - index=self.backend.get_index_for_model(self.query.queryset.model).name, + index=self.backend.get_index_for_model(self.query_compiler.queryset.model).name, body=self._get_es_body(for_count=True), )['count'] @@ -819,7 +819,7 @@ class ElasticsearchAtomicIndexRebuilder(ElasticsearchIndexRebuilder): class Elasticsearch2SearchBackend(BaseSearchBackend): index_class = Elasticsearch2Index - query_class = Elasticsearch2SearchQuery + query_compiler_class = Elasticsearch2SearchQueryCompiler results_class = Elasticsearch2SearchResults mapping_class = Elasticsearch2Mapping basic_rebuilder_class = ElasticsearchIndexRebuilder diff --git 
a/wagtail/wagtailsearch/backends/elasticsearch5.py b/wagtail/wagtailsearch/backends/elasticsearch5.py index 92e7be535..148ff5370 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch5.py +++ b/wagtail/wagtailsearch/backends/elasticsearch5.py @@ -2,7 +2,7 @@ from __future__ import absolute_import, unicode_literals from .elasticsearch2 import ( Elasticsearch2Index, Elasticsearch2Mapping, Elasticsearch2SearchBackend, - Elasticsearch2SearchQuery, Elasticsearch2SearchResults) + Elasticsearch2SearchQueryCompiler, Elasticsearch2SearchResults) class Elasticsearch5Mapping(Elasticsearch2Mapping): @@ -15,7 +15,7 @@ class Elasticsearch5Index(Elasticsearch2Index): pass -class Elasticsearch5SearchQuery(Elasticsearch2SearchQuery): +class Elasticsearch5SearchQuery(Elasticsearch2SearchQueryCompiler): mapping_class = Elasticsearch5Mapping def _connect_filters(self, filters, connector, negated): @@ -77,7 +77,7 @@ class Elasticsearch5SearchResults(Elasticsearch2SearchResults): class Elasticsearch5SearchBackend(Elasticsearch2SearchBackend): mapping_class = Elasticsearch5Mapping index_class = Elasticsearch5Index - query_class = Elasticsearch5SearchQuery + query_compiler_class = Elasticsearch5SearchQuery results_class = Elasticsearch5SearchResults From 2b4bb2b7180ea7d75848c982139b6270b97a8f9c Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 16:38:30 +0100 Subject: [PATCH 05/40] Renames `query_string` arguments to `query`. 
--- wagtail/wagtailsearch/backends/base.py | 6 +++--- wagtail/wagtailsearch/queryset.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/wagtail/wagtailsearch/backends/base.py b/wagtail/wagtailsearch/backends/base.py index 55d7b3919..9ff3d6d1e 100644 --- a/wagtail/wagtailsearch/backends/base.py +++ b/wagtail/wagtailsearch/backends/base.py @@ -241,7 +241,7 @@ class BaseSearchBackend(object): def delete(self, obj): raise NotImplementedError - def search(self, query_string, model_or_queryset, fields=None, filters=None, + def search(self, query, model_or_queryset, fields=None, filters=None, prefetch_related=None, operator=None, order_by_relevance=True): # Find model/queryset if isinstance(model_or_queryset, QuerySet): @@ -256,7 +256,7 @@ class BaseSearchBackend(object): return EmptySearchResults() # Check that theres still a query string after the clean up - if query_string == "": + if query == "": return EmptySearchResults() # Only fields that are indexed as a SearchField can be passed in fields @@ -287,6 +287,6 @@ class BaseSearchBackend(object): # Search search_query = self.query_compiler_class( - queryset, query_string, fields=fields, operator=operator, order_by_relevance=order_by_relevance + queryset, query, fields=fields, operator=operator, order_by_relevance=order_by_relevance ) return self.results_class(self, search_query) diff --git a/wagtail/wagtailsearch/queryset.py b/wagtail/wagtailsearch/queryset.py index 3471a4869..557e48ef2 100644 --- a/wagtail/wagtailsearch/queryset.py +++ b/wagtail/wagtailsearch/queryset.py @@ -4,11 +4,11 @@ from wagtail.wagtailsearch.backends import get_search_backend class SearchableQuerySetMixin(object): - def search(self, query_string, fields=None, + def search(self, query, fields=None, operator=None, order_by_relevance=True, backend='default'): """ This runs a search query on all the items in the QuerySet """ search_backend = get_search_backend(backend) - return search_backend.search(query_string, self, 
fields=fields, + return search_backend.search(query, self, fields=fields, operator=operator, order_by_relevance=order_by_relevance) From cf5d7ae0f0b0b0f1de47c17e158a042f5bf7022c Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 16:53:18 +0100 Subject: [PATCH 06/40] Fixes Elasticsearch SearchQueryCompiler names. --- .../wagtailsearch/backends/elasticsearch5.py | 4 +- .../tests/test_elasticsearch2_backend.py | 108 +++++++++--------- .../tests/test_elasticsearch5_backend.py | 108 +++++++++--------- 3 files changed, 110 insertions(+), 110 deletions(-) diff --git a/wagtail/wagtailsearch/backends/elasticsearch5.py b/wagtail/wagtailsearch/backends/elasticsearch5.py index 148ff5370..e2a30055a 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch5.py +++ b/wagtail/wagtailsearch/backends/elasticsearch5.py @@ -15,7 +15,7 @@ class Elasticsearch5Index(Elasticsearch2Index): pass -class Elasticsearch5SearchQuery(Elasticsearch2SearchQueryCompiler): +class Elasticsearch5SearchQueryCompiler(Elasticsearch2SearchQueryCompiler): mapping_class = Elasticsearch5Mapping def _connect_filters(self, filters, connector, negated): @@ -77,7 +77,7 @@ class Elasticsearch5SearchResults(Elasticsearch2SearchResults): class Elasticsearch5SearchBackend(Elasticsearch2SearchBackend): mapping_class = Elasticsearch5Mapping index_class = Elasticsearch5Index - query_compiler_class = Elasticsearch5SearchQuery + query_compiler_class = Elasticsearch5SearchQueryCompiler results_class = Elasticsearch5SearchResults diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py index 4f1f853aa..b82d8bb2d 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py @@ -45,55 +45,55 @@ class TestElasticsearch2SearchQuery(TestCase): json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default) ) - query_class = 
Elasticsearch2SearchBackend.query_class + query_compiler_class = Elasticsearch2SearchBackend.query_compiler_class def test_simple(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello") # Check it expected_result = {'filtered': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_match_all(self): # Create a query - query = self.query_class(models.Book.objects.all(), MATCH_ALL) + query_compiler = self.query_compiler_class(models.Book.objects.all(), MATCH_ALL) # Check it expected_result = {'filtered': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'query': {'match_all': {}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_and_operator(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", operator='and') + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", operator='and') # Check it expected_result = {'filtered': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials'], 'operator': 'and'}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_filter(self): # Create a query - query = self.query_class(models.Book.objects.filter(title="Test"), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title="Test"), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, {'term': {'title_filter': 'Test'}} ]}, 'query': 
{'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_and_filter(self): # Create a query - query = self.query_class(models.Book.objects.filter(title="Test", publication_date=datetime.date(2017, 10, 18)), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title="Test", publication_date=datetime.date(2017, 10, 18)), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ @@ -102,7 +102,7 @@ class TestElasticsearch2SearchQuery(TestCase): ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} # Make sure field filters are sorted (as they can be in any order which may cause false positives) - query = query.get_query() + query = query_compiler.get_query() field_filters = query['filtered']['filter']['and'][1]['and'] field_filters[:] = sorted(field_filters, key=lambda f: list(f['term'].keys())[0]) @@ -110,10 +110,10 @@ class TestElasticsearch2SearchQuery(TestCase): def test_or_filter(self): # Create a query - query = self.query_class(models.Book.objects.filter(Q(title="Test") | Q(publication_date=datetime.date(2017, 10, 18))), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(Q(title="Test") | Q(publication_date=datetime.date(2017, 10, 18))), "Hello") # Make sure field filters are sorted (as they can be in any order which may cause false positives) - query = query.get_query() + query = query_compiler.get_query() field_filters = query['filtered']['filter']['and'][1]['or'] field_filters[:] = sorted(field_filters, key=lambda f: list(f['term'].keys())[0]) @@ -126,51 +126,51 @@ class TestElasticsearch2SearchQuery(TestCase): def test_negated_filter(self): # Create a query - query = self.query_class(models.Book.objects.exclude(publication_date=datetime.date(2017, 10, 18)), "Hello") + query_compiler = 
self.query_compiler_class(models.Book.objects.exclude(publication_date=datetime.date(2017, 10, 18)), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, {'not': {'term': {'publication_date_filter': '2017-10-18'}}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_fields(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", fields=['title']) + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title']) # Check it expected_result = {'filtered': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'query': {'match': {'title': 'Hello'}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_fields_with_and_operator(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", fields=['title'], operator='and') + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title'], operator='and') # Check it expected_result = {'filtered': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'query': {'match': {'title': {'query': 'Hello', 'operator': 'and'}}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_multiple_fields(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", fields=['title', 'content']) + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title', 'content']) # Check it expected_result = {'filtered': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'query': {'multi_match': {'fields': ['title', 'content'], 'query': 'Hello'}} }} - 
self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_multiple_fields_with_and_operator(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.all(), "Hello", fields=['title', 'content'], operator='and' ) @@ -179,68 +179,68 @@ class TestElasticsearch2SearchQuery(TestCase): 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'query': {'multi_match': {'fields': ['title', 'content'], 'query': 'Hello', 'operator': 'and'}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_exact_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__exact="Test"), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__exact="Test"), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, {'term': {'title_filter': 'Test'}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_none_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title=None), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title=None), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, {'missing': {'field': 'title_filter'}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_isnull_true_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__isnull=True), 
"Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__isnull=True), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, {'missing': {'field': 'title_filter'}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_isnull_false_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__isnull=False), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__isnull=False), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, {'exists': {'field': 'title_filter'}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_startswith_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__startswith="Test"), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__startswith="Test"), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, {'prefix': {'title_filter': 'Test'}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_gt_lookup(self): # This also tests conversion of python dates to strings # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__gt=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -249,11 +249,11 @@ class 
TestElasticsearch2SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'gt': '2014-04-29'}}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_lt_lookup(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__lt=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -262,11 +262,11 @@ class TestElasticsearch2SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'lt': '2014-04-29'}}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_gte_lookup(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__gte=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -275,11 +275,11 @@ class TestElasticsearch2SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'gte': '2014-04-29'}}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_lte_lookup(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__lte=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -288,14 +288,14 @@ class TestElasticsearch2SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'lte': '2014-04-29'}}} ]}, 'query': {'multi_match': {'query': 'Hello', 
'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_range_lookup(self): start_date = datetime.datetime(2014, 4, 29) end_date = datetime.datetime(2014, 8, 19) # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__range=(start_date, end_date)), "Hello" ) @@ -304,37 +304,37 @@ class TestElasticsearch2SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'gte': '2014-04-29', 'lte': '2014-08-19'}}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_custom_ordering(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.order_by('publication_date'), "Hello", order_by_relevance=False ) # Check it expected_result = [{'publication_date_filter': 'asc'}] - self.assertDictEqual(query.get_sort(), expected_result) + self.assertDictEqual(query_compiler.get_sort(), expected_result) def test_custom_ordering_reversed(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.order_by('-publication_date'), "Hello", order_by_relevance=False ) # Check it expected_result = [{'publication_date_filter': 'desc'}] - self.assertDictEqual(query.get_sort(), expected_result) + self.assertDictEqual(query_compiler.get_sort(), expected_result) def test_custom_ordering_multiple(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.order_by('publication_date', 'number_of_pages'), "Hello", order_by_relevance=False ) # Check it expected_result = [{'publication_date_filter': 'asc'}, 
{'number_of_pages_filter': 'asc'}] - self.assertDictEqual(query.get_sort(), expected_result) + self.assertDictEqual(query_compiler.get_sort(), expected_result) class TestElasticsearch2SearchResults(TestCase): @@ -348,11 +348,11 @@ class TestElasticsearch2SearchResults(TestCase): def get_results(self): backend = Elasticsearch2SearchBackend({}) - query = mock.MagicMock() - query.queryset = models.Book.objects.all() - query.get_query.return_value = 'QUERY' - query.get_sort.return_value = None - return backend.results_class(backend, query) + query_compiler = mock.MagicMock() + query_compiler.queryset = models.Book.objects.all() + query_compiler.get_query.return_value = 'QUERY' + query_compiler.get_sort.return_value = None + return backend.results_class(backend, query_compiler) def construct_search_response(self, results): return { diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py index 1ad14a993..e3449c1cc 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py @@ -49,55 +49,55 @@ class TestElasticsearch5SearchQuery(TestCase): json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default) ) - query_class = Elasticsearch5SearchBackend.query_class + query_compiler_class = Elasticsearch5SearchBackend.query_compiler_class def test_simple(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello") # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_match_all(self): # Create a query - query = 
self.query_class(models.Book.objects.all(), MATCH_ALL) + query_compiler = self.query_compiler_class(models.Book.objects.all(), MATCH_ALL) # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'must': {'match_all': {}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_and_operator(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", operator='and') + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", operator='and') # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials'], 'operator': 'and'}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_filter(self): # Create a query - query = self.query_class(models.Book.objects.filter(title="Test"), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title="Test"), "Hello") # Check it expected_result = {'bool': {'filter': [ {'match': {'content_type': 'searchtests.Book'}}, {'term': {'title_filter': 'Test'}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_and_filter(self): # Create a query - query = self.query_class(models.Book.objects.filter(title="Test", publication_date=datetime.date(2017, 10, 18)), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title="Test", publication_date=datetime.date(2017, 10, 18)), "Hello") # Check it expected_result = {'bool': {'filter': [ @@ -106,7 +106,7 @@ class TestElasticsearch5SearchQuery(TestCase): ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', 
'_partials']}}}} # Make sure field filters are sorted (as they can be in any order which may cause false positives) - query = query.get_query() + query = query_compiler.get_query() field_filters = query['bool']['filter'][1]['bool']['must'] field_filters[:] = sorted(field_filters, key=lambda f: list(f['term'].keys())[0]) @@ -114,10 +114,10 @@ class TestElasticsearch5SearchQuery(TestCase): def test_or_filter(self): # Create a query - query = self.query_class(models.Book.objects.filter(Q(title="Test") | Q(publication_date=datetime.date(2017, 10, 18))), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(Q(title="Test") | Q(publication_date=datetime.date(2017, 10, 18))), "Hello") # Make sure field filters are sorted (as they can be in any order which may cause false positives) - query = query.get_query() + query = query_compiler.get_query() field_filters = query['bool']['filter'][1]['bool']['should'] field_filters[:] = sorted(field_filters, key=lambda f: list(f['term'].keys())[0]) @@ -130,51 +130,51 @@ class TestElasticsearch5SearchQuery(TestCase): def test_negated_filter(self): # Create a query - query = self.query_class(models.Book.objects.exclude(publication_date=datetime.date(2017, 10, 18)), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.exclude(publication_date=datetime.date(2017, 10, 18)), "Hello") # Check it expected_result = {'bool': {'filter': [ {'match': {'content_type': 'searchtests.Book'}}, {'bool': {'mustNot': {'term': {'publication_date_filter': '2017-10-18'}}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_fields(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", fields=['title']) + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title']) # Check it 
expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'must': {'match': {'title': 'Hello'}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_fields_with_and_operator(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", fields=['title'], operator='and') + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title'], operator='and') # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'must': {'match': {'title': {'query': 'Hello', 'operator': 'and'}}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_multiple_fields(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", fields=['title', 'content']) + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title', 'content']) # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'must': {'multi_match': {'fields': ['title', 'content'], 'query': 'Hello'}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_multiple_fields_with_and_operator(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.all(), "Hello", fields=['title', 'content'], operator='and' ) @@ -183,68 +183,68 @@ class TestElasticsearch5SearchQuery(TestCase): 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'must': {'multi_match': {'fields': ['title', 'content'], 'query': 'Hello', 'operator': 'and'}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_exact_lookup(self): # Create a 
query - query = self.query_class(models.Book.objects.filter(title__exact="Test"), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__exact="Test"), "Hello") # Check it expected_result = {'bool': {'filter': [ {'match': {'content_type': 'searchtests.Book'}}, {'term': {'title_filter': 'Test'}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_none_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title=None), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title=None), "Hello") # Check it expected_result = {'bool': {'filter': [ {'match': {'content_type': 'searchtests.Book'}}, {'missing': {'field': 'title_filter'}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_isnull_true_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__isnull=True), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__isnull=True), "Hello") # Check it expected_result = {'bool': {'filter': [ {'match': {'content_type': 'searchtests.Book'}}, {'missing': {'field': 'title_filter'}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_isnull_false_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__isnull=False), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__isnull=False), "Hello") # Check it expected_result = {'bool': {'filter': [ {'match': {'content_type':
'searchtests.Book'}}, {'exists': {'field': 'title_filter'}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_startswith_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__startswith="Test"), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__startswith="Test"), "Hello") # Check it expected_result = {'bool': {'filter': [ {'match': {'content_type': 'searchtests.Book'}}, {'prefix': {'title_filter': 'Test'}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_gt_lookup(self): # This also tests conversion of python dates to strings # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__gt=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -253,11 +253,11 @@ class TestElasticsearch5SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'gt': '2014-04-29'}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_lt_lookup(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__lt=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -266,11 +266,11 @@ class TestElasticsearch5SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'lt': '2014-04-29'}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - 
self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_gte_lookup(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__gte=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -279,11 +279,11 @@ class TestElasticsearch5SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'gte': '2014-04-29'}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_lte_lookup(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__lte=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -292,14 +292,14 @@ class TestElasticsearch5SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'lte': '2014-04-29'}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_range_lookup(self): start_date = datetime.datetime(2014, 4, 29) end_date = datetime.datetime(2014, 8, 19) # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__range=(start_date, end_date)), "Hello" ) @@ -308,37 +308,37 @@ class TestElasticsearch5SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'gte': '2014-04-29', 'lte': '2014-08-19'}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + 
self.assertDictEqual(query_compiler.get_query(), expected_result) def test_custom_ordering(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.order_by('publication_date'), "Hello", order_by_relevance=False ) # Check it expected_result = [{'publication_date_filter': 'asc'}] - self.assertDictEqual(query.get_sort(), expected_result) + self.assertDictEqual(query_compiler.get_sort(), expected_result) def test_custom_ordering_reversed(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.order_by('-publication_date'), "Hello", order_by_relevance=False ) # Check it expected_result = [{'publication_date_filter': 'desc'}] - self.assertDictEqual(query.get_sort(), expected_result) + self.assertDictEqual(query_compiler.get_sort(), expected_result) def test_custom_ordering_multiple(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.order_by('publication_date', 'number_of_pages'), "Hello", order_by_relevance=False ) # Check it expected_result = [{'publication_date_filter': 'asc'}, {'number_of_pages_filter': 'asc'}] - self.assertDictEqual(query.get_sort(), expected_result) + self.assertDictEqual(query_compiler.get_sort(), expected_result) class TestElasticsearch5SearchResults(TestCase): @@ -352,11 +352,11 @@ class TestElasticsearch5SearchResults(TestCase): def get_results(self): backend = Elasticsearch5SearchBackend({}) - query = mock.MagicMock() - query.queryset = models.Book.objects.all() - query.get_query.return_value = 'QUERY' - query.get_sort.return_value = None - return backend.results_class(backend, query) + query_compiler = mock.MagicMock() + query_compiler.queryset = models.Book.objects.all() + query_compiler.get_query.return_value = 'QUERY' + query_compiler.get_sort.return_value = None + return backend.results_class(backend, query_compiler) def construct_search_response(self, 
results): return { From 1bb6613bcca999030baad2121d338b32b26ff04a Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 16:53:41 +0100 Subject: [PATCH 07/40] Types the search query API. --- wagtail/wagtailsearch/query.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/wagtail/wagtailsearch/query.py b/wagtail/wagtailsearch/query.py index fb72b22e1..37be41e27 100644 --- a/wagtail/wagtailsearch/query.py +++ b/wagtail/wagtailsearch/query.py @@ -24,7 +24,7 @@ class Or(SearchQueryOperator): class Not(SearchQueryOperator): - def __init__(self, subquery): + def __init__(self, subquery: SearchQuery): self.subquery = subquery @@ -33,39 +33,41 @@ class MatchAll(SearchQuery): class PlainText(SearchQuery): - def __init__(self, query_string, operator=None, boost=1.0): + def __init__(self, query_string: str, operator: str = None, + boost: float = 1.0): self.query_string = query_string self.operator = operator self.boost = boost class Term(SearchQuery): - def __init__(self, term, boost=1.0): + def __init__(self, term: str, boost: float = 1.0): self.term = term self.boost = boost class Prefix(SearchQuery): - def __init__(self, prefix, boost=1.0): + def __init__(self, prefix: str, boost: float = 1.0): self.prefix = prefix self.boost = boost class Fuzzy(SearchQuery): - def __init__(self, term, max_distance=3, boost=1.0): + def __init__(self, term: str, max_distance: float = 3, boost: float = 1.0): self.term = term self.max_distance = max_distance self.boost = boost class Boost(SearchQuery): - def __init__(self, query, boost): + def __init__(self, query: SearchQuery, boost: float): self.query = query self.boost = boost class Filter(SearchQuery): - def __init__(self, query, include=None, exclude=None): + def __init__(self, query: SearchQuery, + include: SearchQuery = None, exclude: SearchQuery = None): self.query = query self.include = include self.exclude = exclude From 019f59f486d6e838d5df9ff7ef3fbe0ac8539b97 Mon Sep 17 00:00:00 2001 
From: Bertrand Bordage Date: Thu, 23 Nov 2017 16:57:52 +0100 Subject: [PATCH 08/40] Fixes an undefined variable. --- wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py index e3449c1cc..030f73ac4 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py @@ -198,7 +198,7 @@ class TestElasticsearch5SearchQuery(TestCase): def test_none_lookup(self): # Create a query - query = self.query_compiler_class(models.Book.objects.filter(title=None), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title=None), "Hello") # Check it expected_result = {'bool': {'filter': [ From 71a7ca5808d51538e6d81a59a16abf54b11ef828 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 17:05:54 +0100 Subject: [PATCH 09/40] =?UTF-8?q?SearchQueryCompiler=20=E2=86=92=20BaseSea?= =?UTF-8?q?rchQueryCompiler.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wagtail/contrib/postgres_search/backend.py | 4 ++-- wagtail/wagtailsearch/backends/base.py | 2 +- wagtail/wagtailsearch/backends/db.py | 4 ++-- wagtail/wagtailsearch/backends/elasticsearch2.py | 4 ++-- wagtail/wagtailsearch/query.py | 3 +++ 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py index 83c2ad16d..9fe538980 100644 --- a/wagtail/contrib/postgres_search/backend.py +++ b/wagtail/contrib/postgres_search/backend.py @@ -10,7 +10,7 @@ from django.db.models.functions import Cast from django.utils.encoding import force_text from wagtail.wagtailsearch.backends.base import ( - BaseSearchBackend, SearchQueryCompiler, BaseSearchResults) + BaseSearchBackend, BaseSearchQueryCompiler, 
BaseSearchResults) from wagtail.wagtailsearch.index import RelatedFields, SearchField from wagtail.wagtailsearch.query import MatchAll, PlainText @@ -167,7 +167,7 @@ class Index(object): return self.name -class PostgresSearchQueryCompiler(SearchQueryCompiler): +class PostgresSearchQueryCompiler(BaseSearchQueryCompiler): DEFAULT_OPERATOR = 'and' def __init__(self, *args, **kwargs): diff --git a/wagtail/wagtailsearch/backends/base.py b/wagtail/wagtailsearch/backends/base.py index 9ff3d6d1e..9cf04d5c9 100644 --- a/wagtail/wagtailsearch/backends/base.py +++ b/wagtail/wagtailsearch/backends/base.py @@ -19,7 +19,7 @@ class FieldError(Exception): pass -class SearchQueryCompiler(object): +class BaseSearchQueryCompiler(object): DEFAULT_OPERATOR = 'or' def __init__(self, queryset, query, fields=None, operator=None, order_by_relevance=True): diff --git a/wagtail/wagtailsearch/backends/db.py b/wagtail/wagtailsearch/backends/db.py index 4aa9f40d5..cc2ae42bd 100644 --- a/wagtail/wagtailsearch/backends/db.py +++ b/wagtail/wagtailsearch/backends/db.py @@ -4,11 +4,11 @@ from django.db import models from django.db.models.expressions import Value from wagtail.wagtailsearch.backends.base import ( - BaseSearchBackend, SearchQueryCompiler, BaseSearchResults) + BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) from wagtail.wagtailsearch.query import MatchAll, PlainText -class DatabaseSearchQueryCompiler(SearchQueryCompiler): +class DatabaseSearchQueryCompiler(BaseSearchQueryCompiler): DEFAULT_OPERATOR = 'and' def _process_lookup(self, field, lookup, value): diff --git a/wagtail/wagtailsearch/backends/elasticsearch2.py b/wagtail/wagtailsearch/backends/elasticsearch2.py index 3e59f2c58..320479f1a 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch2.py +++ b/wagtail/wagtailsearch/backends/elasticsearch2.py @@ -13,7 +13,7 @@ from elasticsearch.helpers import bulk from wagtail.utils.utils import deep_update from wagtail.wagtailsearch.backends.base import ( - 
BaseSearchBackend, SearchQueryCompiler, BaseSearchResults) + BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) from wagtail.wagtailsearch.index import ( FilterField, Indexed, RelatedFields, SearchField, class_is_indexed) from wagtail.wagtailsearch.query import MatchAll, PlainText @@ -261,7 +261,7 @@ class Elasticsearch2Mapping(object): return '' % (self.model.__name__, ) -class Elasticsearch2SearchQueryCompiler(SearchQueryCompiler): +class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler): mapping_class = Elasticsearch2Mapping DEFAULT_OPERATOR = 'or' diff --git a/wagtail/wagtailsearch/query.py b/wagtail/wagtailsearch/query.py index 37be41e27..3df8e4bb3 100644 --- a/wagtail/wagtailsearch/query.py +++ b/wagtail/wagtailsearch/query.py @@ -1,3 +1,6 @@ +from __future__ import absolute_import, unicode_literals + + class SearchQuery: def __and__(self, other): return And([self, other]) From 27bcb3f38f330789d7eca4a1d7683282a1b254d6 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 18:37:36 +0100 Subject: [PATCH 10/40] Implements And/Or/Not/Term in postgres_search. 
--- wagtail/contrib/postgres_search/backend.py | 47 +++++++---- wagtail/wagtailsearch/backends/db.py | 4 +- .../wagtailsearch/backends/elasticsearch2.py | 4 +- wagtail/wagtailsearch/query.py | 22 +++-- wagtail/wagtailsearch/tests/test_backends.py | 83 ++++++++++++++++++- .../wagtailsearch/tests/test_db_backend.py | 24 ++++++ .../tests/test_elasticsearch2_backend.py | 24 ++++++ 7 files changed, 182 insertions(+), 26 deletions(-) diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py index 9fe538980..9084de260 100644 --- a/wagtail/contrib/postgres_search/backend.py +++ b/wagtail/contrib/postgres_search/backend.py @@ -2,7 +2,10 @@ from __future__ import absolute_import, unicode_literals -from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector +from warnings import warn + +from django.contrib.postgres.search import SearchQuery as PostgresSearchQuery +from django.contrib.postgres.search import SearchRank, SearchVector from django.db import DEFAULT_DB_ALIAS, NotSupportedError, connections, transaction from django.db.models import F, Manager, TextField, Value from django.db.models.constants import LOOKUP_SEP @@ -12,13 +15,12 @@ from django.utils.encoding import force_text from wagtail.wagtailsearch.backends.base import ( BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) from wagtail.wagtailsearch.index import RelatedFields, SearchField -from wagtail.wagtailsearch.query import MatchAll, PlainText +from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, PlainText, Term from .models import IndexEntry from .utils import ( ADD, AND, OR, WEIGHTS_VALUES, get_ancestors_content_types_pks, get_content_type_pk, - get_descendants_content_types_pks, get_postgresql_connections, get_weight, keyword_split, - unidecode) + get_descendants_content_types_pks, get_postgresql_connections, get_weight, unidecode) # TODO: Add autocomplete. 
@@ -174,12 +176,29 @@ class PostgresSearchQueryCompiler(BaseSearchQueryCompiler): super(PostgresSearchQueryCompiler, self).__init__(*args, **kwargs) self.search_fields = self.queryset.model.get_search_fields() - def get_search_query(self, config): - combine = OR if self.query.operator == 'or' else AND - search_terms = keyword_split(unidecode(self.query.query_string)) - if not search_terms: - return SearchQuery('') - return combine(SearchQuery(q, config=config) for q in search_terms) + def build_database_query(self, query=None, config=None): + if query is None: + query = self.query + + if isinstance(query, PlainText): + return self.build_database_query(query.to_combined_terms(), config) + if isinstance(query, Term): + # TODO: Find a way to use the term boosting. + if query.boost != 1: + warn('PostgreSQL search backend ' + 'does not support term boosting for now.') + return PostgresSearchQuery(unidecode(query.term), config=config) + if isinstance(query, Not): + return ~self.build_database_query(query.subquery, config) + if isinstance(query, And): + return AND(self.build_database_query(subquery, config) + for subquery in query.subqueries) + if isinstance(query, Or): + return OR(self.build_database_query(subquery, config) + for subquery in query.subqueries) + raise NotImplementedError( + '`%s` is not supported by the PostgreSQL search backend.' + % self.query.__class__.__name__) def get_boost(self, field_name, fields=None): if fields is None: @@ -198,15 +217,11 @@ class PostgresSearchQueryCompiler(BaseSearchQueryCompiler): return field.boost def search(self, config, start, stop): + # TODO: Handle MatchAll nested inside other search query classes. if isinstance(self.query, MatchAll): return self.queryset[start:stop] - if not isinstance(self.query, PlainText): - raise NotImplementedError( - '%s is not supported by the PostgreSQL search backend.' 
- % self.query.__class__) - - search_query = self.get_search_query(config=config) + search_query = self.build_database_query(config=config) queryset = self.queryset query = queryset.query if self.fields is None: diff --git a/wagtail/wagtailsearch/backends/db.py b/wagtail/wagtailsearch/backends/db.py index cc2ae42bd..30831b2a1 100644 --- a/wagtail/wagtailsearch/backends/db.py +++ b/wagtail/wagtailsearch/backends/db.py @@ -42,8 +42,8 @@ class DatabaseSearchQueryCompiler(BaseSearchQueryCompiler): if not isinstance(self.query, PlainText): raise NotImplementedError( - '%s is not supported by the database search backend.' - % self.query.__class__) + '`%s` is not supported by the database search backend.' + % self.query.__class__.__name__) # Get fields fields = self.fields or [field.field_name for field in model.get_searchable_search_fields()] diff --git a/wagtail/wagtailsearch/backends/elasticsearch2.py b/wagtail/wagtailsearch/backends/elasticsearch2.py index 320479f1a..f265774fd 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch2.py +++ b/wagtail/wagtailsearch/backends/elasticsearch2.py @@ -377,8 +377,8 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler): if not isinstance(self.query, PlainText): raise NotImplementedError( - '%s is not supported by the Elasticsearch search backend.' - % self.query.__class__) + '`%s` is not supported by the Elasticsearch search backend.' 
+ % self.query.__class__.__name__) fields = self.fields or ['_all', '_partials'] operator = self.query.operator diff --git a/wagtail/wagtailsearch/query.py b/wagtail/wagtailsearch/query.py index 3df8e4bb3..813c87e1b 100644 --- a/wagtail/wagtailsearch/query.py +++ b/wagtail/wagtailsearch/query.py @@ -36,27 +36,39 @@ class MatchAll(SearchQuery): class PlainText(SearchQuery): - def __init__(self, query_string: str, operator: str = None, - boost: float = 1.0): + OPERATORS = { + 'and': And, + 'or': Or, + } + DEFAULT_OPERATOR = 'and' + + def __init__(self, query_string: str, operator: str = DEFAULT_OPERATOR, + boost: float = 1): self.query_string = query_string + if operator.lower() not in self.OPERATORS: + raise ValueError("`operator` must be either 'or' or 'and'.") self.operator = operator self.boost = boost + def to_combined_terms(self): + return self.OPERATORS[self.operator]([ + Term(term) for term in self.query_string.split()]) + class Term(SearchQuery): - def __init__(self, term: str, boost: float = 1.0): + def __init__(self, term: str, boost: float = 1): self.term = term self.boost = boost class Prefix(SearchQuery): - def __init__(self, prefix: str, boost: float = 1.0): + def __init__(self, prefix: str, boost: float = 1): self.prefix = prefix self.boost = boost class Fuzzy(SearchQuery): - def __init__(self, term: str, max_distance: float = 3, boost: float = 1.0): + def __init__(self, term: str, max_distance: float = 3, boost: float = 1): self.term = term self.max_distance = max_distance self.boost = boost diff --git a/wagtail/wagtailsearch/tests/test_backends.py b/wagtail/wagtailsearch/tests/test_backends.py index 8e79b4876..d3295b1f7 100644 --- a/wagtail/wagtailsearch/tests/test_backends.py +++ b/wagtail/wagtailsearch/tests/test_backends.py @@ -17,7 +17,7 @@ from wagtail.wagtailsearch.backends import ( InvalidSearchBackendError, get_search_backend, get_search_backends) from wagtail.wagtailsearch.backends.base import FieldError from 
wagtail.wagtailsearch.backends.db import DatabaseSearchBackend -from wagtail.wagtailsearch.query import MATCH_ALL +from wagtail.wagtailsearch.query import MATCH_ALL, And, Not, Or, PlainText, Term class BackendTests(WagtailTestUtils): @@ -430,6 +430,87 @@ class BackendTests(WagtailTestUtils): "The Fellowship of the Ring" ]) + # + # Query classes + # + + def test_match_all(self): + results = self.backend.search(MATCH_ALL, models.Book.objects.all()) + self.assertEqual(len(results), 13) + + def test_term(self): + # Single word + results = self.backend.search(Term('Javascript'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide', + 'JavaScript: The good parts'}) + + # Multiple word + results = self.backend.search(Term('Javascript Guide'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + def test_plain_text(self): + # Single word + results = self.backend.search(PlainText('Javascript'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide', + 'JavaScript: The good parts'}) + + # Multiple words (OR operator) + results = self.backend.search(PlainText('Javascript Definitive', + operator='or'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide', + 'JavaScript: The good parts'}) + + # Multiple words (AND operator) + results = self.backend.search(PlainText('Javascript Definitive', + operator='and'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + def test_and(self): + results = self.backend.search(And([Term('Javascript'), + Term('Definitive')]), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + def test_or(self): + results = self.backend.search(Or([Term('Hobbit'), 
Term('Towers')]), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'The Hobbit', 'The Two Towers'}) + + def test_not(self): + all_other_titles = { + 'A Clash of Kings', + 'A Game of Thrones', + 'A Storm of Swords', + 'Foundation', + 'Learning Python', + 'The Hobbit', + 'The Two Towers', + 'The Fellowship of the Ring', + 'The Return of the King', + 'The Rust Programming Language', + 'Two Scoops of Django 1.11', + } + + results = self.backend.search(Not(PlainText('Javascript')), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, all_other_titles) + + results = self.backend.search(~PlainText('Javascript'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, all_other_titles) + @override_settings( WAGTAILSEARCH_BACKENDS={ diff --git a/wagtail/wagtailsearch/tests/test_db_backend.py b/wagtail/wagtailsearch/tests/test_db_backend.py index 5344bac77..47ce49f5f 100644 --- a/wagtail/wagtailsearch/tests/test_db_backend.py +++ b/wagtail/wagtailsearch/tests/test_db_backend.py @@ -44,3 +44,27 @@ class TestDBBackend(BackendTests, TestCase): @unittest.expectedFailure def test_same_rank_pages(self): super(TestDBBackend, self).test_same_rank_pages() + + # + # Query classes + # + + # Not implemented yet + @unittest.expectedFailure + def test_term(self): + super().test_term() + + # Not implemented yet + @unittest.expectedFailure + def test_and(self): + super().test_and() + + # Not implemented yet + @unittest.expectedFailure + def test_or(self): + super().test_or() + + # Not implemented yet + @unittest.expectedFailure + def test_not(self): + super().test_not() diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py index b82d8bb2d..e9759b6db 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py @@ -37,6 +37,30 @@ class 
TestElasticsearch2SearchBackend(BackendTests, ElasticsearchCommonSearchBac def test_delete(self): super(TestElasticsearch2SearchBackend, self).test_delete() + # + # Query classes + # + + # Not implemented yet + @unittest.expectedFailure + def test_term(self): + super().test_term() + + # Not implemented yet + @unittest.expectedFailure + def test_and(self): + super().test_and() + + # Not implemented yet + @unittest.expectedFailure + def test_or(self): + super().test_or() + + # Not implemented yet + @unittest.expectedFailure + def test_not(self): + super().test_not() + class TestElasticsearch2SearchQuery(TestCase): def assertDictEqual(self, a, b): From 4a05f4ae23e6e4ac91a4b0628d8646fad96b5344 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 18:48:22 +0100 Subject: [PATCH 11/40] Adds a test for complex operators combinations. --- wagtail/wagtailsearch/tests/test_backends.py | 27 +++++++++++++++++-- .../wagtailsearch/tests/test_db_backend.py | 5 ++++ .../tests/test_elasticsearch2_backend.py | 5 ++++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/wagtail/wagtailsearch/tests/test_backends.py b/wagtail/wagtailsearch/tests/test_backends.py index d3295b1f7..46a0a7dce 100644 --- a/wagtail/wagtailsearch/tests/test_backends.py +++ b/wagtail/wagtailsearch/tests/test_backends.py @@ -482,12 +482,22 @@ class BackendTests(WagtailTestUtils): self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide'}) + results = self.backend.search(Term('Javascript') & Term('Definitive'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + def test_or(self): results = self.backend.search(Or([Term('Hobbit'), Term('Towers')]), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'The Hobbit', 'The Two Towers'}) + results = self.backend.search(Term('Hobbit') | Term('Towers'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in 
results}, + {'The Hobbit', 'The Two Towers'}) + def test_not(self): all_other_titles = { 'A Clash of Kings', @@ -503,14 +513,27 @@ class BackendTests(WagtailTestUtils): 'Two Scoops of Django 1.11', } - results = self.backend.search(Not(PlainText('Javascript')), + results = self.backend.search(Not(Term('Javascript')), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, all_other_titles) - results = self.backend.search(~PlainText('Javascript'), + results = self.backend.search(~Term('Javascript'), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, all_other_titles) + def test_operators_combination(self): + results = self.backend.search( + ((Term('Javascript') & ~Term('Definitive')) + | Term('Python') | Term('Rust')) + | Term('Two'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The good parts', + 'Learning Python', + 'The Two Towers', + 'The Rust Programming Language', + 'Two Scoops of Django 1.11'}) + @override_settings( WAGTAILSEARCH_BACKENDS={ diff --git a/wagtail/wagtailsearch/tests/test_db_backend.py b/wagtail/wagtailsearch/tests/test_db_backend.py index 47ce49f5f..3db2bd16c 100644 --- a/wagtail/wagtailsearch/tests/test_db_backend.py +++ b/wagtail/wagtailsearch/tests/test_db_backend.py @@ -68,3 +68,8 @@ class TestDBBackend(BackendTests, TestCase): @unittest.expectedFailure def test_not(self): super().test_not() + + # Not implemented yet + @unittest.expectedFailure + def test_operators_combination(self): + super().test_operators_combination() diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py index e9759b6db..936a50e4b 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py @@ -61,6 +61,11 @@ class TestElasticsearch2SearchBackend(BackendTests, ElasticsearchCommonSearchBac def test_not(self): super().test_not() 
+ # Not implemented yet + @unittest.expectedFailure + def test_operators_combination(self): + super().test_operators_combination() + class TestElasticsearch2SearchQuery(TestCase): def assertDictEqual(self, a, b): From 6fe6935997952e82318115b5a5fb912962473a93 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 18:51:34 +0100 Subject: [PATCH 12/40] Makes Flake8 happy. --- wagtail/wagtailsearch/tests/test_backends.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wagtail/wagtailsearch/tests/test_backends.py b/wagtail/wagtailsearch/tests/test_backends.py index 46a0a7dce..4ef4fbeca 100644 --- a/wagtail/wagtailsearch/tests/test_backends.py +++ b/wagtail/wagtailsearch/tests/test_backends.py @@ -523,9 +523,9 @@ class BackendTests(WagtailTestUtils): def test_operators_combination(self): results = self.backend.search( - ((Term('Javascript') & ~Term('Definitive')) - | Term('Python') | Term('Rust')) - | Term('Two'), + ((Term('Javascript') & ~Term('Definitive')) | + Term('Python') | Term('Rust')) | + Term('Two'), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'JavaScript: The good parts', From 4c4dfac806bd01450e39600ffd8b4551792632b5 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 18:52:50 +0100 Subject: [PATCH 13/40] Removes keyword_split. 
--- wagtail/contrib/postgres_search/utils.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/wagtail/contrib/postgres_search/utils.py b/wagtail/contrib/postgres_search/utils.py index f3f42f97a..0e3e25960 100644 --- a/wagtail/contrib/postgres_search/utils.py +++ b/wagtail/contrib/postgres_search/utils.py @@ -1,7 +1,6 @@ from __future__ import absolute_import, division, unicode_literals import operator -import re from functools import partial, reduce from itertools import zip_longest @@ -31,25 +30,6 @@ AND = partial(reduce, operator.and_) ADD = partial(reduce, operator.add) -def keyword_split(keywords): - """ - Return all the keywords in a keyword string. - - Keeps keywords surrounded by quotes together, removing the surrounding quotes: - - >>> keyword_split('Hello I\\'m looking for "something special"') - ['Hello', "I'm", 'looking', 'for', 'something special'] - - Nested quoted strings are returned as is: - - >>> keyword_split("He said \\"I'm looking for 'something special'\\" so I've given him the 'special item'") - ['He', 'said', "I'm looking for 'something special'", 'so', "I've", 'given', 'him', 'the', 'special item'] - - """ - matches = re.findall(r'"([^"]+)"|\'([^\']+)\'|(\S+)', keywords) - return [match[0] or match[1] or match[2] for match in matches] - - def get_descendant_models(model): """ Returns all descendants of a model, including the model itself. From b36165fd6a781c70a31bd3c2798c76aeeebcf97c Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 19:25:18 +0100 Subject: [PATCH 14/40] Implements And/Or/Not/Term in database search backend. 
--- wagtail/contrib/postgres_search/backend.py | 3 +- wagtail/contrib/postgres_search/utils.py | 10 -- wagtail/wagtailsearch/backends/db.py | 96 ++++++++++--------- wagtail/wagtailsearch/query.py | 3 +- wagtail/wagtailsearch/tests/test_backends.py | 2 +- .../wagtailsearch/tests/test_db_backend.py | 29 ------ wagtail/wagtailsearch/utils.py | 9 ++ 7 files changed, 67 insertions(+), 85 deletions(-) diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py index 9084de260..eb7ca7306 100644 --- a/wagtail/contrib/postgres_search/backend.py +++ b/wagtail/contrib/postgres_search/backend.py @@ -16,10 +16,11 @@ from wagtail.wagtailsearch.backends.base import ( BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) from wagtail.wagtailsearch.index import RelatedFields, SearchField from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, PlainText, Term +from wagtail.wagtailsearch.utils import ADD, AND, OR from .models import IndexEntry from .utils import ( - ADD, AND, OR, WEIGHTS_VALUES, get_ancestors_content_types_pks, get_content_type_pk, + WEIGHTS_VALUES, get_ancestors_content_types_pks, get_content_type_pk, get_descendants_content_types_pks, get_postgresql_connections, get_weight, unidecode) diff --git a/wagtail/contrib/postgres_search/utils.py b/wagtail/contrib/postgres_search/utils.py index 0e3e25960..fd0dc06cb 100644 --- a/wagtail/contrib/postgres_search/utils.py +++ b/wagtail/contrib/postgres_search/utils.py @@ -1,7 +1,5 @@ from __future__ import absolute_import, division, unicode_literals -import operator -from functools import partial, reduce from itertools import zip_longest from django.apps import apps @@ -22,14 +20,6 @@ def get_postgresql_connections(): if connection.vendor == 'postgresql'] -# Reduce any iterable to a single value using a logical OR e.g. (a | b | ...) -OR = partial(reduce, operator.or_) -# Reduce any iterable to a single value using a logical AND e.g. (a & b & ...) 
-AND = partial(reduce, operator.and_) -# Reduce any iterable to a single value using an addition -ADD = partial(reduce, operator.add) - - def get_descendant_models(model): """ Returns all descendants of a model, including the model itself. diff --git a/wagtail/wagtailsearch/backends/db.py b/wagtail/wagtailsearch/backends/db.py index 30831b2a1..bcfd89ce2 100644 --- a/wagtail/wagtailsearch/backends/db.py +++ b/wagtail/wagtailsearch/backends/db.py @@ -1,16 +1,36 @@ from __future__ import absolute_import, unicode_literals +from warnings import warn + from django.db import models from django.db.models.expressions import Value from wagtail.wagtailsearch.backends.base import ( BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) -from wagtail.wagtailsearch.query import MatchAll, PlainText +from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, PlainText, Term +from wagtail.wagtailsearch.utils import AND, OR class DatabaseSearchQueryCompiler(BaseSearchQueryCompiler): DEFAULT_OPERATOR = 'and' + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.fields_names = list(self.get_fields_names()) + + def get_fields_names(self): + model = self.queryset.model + fields_names = self.fields or [field.field_name for field in + model.get_searchable_search_fields()] + # Check if the field exists (this will filter out indexed callables) + for field_name in fields_names: + try: + model._meta.get_field(field_name) + except models.fields.FieldDoesNotExist: + continue + else: + yield field_name + def _process_lookup(self, field, lookup, value): return models.Q(**{field.get_attname(self.queryset.model) + '__' + lookup: value}) @@ -29,57 +49,47 @@ class DatabaseSearchQueryCompiler(BaseSearchQueryCompiler): return q - def get_extra_q(self): - # Run _get_filters_from_queryset to test that no fields that are not - # a FilterField have been used in the query. 
- self._get_filters_from_queryset() + def build_single_term_filter(self, term): + term_query = models.Q() + for field_name in self.fields_names: + term_query |= models.Q(**{field_name + '__icontains': term}) + return term_query - q = models.Q() - model = self.queryset.model + def build_database_filter(self, query=None): + if query is None: + query = self.query if isinstance(self.query, MatchAll): - return q + return models.Q() - if not isinstance(self.query, PlainText): - raise NotImplementedError( - '`%s` is not supported by the database search backend.' - % self.query.__class__.__name__) - - # Get fields - fields = self.fields or [field.field_name for field in model.get_searchable_search_fields()] - - # Get terms - terms = self.query.query_string.split() - if not terms: - return model.objects.none() - - # Filter by terms - for term in terms: - term_query = models.Q() - for field_name in fields: - # Check if the field exists (this will filter out indexed callables) - try: - model._meta.get_field(field_name) - except models.fields.FieldDoesNotExist: - continue - - # Filter on this field - term_query |= models.Q(**{'%s__icontains' % field_name: term}) - - operator = self.query.operator - - if operator == 'or': - q |= term_query - elif operator == 'and': - q &= term_query - - return q + if isinstance(query, PlainText): + return self.build_database_filter(query.to_combined_terms()) + if isinstance(query, Term): + if query.boost != 1: + warn('Database search backend does not support term boosting.') + return self.build_single_term_filter(query.term) + if isinstance(query, Not): + return ~self.build_database_filter(query.subquery) + if isinstance(query, And): + return AND(self.build_database_filter(subquery) + for subquery in query.subqueries) + if isinstance(query, Or): + return OR(self.build_database_filter(subquery) + for subquery in query.subqueries) + raise NotImplementedError( + '`%s` is not supported by the database search backend.' 
+ % self.query.__class__.__name__) class DatabaseSearchResults(BaseSearchResults): def get_queryset(self): queryset = self.query_compiler.queryset - q = self.query_compiler.get_extra_q() + + # Run _get_filters_from_queryset to test that no fields that are not + # a FilterField have been used in the query. + self.query_compiler._get_filters_from_queryset() + + q = self.query_compiler.build_database_filter() return queryset.filter(q).distinct()[self.start:self.stop] diff --git a/wagtail/wagtailsearch/query.py b/wagtail/wagtailsearch/query.py index 813c87e1b..de341ce44 100644 --- a/wagtail/wagtailsearch/query.py +++ b/wagtail/wagtailsearch/query.py @@ -52,7 +52,8 @@ class PlainText(SearchQuery): def to_combined_terms(self): return self.OPERATORS[self.operator]([ - Term(term) for term in self.query_string.split()]) + Term(term, boost=self.boost) + for term in self.query_string.split()]) class Term(SearchQuery): diff --git a/wagtail/wagtailsearch/tests/test_backends.py b/wagtail/wagtailsearch/tests/test_backends.py index 4ef4fbeca..66ffe6156 100644 --- a/wagtail/wagtailsearch/tests/test_backends.py +++ b/wagtail/wagtailsearch/tests/test_backends.py @@ -447,7 +447,7 @@ class BackendTests(WagtailTestUtils): 'JavaScript: The good parts'}) # Multiple word - results = self.backend.search(Term('Javascript Guide'), + results = self.backend.search(Term('Definitive Guide'), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide'}) diff --git a/wagtail/wagtailsearch/tests/test_db_backend.py b/wagtail/wagtailsearch/tests/test_db_backend.py index 3db2bd16c..5344bac77 100644 --- a/wagtail/wagtailsearch/tests/test_db_backend.py +++ b/wagtail/wagtailsearch/tests/test_db_backend.py @@ -44,32 +44,3 @@ class TestDBBackend(BackendTests, TestCase): @unittest.expectedFailure def test_same_rank_pages(self): super(TestDBBackend, self).test_same_rank_pages() - - # - # Query classes - # - - # Not implemented yet - @unittest.expectedFailure 
- def test_term(self): - super().test_term() - - # Not implemented yet - @unittest.expectedFailure - def test_and(self): - super().test_and() - - # Not implemented yet - @unittest.expectedFailure - def test_or(self): - super().test_or() - - # Not implemented yet - @unittest.expectedFailure - def test_not(self): - super().test_not() - - # Not implemented yet - @unittest.expectedFailure - def test_operators_combination(self): - super().test_operators_combination() diff --git a/wagtail/wagtailsearch/utils.py b/wagtail/wagtailsearch/utils.py index 495a58833..5adddaec9 100644 --- a/wagtail/wagtailsearch/utils.py +++ b/wagtail/wagtailsearch/utils.py @@ -1,7 +1,16 @@ from __future__ import absolute_import, unicode_literals +import operator import re import string +from functools import partial, reduce + +# Reduce any iterable to a single value using a logical OR e.g. (a | b | ...) +OR = partial(reduce, operator.or_) +# Reduce any iterable to a single value using a logical AND e.g. (a & b & ...) +AND = partial(reduce, operator.and_) +# Reduce any iterable to a single value using an addition +ADD = partial(reduce, operator.add) MAX_QUERY_STRING_LENGTH = 255 From c737f516e8928ffbba610ad4a4981419e4cf5ba8 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 21:03:00 +0100 Subject: [PATCH 15/40] Implements Filter & Boost in database search backends. 
--- wagtail/contrib/postgres_search/backend.py | 6 +- wagtail/wagtailsearch/backends/db.py | 6 +- wagtail/wagtailsearch/query.py | 168 ++++++++++++++---- wagtail/wagtailsearch/tests/test_backends.py | 135 +++++++++++--- .../tests/test_elasticsearch2_backend.py | 11 +- 5 files changed, 261 insertions(+), 65 deletions(-) diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py index eb7ca7306..0429ecbd2 100644 --- a/wagtail/contrib/postgres_search/backend.py +++ b/wagtail/contrib/postgres_search/backend.py @@ -15,7 +15,7 @@ from django.utils.encoding import force_text from wagtail.wagtailsearch.backends.base import ( BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) from wagtail.wagtailsearch.index import RelatedFields, SearchField -from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, PlainText, Term +from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, SearchQueryShortcut, Term from wagtail.wagtailsearch.utils import ADD, AND, OR from .models import IndexEntry @@ -181,8 +181,8 @@ class PostgresSearchQueryCompiler(BaseSearchQueryCompiler): if query is None: query = self.query - if isinstance(query, PlainText): - return self.build_database_query(query.to_combined_terms(), config) + if isinstance(query, SearchQueryShortcut): + return self.build_database_query(query.get_equivalent(), config) if isinstance(query, Term): # TODO: Find a way to use the term boosting. 
if query.boost != 1: diff --git a/wagtail/wagtailsearch/backends/db.py b/wagtail/wagtailsearch/backends/db.py index bcfd89ce2..20e1fec03 100644 --- a/wagtail/wagtailsearch/backends/db.py +++ b/wagtail/wagtailsearch/backends/db.py @@ -7,7 +7,7 @@ from django.db.models.expressions import Value from wagtail.wagtailsearch.backends.base import ( BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) -from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, PlainText, Term +from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, SearchQueryShortcut, Term from wagtail.wagtailsearch.utils import AND, OR @@ -62,8 +62,8 @@ class DatabaseSearchQueryCompiler(BaseSearchQueryCompiler): if isinstance(self.query, MatchAll): return models.Q() - if isinstance(query, PlainText): - return self.build_database_filter(query.to_combined_terms()) + if isinstance(query, SearchQueryShortcut): + return self.build_database_filter(query.get_equivalent()) if isinstance(query, Term): if query.boost != 1: warn('Database search backend does not support term boosting.') diff --git a/wagtail/wagtailsearch/query.py b/wagtail/wagtailsearch/query.py index de341ce44..276cffe90 100644 --- a/wagtail/wagtailsearch/query.py +++ b/wagtail/wagtailsearch/query.py @@ -1,6 +1,11 @@ from __future__ import absolute_import, unicode_literals +# +# Base classes +# + + class SearchQuery: def __and__(self, other): return And([self, other]) @@ -11,31 +16,119 @@ class SearchQuery: def __invert__(self): return Not(self) + def apply(self, func): + raise NotImplementedError + + def clone(self): + return self.apply(lambda o: o) + + def get_children(self): + return () + + @property + def children(self): + return list(self.get_children()) + + @property + def child(self): + children = self.children + if len(children) != 1: + raise IndexError('`%s` object has %d children, not a single child.' 
+ % self.__class__.__name__, len(children)) + return children[0] + class SearchQueryOperator(SearchQuery): pass -class And(SearchQueryOperator): +class MultiOperandsSearchQueryOperator(SearchQueryOperator): def __init__(self, subqueries): self.subqueries = subqueries + def apply(self, func): + return func(self.__class__( + [subquery.apply(func) for subquery in self.subqueries])) -class Or(SearchQueryOperator): - def __init__(self, subqueries): - self.subqueries = subqueries + def get_children(self): + yield from self.subqueries + + +class SearchQueryShortcut(SearchQuery): + def get_equivalent(self): + raise NotImplementedError + + def get_children(self): + return self.get_equivalent(), + +# +# Operators +# + + +class And(MultiOperandsSearchQueryOperator): + pass + + +class Or(MultiOperandsSearchQueryOperator): + pass class Not(SearchQueryOperator): def __init__(self, subquery: SearchQuery): self.subquery = subquery + def apply(self, func): + return func(self.__class__(self.subquery.apply(func))) + + def get_children(self): + yield self.subquery + + +# +# Basic query classes +# + class MatchAll(SearchQuery): - pass + def apply(self, func): + return self.__class__() -class PlainText(SearchQuery): +class Term(SearchQuery): + def __init__(self, term: str, boost: float = 1): + self.term = term + self.boost = boost + + def apply(self, func): + return func(self.__class__(self.term, self.boost)) + + +class Prefix(SearchQuery): + def __init__(self, prefix: str, boost: float = 1): + self.prefix = prefix + self.boost = boost + + def apply(self, func): + return func(self.__class__(self.prefix, self.boost)) + + +class Fuzzy(SearchQuery): + def __init__(self, term: str, max_distance: float = 3, boost: float = 1): + self.term = term + self.max_distance = max_distance + self.boost = boost + + def apply(self, func): + return func(self.__class__(self.term, self.max_distance, self.boost)) + + +# +# Shortcut query classes +# + + +class PlainText(SearchQueryShortcut): OPERATORS = { 
'and': And, 'or': Or, @@ -50,43 +143,52 @@ class PlainText(SearchQuery): self.operator = operator self.boost = boost - def to_combined_terms(self): + def apply(self, func): + return func(self.__class__(self.query_string, self.operator, + self.boost)) + + def get_equivalent(self): return self.OPERATORS[self.operator]([ Term(term, boost=self.boost) for term in self.query_string.split()]) -class Term(SearchQuery): - def __init__(self, term: str, boost: float = 1): - self.term = term - self.boost = boost - - -class Prefix(SearchQuery): - def __init__(self, prefix: str, boost: float = 1): - self.prefix = prefix - self.boost = boost - - -class Fuzzy(SearchQuery): - def __init__(self, term: str, max_distance: float = 3, boost: float = 1): - self.term = term - self.max_distance = max_distance - self.boost = boost - - -class Boost(SearchQuery): - def __init__(self, query: SearchQuery, boost: float): - self.query = query - self.boost = boost - - -class Filter(SearchQuery): +class Filter(SearchQueryShortcut): def __init__(self, query: SearchQuery, include: SearchQuery = None, exclude: SearchQuery = None): self.query = query self.include = include self.exclude = exclude + def apply(self, func): + return func(self.__class__( + self.query.apply(func), + self.include.apply(func), self.exclude.apply(func))) + + def get_equivalent(self): + query = self.query + if self.include is not None: + query &= self.include + if self.exclude is not None: + query &= ~self.exclude + return query + + +class Boost(SearchQueryShortcut): + def __init__(self, subquery: SearchQuery, boost: float): + self.subquery = subquery + self.boost = boost + + def apply(self, func): + return func(self.__class__(self.subquery.apply(func), self.boost)) + + def get_equivalent(self): + def boost_child(child): + if isinstance(child, (PlainText, Fuzzy, Prefix, Term)): + child.boost *= self.boost + return child + + return self.subquery.apply(boost_child) + MATCH_ALL = MatchAll() diff --git 
a/wagtail/wagtailsearch/tests/test_backends.py b/wagtail/wagtailsearch/tests/test_backends.py index 66ffe6156..82cddef95 100644 --- a/wagtail/wagtailsearch/tests/test_backends.py +++ b/wagtail/wagtailsearch/tests/test_backends.py @@ -17,7 +17,7 @@ from wagtail.wagtailsearch.backends import ( InvalidSearchBackendError, get_search_backend, get_search_backends) from wagtail.wagtailsearch.backends.base import FieldError from wagtail.wagtailsearch.backends.db import DatabaseSearchBackend -from wagtail.wagtailsearch.query import MATCH_ALL, And, Not, Or, PlainText, Term +from wagtail.wagtailsearch.query import MATCH_ALL, And, Boost, Filter, Not, Or, PlainText, Term class BackendTests(WagtailTestUtils): @@ -431,7 +431,7 @@ class BackendTests(WagtailTestUtils): ]) # - # Query classes + # Basic query classes # def test_match_all(self): @@ -452,29 +452,6 @@ class BackendTests(WagtailTestUtils): self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide'}) - def test_plain_text(self): - # Single word - results = self.backend.search(PlainText('Javascript'), - models.Book.objects.all()) - self.assertSetEqual({r.title for r in results}, - {'JavaScript: The Definitive Guide', - 'JavaScript: The good parts'}) - - # Multiple words (OR operator) - results = self.backend.search(PlainText('Javascript Definitive', - operator='or'), - models.Book.objects.all()) - self.assertSetEqual({r.title for r in results}, - {'JavaScript: The Definitive Guide', - 'JavaScript: The good parts'}) - - # Multiple words (AND operator) - results = self.backend.search(PlainText('Javascript Definitive', - operator='and'), - models.Book.objects.all()) - self.assertSetEqual({r.title for r in results}, - {'JavaScript: The Definitive Guide'}) - def test_and(self): results = self.backend.search(And([Term('Javascript'), Term('Definitive')]), @@ -534,6 +511,114 @@ class BackendTests(WagtailTestUtils): 'The Rust Programming Language', 'Two Scoops of Django 1.11'}) + # + # Shortcut query 
classes + # + + def test_plain_text(self): + # Single word + results = self.backend.search(PlainText('Javascript'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide', + 'JavaScript: The good parts'}) + + # Multiple words (OR operator) + results = self.backend.search(PlainText('Javascript Definitive', + operator='or'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide', + 'JavaScript: The good parts'}) + + # Multiple words (AND operator) + results = self.backend.search(PlainText('Javascript Definitive', + operator='and'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + def test_filter_equivalent(self): + filter = Filter(Term('Javascript')) + term = filter.child + self.assertIsInstance(term, Term) + self.assertEqual(term.term, 'Javascript') + + filter = Filter(Term('Javascript'), include=Term('Definitive')) + and_obj = filter.child + self.assertIsInstance(and_obj, And) + javascript = and_obj.children[0] + self.assertIsInstance(javascript, Term) + self.assertEqual(javascript.term, 'Javascript') + definitive = and_obj.children[1] + self.assertIsInstance(definitive, Term) + self.assertEqual(definitive.term, 'Definitive') + + filter = Filter(Term('Javascript'), + include=Term('Definitive'), exclude=Term('Guide')) + and_obj1 = filter.child + self.assertIsInstance(and_obj1, And) + and_obj2 = and_obj1.children[0] + javascript = and_obj2.children[0] + self.assertIsInstance(javascript, Term) + self.assertEqual(javascript.term, 'Javascript') + definitive = and_obj2.children[1] + self.assertIsInstance(definitive, Term) + self.assertEqual(definitive.term, 'Definitive') + not_obj = and_obj1.children[1] + self.assertIsInstance(not_obj, Not) + guide = not_obj.child + self.assertEqual(guide.term, 'Guide') + + def test_filter_query(self): + results = 
self.backend.search(Filter(Term('Javascript')), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide', + 'JavaScript: The good parts'}) + + results = self.backend.search(Filter(Term('Javascript'), + include=Term('Definitive')), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + results = self.backend.search(Filter(Term('Javascript'), + include=Term('Definitive'), + exclude=Term('Guide')), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, set()) + + def test_boost_equivalent(self): + boost = Boost(Term('Guide'), 5) + equivalent = boost.children[0] + self.assertIsInstance(equivalent, Term) + self.assertAlmostEqual(equivalent.boost, 5) + + boost = Boost(Term('Guide', boost=0.5), 5) + equivalent = boost.children[0] + self.assertIsInstance(equivalent, Term) + self.assertAlmostEqual(equivalent.boost, 2.5) + + boost = Boost(Boost(Term('Guide', 0.1), 3), 5) + sub_boost = boost.children[0] + self.assertIsInstance(sub_boost, Boost) + sub_boost = sub_boost.children[0] + self.assertIsInstance(sub_boost, Term) + self.assertAlmostEqual(sub_boost.boost, 1.5) + + boost = Boost(And([Boost(Term('Guide', 0.1), 3), Term('Two', 2)]), 5) + and_obj = boost.children[0] + self.assertIsInstance(and_obj, And) + sub_boost = and_obj.children[0] + self.assertIsInstance(sub_boost, Boost) + guide = sub_boost.children[0] + self.assertIsInstance(guide, Term) + self.assertAlmostEqual(guide.boost, 1.5) + two = and_obj.children[1] + self.assertIsInstance(two, Term) + self.assertAlmostEqual(two.boost, 10) + @override_settings( WAGTAILSEARCH_BACKENDS={ diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py index 936a50e4b..e921e7a6f 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py +++ 
b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py @@ -38,7 +38,7 @@ class TestElasticsearch2SearchBackend(BackendTests, ElasticsearchCommonSearchBac super(TestElasticsearch2SearchBackend, self).test_delete() # - # Query classes + # Basic query classes # # Not implemented yet @@ -66,6 +66,15 @@ class TestElasticsearch2SearchBackend(BackendTests, ElasticsearchCommonSearchBac def test_operators_combination(self): super().test_operators_combination() + # + # Shortcut query classes + # + + # Not implemented yet + @unittest.expectedFailure + def test_filter_query(self): + super().test_filter_query() + class TestElasticsearch2SearchQuery(TestCase): def assertDictEqual(self, a, b): From 04e2deb30944245cea98dcf7d50a846d199d0c88 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 21:26:54 +0100 Subject: [PATCH 16/40] Allows search query API tests failure on ES5. --- .../tests/elasticsearch_common_tests.py | 39 +++++++++++++++++++ .../tests/test_elasticsearch2_backend.py | 38 ------------------ 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/wagtail/wagtailsearch/tests/elasticsearch_common_tests.py b/wagtail/wagtailsearch/tests/elasticsearch_common_tests.py index 903cbb937..3cd214392 100644 --- a/wagtail/wagtailsearch/tests/elasticsearch_common_tests.py +++ b/wagtail/wagtailsearch/tests/elasticsearch_common_tests.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, unicode_literals +import unittest from datetime import date from io import StringIO @@ -172,3 +173,41 @@ class ElasticsearchCommonSearchBackendTests(object): results = self.backend.search(MATCH_ALL, models.Book)[110:] self.assertEqual(len(results), 53) + + # + # Basic query classes + # + + # Not implemented yet + @unittest.expectedFailure + def test_term(self): + super().test_term() + + # Not implemented yet + @unittest.expectedFailure + def test_and(self): + super().test_and() + + # Not implemented yet + @unittest.expectedFailure + def test_or(self): + 
super().test_or() + + # Not implemented yet + @unittest.expectedFailure + def test_not(self): + super().test_not() + + # Not implemented yet + @unittest.expectedFailure + def test_operators_combination(self): + super().test_operators_combination() + + # + # Shortcut query classes + # + + # Not implemented yet + @unittest.expectedFailure + def test_filter_query(self): + super().test_filter_query() diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py index e921e7a6f..b82d8bb2d 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py @@ -37,44 +37,6 @@ class TestElasticsearch2SearchBackend(BackendTests, ElasticsearchCommonSearchBac def test_delete(self): super(TestElasticsearch2SearchBackend, self).test_delete() - # - # Basic query classes - # - - # Not implemented yet - @unittest.expectedFailure - def test_term(self): - super().test_term() - - # Not implemented yet - @unittest.expectedFailure - def test_and(self): - super().test_and() - - # Not implemented yet - @unittest.expectedFailure - def test_or(self): - super().test_or() - - # Not implemented yet - @unittest.expectedFailure - def test_not(self): - super().test_not() - - # Not implemented yet - @unittest.expectedFailure - def test_operators_combination(self): - super().test_operators_combination() - - # - # Shortcut query classes - # - - # Not implemented yet - @unittest.expectedFailure - def test_filter_query(self): - super().test_filter_query() - class TestElasticsearch2SearchQuery(TestCase): def assertDictEqual(self, a, b): From d537737166201b4cd628db4664c3a18bdd157e71 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 22:12:33 +0100 Subject: [PATCH 17/40] Replaces a remaining None with MATCH_ALL. 
--- wagtail/wagtailcore/tests/test_page_queryset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wagtail/wagtailcore/tests/test_page_queryset.py b/wagtail/wagtailcore/tests/test_page_queryset.py index bbbb47c46..deaf0ce83 100644 --- a/wagtail/wagtailcore/tests/test_page_queryset.py +++ b/wagtail/wagtailcore/tests/test_page_queryset.py @@ -6,6 +6,7 @@ from django.test import TestCase from wagtail.tests.testapp.models import EventPage, SimplePage, SingleEventPage from wagtail.wagtailcore.models import Page, PageViewRestriction, Site from wagtail.wagtailcore.signals import page_unpublished +from wagtail.wagtailsearch.query import MATCH_ALL class TestPageQuerySet(TestCase): @@ -593,7 +594,8 @@ class TestSpecificQuery(TestCase): # 1276 - The database search backend didn't return results with the # specific type when searching a specific queryset. - pages = list(Page.objects.specific().live().in_menu().search(None, backend='wagtail.wagtailsearch.backends.db')) + pages = list(Page.objects.specific().live().in_menu().search( + MATCH_ALL, backend='wagtail.wagtailsearch.backends.db')) # Check that each page is in the queryset with the correct type. # We don't care about order here From c3cac970ff8c83ef4060cf07b1404aeabdc1efbc Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 22:16:08 +0100 Subject: [PATCH 18/40] Fixes Elasticsearch tests. 
--- .../tests/elasticsearch_common_tests.py | 18 +++++++++++++++++- .../tests/test_elasticsearch2_backend.py | 18 +----------------- .../tests/test_elasticsearch5_backend.py | 18 +----------------- 3 files changed, 19 insertions(+), 35 deletions(-) diff --git a/wagtail/wagtailsearch/tests/elasticsearch_common_tests.py b/wagtail/wagtailsearch/tests/elasticsearch_common_tests.py index 3cd214392..0a271d493 100644 --- a/wagtail/wagtailsearch/tests/elasticsearch_common_tests.py +++ b/wagtail/wagtailsearch/tests/elasticsearch_common_tests.py @@ -8,9 +8,10 @@ from django.core import management from wagtail.tests.search import models from wagtail.wagtailsearch.query import MATCH_ALL +from wagtail.wagtailsearch.tests.test_backends import BackendTests -class ElasticsearchCommonSearchBackendTests(object): +class ElasticsearchCommonSearchBackendTests(BackendTests): def test_search_with_spaces_only(self): # Search for some space characters and hope it doesn't crash results = self.backend.search(" ", models.Book) @@ -174,6 +175,21 @@ class ElasticsearchCommonSearchBackendTests(object): results = self.backend.search(MATCH_ALL, models.Book)[110:] self.assertEqual(len(results), 53) + # Broken + @unittest.expectedFailure + def test_filter_in_values_list_subquery(self): + super().test_filter_in_values_list_subquery() + + # Broken + @unittest.expectedFailure + def test_order_by_non_filterable_field(self): + super().test_order_by_non_filterable_field() + + # Broken + @unittest.expectedFailure + def test_delete(self): + super().test_delete() + # # Basic query classes # diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py index b82d8bb2d..e7a8aa77d 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py @@ -16,27 +16,11 @@ from wagtail.wagtailsearch.backends.elasticsearch2 import ( from wagtail.wagtailsearch.query import 
MATCH_ALL from .elasticsearch_common_tests import ElasticsearchCommonSearchBackendTests -from .test_backends import BackendTests -class TestElasticsearch2SearchBackend(BackendTests, ElasticsearchCommonSearchBackendTests, TestCase): +class TestElasticsearch2SearchBackend(ElasticsearchCommonSearchBackendTests, TestCase): backend_path = 'wagtail.wagtailsearch.backends.elasticsearch2' - # Broken - @unittest.expectedFailure - def test_filter_in_values_list_subquery(self): - super(TestElasticsearch2SearchBackend, self).test_filter_in_values_list_subquery() - - # Broken - @unittest.expectedFailure - def test_order_by_non_filterable_field(self): - super(TestElasticsearch2SearchBackend, self).test_order_by_non_filterable_field() - - # Broken - @unittest.expectedFailure - def test_delete(self): - super(TestElasticsearch2SearchBackend, self).test_delete() - class TestElasticsearch2SearchQuery(TestCase): def assertDictEqual(self, a, b): diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py index 030f73ac4..092721754 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py @@ -15,10 +15,9 @@ from wagtail.wagtailsearch.backends.elasticsearch5 import Elasticsearch5SearchBa from wagtail.wagtailsearch.query import MATCH_ALL from .elasticsearch_common_tests import ElasticsearchCommonSearchBackendTests -from .test_backends import BackendTests -class TestElasticsearch5SearchBackend(BackendTests, ElasticsearchCommonSearchBackendTests, TestCase): +class TestElasticsearch5SearchBackend(ElasticsearchCommonSearchBackendTests, TestCase): backend_path = 'wagtail.wagtailsearch.backends.elasticsearch5' # Broken @@ -26,21 +25,6 @@ class TestElasticsearch5SearchBackend(BackendTests, ElasticsearchCommonSearchBac def test_filter_isnull_true(self): super(TestElasticsearch5SearchBackend, self).test_filter_isnull_true() - # Broken - 
@unittest.expectedFailure - def test_filter_in_values_list_subquery(self): - super(TestElasticsearch5SearchBackend, self).test_filter_in_values_list_subquery() - - # Broken - @unittest.expectedFailure - def test_order_by_non_filterable_field(self): - super(TestElasticsearch5SearchBackend, self).test_order_by_non_filterable_field() - - # Broken - @unittest.expectedFailure - def test_delete(self): - super(TestElasticsearch5SearchBackend, self).test_delete() - class TestElasticsearch5SearchQuery(TestCase): def assertDictEqual(self, a, b): From 1bc7cf5934774b4b56a3be2093949538f09692d9 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 22:45:06 +0100 Subject: [PATCH 19/40] Unused import. --- wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py | 1 - 1 file changed, 1 deletion(-) diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py index e7a8aa77d..1f5684e5a 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py @@ -3,7 +3,6 @@ from __future__ import absolute_import, unicode_literals import datetime import json -import unittest import mock from django.db.models import Q From c598534271dcccbf16b5a811ff0a15faaaa29560 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Fri, 24 Nov 2017 03:40:51 +0100 Subject: [PATCH 20/40] Tests invalid search operators and removes a useless check. 
--- wagtail/wagtailsearch/backends/base.py | 6 ---- wagtail/wagtailsearch/query.py | 4 +-- wagtail/wagtailsearch/tests/test_backends.py | 30 ++++++++++++++++++++ 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/wagtail/wagtailsearch/backends/base.py b/wagtail/wagtailsearch/backends/base.py index 9cf04d5c9..40c8fb411 100644 --- a/wagtail/wagtailsearch/backends/base.py +++ b/wagtail/wagtailsearch/backends/base.py @@ -279,12 +279,6 @@ class BaseSearchBackend(object): for prefetch in prefetch_related: queryset = queryset.prefetch_related(prefetch) - # Check operator - if operator is not None: - operator = operator.lower() - if operator not in ['or', 'and']: - raise ValueError("operator must be either 'or' or 'and'") - # Search search_query = self.query_compiler_class( queryset, query, fields=fields, operator=operator, order_by_relevance=order_by_relevance diff --git a/wagtail/wagtailsearch/query.py b/wagtail/wagtailsearch/query.py index 276cffe90..8fc346c52 100644 --- a/wagtail/wagtailsearch/query.py +++ b/wagtail/wagtailsearch/query.py @@ -138,9 +138,9 @@ class PlainText(SearchQueryShortcut): def __init__(self, query_string: str, operator: str = DEFAULT_OPERATOR, boost: float = 1): self.query_string = query_string - if operator.lower() not in self.OPERATORS: + self.operator = operator.lower() + if self.operator not in self.OPERATORS: raise ValueError("`operator` must be either 'or' or 'and'.") - self.operator = operator self.boost = boost def apply(self, func): diff --git a/wagtail/wagtailsearch/tests/test_backends.py b/wagtail/wagtailsearch/tests/test_backends.py index 82cddef95..7c6fa71ce 100644 --- a/wagtail/wagtailsearch/tests/test_backends.py +++ b/wagtail/wagtailsearch/tests/test_backends.py @@ -538,6 +538,36 @@ class BackendTests(WagtailTestUtils): self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide'}) + def test_plain_text_operator_case(self): + results = self.backend.search(PlainText('Guide', operator='AND'), + 
models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + results = self.backend.search(PlainText('Guide', operator='aNd'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + results = self.backend.search('Guide', models.Book.objects.all(), + operator='AND') + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + results = self.backend.search('Guide', models.Book.objects.all(), + operator='aNd') + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + def test_plain_text_invalid_operator(self): + with self.assertRaises(ValueError): + self.backend.search(PlainText('Guide', operator='xor'), + models.Book.objects.all()) + + with self.assertRaises(ValueError): + self.backend.search('Guide', models.Book.objects.all(), + operator='xor') + def test_filter_equivalent(self): filter = Filter(Term('Javascript')) term = filter.child From 420f721631a2eb2ef80822bfaef24c36906b192f Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Fri, 24 Nov 2017 04:05:54 +0100 Subject: [PATCH 21/40] Tests Travis CI against the PostgreSQL search backend. --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index a931abc1f..2925e8bb5 100644 --- a/tox.ini +++ b/tox.ini @@ -48,7 +48,7 @@ deps = elasticsearch5: certifi setenv = - postgres: DATABASE_ENGINE=django.db.backends.postgresql_psycopg2 + postgres: DATABASE_ENGINE=django.db.backends.postgresql mysql: DATABASE_ENGINE=django.db.backends.mysql mysql: DATABASE_HOST=localhost From d187bf4c759a25f0876330ebe4b0bc9c098de827 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Fri, 24 Nov 2017 04:37:10 +0100 Subject: [PATCH 22/40] Disables AUTO_UPDATE in PostgreSQL test for speed. 
--- wagtail/tests/settings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/wagtail/tests/settings.py b/wagtail/tests/settings.py index e416cf2a2..47e553a4b 100644 --- a/wagtail/tests/settings.py +++ b/wagtail/tests/settings.py @@ -170,6 +170,7 @@ if os.environ.get('DATABASE_ENGINE') == 'django.db.backends.postgresql': INSTALLED_APPS += ('wagtail.contrib.postgres_search',) WAGTAILSEARCH_BACKENDS['postgresql'] = { 'BACKEND': 'wagtail.contrib.postgres_search.backend', + 'AUTO_UPDATE': False, } if 'ELASTICSEARCH_URL' in os.environ: From b3bf2415d2b1debb8b44cb140d8de10d431d1fa0 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Fri, 24 Nov 2017 05:09:39 +0100 Subject: [PATCH 23/40] Fixes PostgreSQL tests. --- wagtail/contrib/postgres_search/backend.py | 3 ++- wagtail/contrib/postgres_search/models.py | 2 ++ wagtail/wagtailcore/models.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py index 0429ecbd2..1d750c96a 100644 --- a/wagtail/contrib/postgres_search/backend.py +++ b/wagtail/contrib/postgres_search/backend.py @@ -209,7 +209,8 @@ class PostgresSearchQueryCompiler(BaseSearchQueryCompiler): else: sub_field_name = None for field in fields: - if field.field_name == field_name: + if isinstance(field, SearchField) \ + and field.field_name == field_name: # Note: Searching on a specific related field using # `.search(fields=…)` is not yet supported by Wagtail. # This method anticipates by already implementing it. 
diff --git a/wagtail/contrib/postgres_search/models.py b/wagtail/contrib/postgres_search/models.py index 8bb85dee0..798eaaddb 100644 --- a/wagtail/contrib/postgres_search/models.py +++ b/wagtail/contrib/postgres_search/models.py @@ -14,6 +14,8 @@ from .utils import get_descendants_content_types_pks class TextIDGenericRelation(GenericRelation): + auto_created = True + def get_content_type_lookup(self, alias, remote_alias): field = self.remote_field.model._meta.get_field( self.content_type_field_name) diff --git a/wagtail/wagtailcore/models.py b/wagtail/wagtailcore/models.py index 84688cd5d..c5707e27a 100644 --- a/wagtail/wagtailcore/models.py +++ b/wagtail/wagtailcore/models.py @@ -1035,7 +1035,7 @@ class Page(six.with_metaclass(PageBase, AbstractPage, index.Indexed, Clusterable def copy(self, recursive=False, to=None, update_attrs=None, copy_revisions=True, keep_live=True, user=None): # Fill dict with self.specific values specific_self = self.specific - default_exclude_fields = ['id', 'path', 'depth', 'numchild', 'url_path', 'path'] + default_exclude_fields = ['id', 'path', 'depth', 'numchild', 'url_path', 'path', 'index_entries'] exclude_fields = default_exclude_fields + specific_self.exclude_fields_in_copy specific_dict = {} From ff1272dea23cd8b00619c85dc7e4960b12cc7db6 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Fri, 24 Nov 2017 05:33:43 +0100 Subject: [PATCH 24/40] Simplification. 
--- wagtail/wagtailsearch/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wagtail/wagtailsearch/query.py b/wagtail/wagtailsearch/query.py index 8fc346c52..9a4232e60 100644 --- a/wagtail/wagtailsearch/query.py +++ b/wagtail/wagtailsearch/query.py @@ -59,7 +59,7 @@ class SearchQueryShortcut(SearchQuery): raise NotImplementedError def get_children(self): - return self.get_equivalent(), + yield self.get_equivalent() # # Operators From c9e0a5437276a2b4573d18e129f65563cf4ae7c5 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 14 Dec 2017 19:01:26 +0100 Subject: [PATCH 25/40] Removes boost when using Filter. --- wagtail/search/query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wagtail/search/query.py b/wagtail/search/query.py index 9a4232e60..9a159d553 100644 --- a/wagtail/search/query.py +++ b/wagtail/search/query.py @@ -168,9 +168,9 @@ class Filter(SearchQueryShortcut): def get_equivalent(self): query = self.query if self.include is not None: - query &= self.include + query &= Boost(self.include, 0) if self.exclude is not None: - query &= ~self.exclude + query &= Boost(~self.exclude, 0) return query From a8196fd1a171137259385ef54a3d58f85fe41805 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 14 Dec 2017 19:37:28 +0100 Subject: [PATCH 26/40] Fixes the DB backend. 
--- wagtail/search/backends/db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wagtail/search/backends/db.py b/wagtail/search/backends/db.py index cd244bede..93935d70f 100644 --- a/wagtail/search/backends/db.py +++ b/wagtail/search/backends/db.py @@ -95,7 +95,7 @@ class DatabaseSearchResults(BaseSearchResults): queryset = self.get_queryset() # Call query._get_order_by so it can raise errors if a non-indexed field is used for ordering - list(self.query._get_order_by()) + list(self.query_compiler._get_order_by()) if self._score_field: queryset = queryset.annotate(**{self._score_field: Value(None, output_field=models.FloatField())}) From 0d70a01cf98efa9681f27513839b2cf1aea92d4f Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 14 Dec 2017 19:46:04 +0100 Subject: [PATCH 27/40] Updates a test. --- wagtail/search/tests/test_backends.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/wagtail/search/tests/test_backends.py b/wagtail/search/tests/test_backends.py index bcf5b5563..97316c461 100644 --- a/wagtail/search/tests/test_backends.py +++ b/wagtail/search/tests/test_backends.py @@ -578,7 +578,10 @@ class BackendTests(WagtailTestUtils): javascript = and_obj.children[0] self.assertIsInstance(javascript, Term) self.assertEqual(javascript.term, 'Javascript') - definitive = and_obj.children[1] + boost_obj = and_obj.children[1] + self.assertIsInstance(boost_obj, Boost) + self.assertEqual(boost_obj.boost, 0) + definitive = boost_obj.child self.assertIsInstance(definitive, Term) self.assertEqual(definitive.term, 'Definitive') @@ -590,10 +593,16 @@ class BackendTests(WagtailTestUtils): javascript = and_obj2.children[0] self.assertIsInstance(javascript, Term) self.assertEqual(javascript.term, 'Javascript') - definitive = and_obj2.children[1] + boost_obj = and_obj2.children[1] + self.assertIsInstance(boost_obj, Boost) + self.assertEqual(boost_obj.boost, 0) + definitive = boost_obj.child 
self.assertIsInstance(definitive, Term) self.assertEqual(definitive.term, 'Definitive') - not_obj = and_obj1.children[1] + boost_obj = and_obj1.children[1] + self.assertIsInstance(boost_obj, Boost) + self.assertEqual(boost_obj.boost, 0) + not_obj = boost_obj.child self.assertIsInstance(not_obj, Not) guide = not_obj.child self.assertEqual(guide.term, 'Guide') From 944a2a97ebc5094c860a1b2e29b852dd9f3b8dd6 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 14 Dec 2017 19:50:07 +0100 Subject: [PATCH 28/40] Removes an unused import. --- wagtail/search/tests/test_elasticsearch5_backend.py | 1 - 1 file changed, 1 deletion(-) diff --git a/wagtail/search/tests/test_elasticsearch5_backend.py b/wagtail/search/tests/test_elasticsearch5_backend.py index bbbc08df4..ff00efaaa 100644 --- a/wagtail/search/tests/test_elasticsearch5_backend.py +++ b/wagtail/search/tests/test_elasticsearch5_backend.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import datetime import json -import unittest import mock from django.db.models import Q From bb80df35ffaf5067e1833b98e2e6e8893d0eadc5 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 14 Dec 2017 19:59:21 +0100 Subject: [PATCH 29/40] Fixes Elasticsearch 5 tests. 
--- wagtail/search/backends/elasticsearch5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wagtail/search/backends/elasticsearch5.py b/wagtail/search/backends/elasticsearch5.py index e1b75f4d2..2ce1808bf 100644 --- a/wagtail/search/backends/elasticsearch5.py +++ b/wagtail/search/backends/elasticsearch5.py @@ -35,7 +35,7 @@ class Elasticsearch5SearchQueryCompiler(Elasticsearch2SearchQueryCompiler): return query - return super(Elasticsearch5SearchQuery, self)._process_lookup(field, lookup, value) + return super()._process_lookup(field, lookup, value) def _connect_filters(self, filters, connector, negated): if filters: From 5ba8341de2a7608e966c000c1cd175e8c6083b21 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Wed, 3 Jan 2018 18:31:24 +0100 Subject: [PATCH 30/40] Separates query API tests in a mixin. --- .../postgres_search/tests/test_backend.py | 4 +- .../tests/elasticsearch_common_tests.py | 38 ------------------- wagtail/search/tests/test_backends.py | 2 + wagtail/search/tests/test_db_backend.py | 4 +- 4 files changed, 6 insertions(+), 42 deletions(-) diff --git a/wagtail/contrib/postgres_search/tests/test_backend.py b/wagtail/contrib/postgres_search/tests/test_backend.py index 1f56f8936..d4a05e429 100644 --- a/wagtail/contrib/postgres_search/tests/test_backend.py +++ b/wagtail/contrib/postgres_search/tests/test_backend.py @@ -3,12 +3,12 @@ import unittest from django.test import TestCase -from wagtail.search.tests.test_backends import BackendTests +from wagtail.search.tests.test_backends import QueryAPITestMixin, BackendTests from ..utils import BOOSTS_WEIGHTS, WEIGHTS_VALUES, determine_boosts_weights, get_weight -class TestPostgresSearchBackend(BackendTests, TestCase): +class TestPostgresSearchBackend(QueryAPITestMixin, BackendTests, TestCase): backend_path = 'wagtail.contrib.postgres_search.backend' def test_weights(self): diff --git a/wagtail/search/tests/elasticsearch_common_tests.py 
b/wagtail/search/tests/elasticsearch_common_tests.py index a9fbe738b..034d502e2 100644 --- a/wagtail/search/tests/elasticsearch_common_tests.py +++ b/wagtail/search/tests/elasticsearch_common_tests.py @@ -177,41 +177,3 @@ class ElasticsearchCommonSearchBackendTests(BackendTests): @unittest.expectedFailure def test_delete(self): super().test_delete() - - # - # Basic query classes - # - - # Not implemented yet - @unittest.expectedFailure - def test_term(self): - super().test_term() - - # Not implemented yet - @unittest.expectedFailure - def test_and(self): - super().test_and() - - # Not implemented yet - @unittest.expectedFailure - def test_or(self): - super().test_or() - - # Not implemented yet - @unittest.expectedFailure - def test_not(self): - super().test_not() - - # Not implemented yet - @unittest.expectedFailure - def test_operators_combination(self): - super().test_operators_combination() - - # - # Shortcut query classes - # - - # Not implemented yet - @unittest.expectedFailure - def test_filter_query(self): - super().test_filter_query() diff --git a/wagtail/search/tests/test_backends.py b/wagtail/search/tests/test_backends.py index 97316c461..d27754adf 100644 --- a/wagtail/search/tests/test_backends.py +++ b/wagtail/search/tests/test_backends.py @@ -428,6 +428,8 @@ class BackendTests(WagtailTestUtils): "The Fellowship of the Ring" ]) + +class QueryAPITestMixin: # # Basic query classes # diff --git a/wagtail/search/tests/test_db_backend.py b/wagtail/search/tests/test_db_backend.py index 40953d9e7..9f4c86d58 100644 --- a/wagtail/search/tests/test_db_backend.py +++ b/wagtail/search/tests/test_db_backend.py @@ -2,10 +2,10 @@ import unittest from django.test import TestCase -from .test_backends import BackendTests +from .test_backends import BackendTests, QueryAPITestMixin -class TestDBBackend(BackendTests, TestCase): +class TestDBBackend(QueryAPITestMixin, BackendTests, TestCase): backend_path = 'wagtail.search.backends.db' # Doesn't support ranking From 
924d65dce3d81bc3a5965914737115f479b80049 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Wed, 3 Jan 2018 18:33:13 +0100 Subject: [PATCH 31/40] Breaks down `QueryAPITestMixin.test_plain_text()`. --- wagtail/search/tests/test_backends.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/wagtail/search/tests/test_backends.py b/wagtail/search/tests/test_backends.py index d27754adf..3f0fd6322 100644 --- a/wagtail/search/tests/test_backends.py +++ b/wagtail/search/tests/test_backends.py @@ -515,15 +515,14 @@ class QueryAPITestMixin: # Shortcut query classes # - def test_plain_text(self): - # Single word + def test_plain_text_single_word(self): results = self.backend.search(PlainText('Javascript'), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide', 'JavaScript: The good parts'}) - # Multiple words (OR operator) + def test_plain_text_multiple_words_or(self): results = self.backend.search(PlainText('Javascript Definitive', operator='or'), models.Book.objects.all()) @@ -531,7 +530,7 @@ class QueryAPITestMixin: {'JavaScript: The Definitive Guide', 'JavaScript: The good parts'}) - # Multiple words (AND operator) + def test_plain_text_multiple_words_and(self): results = self.backend.search(PlainText('Javascript Definitive', operator='and'), models.Book.objects.all()) From 379c027e1e35435138f766c91f49ab47e3e4a3ac Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Wed, 3 Jan 2018 19:11:44 +0100 Subject: [PATCH 32/40] Fixes the PostgreSQL search backend broken by 25901aa. 
--- wagtail/contrib/postgres_search/backend.py | 24 +++++++++++++++++-- .../postgres_search/tests/test_backend.py | 11 --------- wagtail/search/backends/db.py | 4 +--- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py index 2d90a99c2..d41b6b0eb 100644 --- a/wagtail/contrib/postgres_search/backend.py +++ b/wagtail/contrib/postgres_search/backend.py @@ -3,7 +3,7 @@ from warnings import warn from django.contrib.postgres.search import SearchQuery as PostgresSearchQuery from django.contrib.postgres.search import SearchRank, SearchVector from django.db import DEFAULT_DB_ALIAS, NotSupportedError, connections, transaction -from django.db.models import F, Manager, TextField, Value +from django.db.models import F, Manager, TextField, Value, Q from django.db.models.constants import LOOKUP_SEP from django.db.models.functions import Cast from django.utils.encoding import force_text @@ -162,6 +162,9 @@ class Index: else: self.add_items_update_then_create(content_type_pk, objs, config) + def delete_item(self, item): + item.index_entries.using(self.db_alias).delete() + def __str__(self): return self.name @@ -245,6 +248,23 @@ class PostgresSearchQueryCompiler(BaseSearchQueryCompiler): queryset = queryset.order_by('-pk') return queryset[start:stop] + def _process_lookup(self, field, lookup, value): + return Q(**{field.get_attname(self.queryset.model) + + '__' + lookup: value}) + + def _connect_filters(self, filters, connector, negated): + if connector == 'AND': + q = Q(*filters) + elif connector == 'OR': + q = OR([Q(fil) for fil in filters]) + else: + return + + if negated: + q = ~q + + return q + class PostgresSearchResults(BaseSearchResults): def _do_search(self): @@ -329,7 +349,7 @@ class PostgresSearchBackend(BaseSearchBackend): self.get_index_for_object(obj_list[0]).add_items(model, obj_list) def delete(self, obj): - obj.index_entries.all().delete() + 
self.get_index_for_object(obj).delete_item(obj) SearchBackend = PostgresSearchBackend diff --git a/wagtail/contrib/postgres_search/tests/test_backend.py b/wagtail/contrib/postgres_search/tests/test_backend.py index d4a05e429..dbeead789 100644 --- a/wagtail/contrib/postgres_search/tests/test_backend.py +++ b/wagtail/contrib/postgres_search/tests/test_backend.py @@ -38,14 +38,3 @@ class TestPostgresSearchBackend(QueryAPITestMixin, BackendTests, TestCase): [(6, 'A'), (4, 'B'), (2, 'C'), (0, 'D')]) self.assertListEqual(determine_boosts_weights([-2, -1, 0, 1, 2, 3, 4]), [(4, 'A'), (2, 'B'), (0, 'C'), (-2, 'D')]) - - # Broken - # Note: This is applying the filter, but should be raising the FieldError instead - @unittest.expectedFailure - def test_filter_on_non_filterable_field(self): - super().test_filter_on_non_filterable_field() - - # Broken - @unittest.expectedFailure - def test_order_by_non_filterable_field(self): - super().test_order_by_non_filterable_field() diff --git a/wagtail/search/backends/db.py b/wagtail/search/backends/db.py index 45bf4081b..b25aa669e 100644 --- a/wagtail/search/backends/db.py +++ b/wagtail/search/backends/db.py @@ -36,9 +36,7 @@ class DatabaseSearchQueryCompiler(BaseSearchQueryCompiler): if connector == 'AND': q = models.Q(*filters) elif connector == 'OR': - q = models.Q(filters[0]) - for fil in filters[1:]: - q |= fil + q = OR([models.Q(fil) for fil in filters]) else: return From 7915a8b1271f225f3a573941bca788dce3f25196 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Wed, 3 Jan 2018 20:49:48 +0100 Subject: [PATCH 33/40] Improves Flake8 happiness. 
--- wagtail/contrib/postgres_search/backend.py | 4 ++-- wagtail/contrib/postgres_search/tests/test_backend.py | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py index d41b6b0eb..e66eea1a2 100644 --- a/wagtail/contrib/postgres_search/backend.py +++ b/wagtail/contrib/postgres_search/backend.py @@ -249,8 +249,8 @@ class PostgresSearchQueryCompiler(BaseSearchQueryCompiler): return queryset[start:stop] def _process_lookup(self, field, lookup, value): - return Q(**{field.get_attname(self.queryset.model) - + '__' + lookup: value}) + return Q(**{field.get_attname(self.queryset.model) + + '__' + lookup: value}) def _connect_filters(self, filters, connector, negated): if connector == 'AND': diff --git a/wagtail/contrib/postgres_search/tests/test_backend.py b/wagtail/contrib/postgres_search/tests/test_backend.py index dbeead789..96ceea996 100644 --- a/wagtail/contrib/postgres_search/tests/test_backend.py +++ b/wagtail/contrib/postgres_search/tests/test_backend.py @@ -1,6 +1,3 @@ -# coding: utf-8 -import unittest - from django.test import TestCase from wagtail.search.tests.test_backends import QueryAPITestMixin, BackendTests From 52691766c5887942feed6eed39d12e63c8f1bf61 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 15 Dec 2017 11:51:23 +0000 Subject: [PATCH 34/40] Reorganise get_inner_query # Conflicts: # wagtail/search/backends/elasticsearch2.py --- wagtail/search/backends/elasticsearch2.py | 61 ++++++++++++----------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/wagtail/search/backends/elasticsearch2.py b/wagtail/search/backends/elasticsearch2.py index b54f3ff75..c3db8e04b 100644 --- a/wagtail/search/backends/elasticsearch2.py +++ b/wagtail/search/backends/elasticsearch2.py @@ -375,41 +375,42 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler): def get_inner_query(self): if isinstance(self.query, MatchAll): return {'match_all': 
{}} + elif isinstance(self.query, PlainText): + fields = self.remapped_fields or ['_all', '_partials'] + operator = self.query.operator - if not isinstance(self.query, PlainText): + if len(fields) == 1: + if operator == 'or': + return { + 'match': { + fields[0]: self.query.query_string, + } + } + return { + 'match': { + fields[0]: { + 'query': self.query.query_string, + 'operator': operator, + } + } + } + + query = { + 'multi_match': { + 'query': self.query.query_string, + 'fields': fields, + } + } + if operator != 'or': + query['multi_match']['operator'] = operator + + return query + + else: raise NotImplementedError( '`%s` is not supported by the Elasticsearch search backend.' % self.query.__class__.__name__) - fields = self.remapped_fields or ['_all', '_partials'] - operator = self.query.operator - - if len(fields) == 1: - if operator == 'or': - return { - 'match': { - fields[0]: self.query.query_string, - } - } - return { - 'match': { - fields[0]: { - 'query': self.query.query_string, - 'operator': operator, - } - } - } - - query = { - 'multi_match': { - 'query': self.query.query_string, - 'fields': fields, - } - } - if operator != 'or': - query['multi_match']['operator'] = operator - return query - def get_content_type_filter(self): # Query content_type using a "match" query. 
See comment in # Elasticsearch2Mapping.get_document for more details From 458ba92f055748d96b513595ddf92036b4078121 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 15 Dec 2017 11:54:56 +0000 Subject: [PATCH 35/40] Add _compile_query method # Conflicts: # wagtail/search/backends/elasticsearch2.py --- wagtail/search/backends/elasticsearch2.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/wagtail/search/backends/elasticsearch2.py b/wagtail/search/backends/elasticsearch2.py index c3db8e04b..af9d3aafe 100644 --- a/wagtail/search/backends/elasticsearch2.py +++ b/wagtail/search/backends/elasticsearch2.py @@ -372,24 +372,25 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler): return filter_out - def get_inner_query(self): - if isinstance(self.query, MatchAll): + def _compile_query(self, query): + if isinstance(query, MatchAll): return {'match_all': {}} - elif isinstance(self.query, PlainText): + + elif isinstance(query, PlainText): fields = self.remapped_fields or ['_all', '_partials'] - operator = self.query.operator + operator = query.operator if len(fields) == 1: if operator == 'or': return { 'match': { - fields[0]: self.query.query_string, + fields[0]: query.query_string, } } return { 'match': { fields[0]: { - 'query': self.query.query_string, + 'query': query.query_string, 'operator': operator, } } @@ -397,7 +398,7 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler): query = { 'multi_match': { - 'query': self.query.query_string, + 'query': query.query_string, 'fields': fields, } } @@ -409,7 +410,11 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler): else: raise NotImplementedError( '`%s` is not supported by the Elasticsearch search backend.' - % self.query.__class__.__name__) + % query.__class__.__name__) + + + def get_inner_query(self): + return self._compile_query(self.query) def get_content_type_filter(self): # Query content_type using a "match" query. 
See comment in From bd6cb6dad779e58fa9f9f7e0ce28b891db8105f2 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 15 Dec 2017 13:29:53 +0000 Subject: [PATCH 36/40] Implemented search query classes for Elasticsearch # Conflicts: # wagtail/search/backends/elasticsearch2.py # wagtail/search/tests/test_backends.py --- wagtail/search/backends/elasticsearch2.py | 164 ++++++++++++++---- wagtail/search/tests/test_backends.py | 69 ++++---- .../tests/test_elasticsearch2_backend.py | 2 +- .../tests/test_elasticsearch5_backend.py | 2 +- 4 files changed, 167 insertions(+), 70 deletions(-) diff --git a/wagtail/search/backends/elasticsearch2.py b/wagtail/search/backends/elasticsearch2.py index af9d3aafe..05e544b34 100644 --- a/wagtail/search/backends/elasticsearch2.py +++ b/wagtail/search/backends/elasticsearch2.py @@ -14,7 +14,7 @@ from wagtail.search.backends.base import ( BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) from wagtail.search.index import ( FilterField, Indexed, RelatedFields, SearchField, class_is_indexed) -from wagtail.search.query import MatchAll, PlainText +from wagtail.search.query import MatchAll, Term, Prefix, Fuzzy, And, Or, Not, PlainText, Filter, Boost def get_model_root(model): @@ -372,49 +372,151 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler): return filter_out - def _compile_query(self, query): - if isinstance(query, MatchAll): - return {'match_all': {}} + def _compile_term_query(self, query_type, value, field, boost=1.0, **extra): + term_query = { + 'value': value, + } - elif isinstance(query, PlainText): - fields = self.remapped_fields or ['_all', '_partials'] - operator = query.operator + if boost != 1.0: + term_query['boost'] = boost - if len(fields) == 1: - if operator == 'or': - return { - 'match': { - fields[0]: query.query_string, - } - } - return { - 'match': { - fields[0]: { - 'query': query.query_string, - 'operator': operator, - } - } - } + return { + query_type: { + field: term_query, + } + } - query = 
{ - 'multi_match': { - 'query': query.query_string, - 'fields': fields, + def _compile_plaintext_query(self, query, fields, boost=1.0): + match_query = { + 'query': query.query_string + } + + if query.operator != 'or': + match_query['operator'] = query.operator + + if boost != 1.0: + match_query['boost'] = boost + + if len(fields) == 1: + return { + 'match': { + fields[0]: match_query } } - if operator != 'or': - query['multi_match']['operator'] = operator + else: + match_query['fields'] = fields - return query + return { + 'multi_match': match_query + } + + def _compile_query(self, query, field, boost=1.0): + if isinstance(query, MatchAll): + match_all_query = {} + + if boost != 1.0: + match_all_query['boost'] = boost + + return {'match_all': match_all_query} + + elif isinstance(query, Term): + return self._compile_term_query('term', query.term, field, query.boost * boost) + + elif isinstance(query, Prefix): + return self._compile_term_query('prefix', query.prefix, field, query.boost * boost) + + elif isinstance(query, Fuzzy): + return self._compile_term_query('fuzzy', query.term, field, query.boost * boost, fuzziness=query.max_distance) + + elif isinstance(query, And): + return { + 'bool': { + 'must': [ + self._compile_query(child_query, field, boost) + for child_query in query.get_children() + ] + } + } + + elif isinstance(query, Or): + return { + 'bool': { + 'should': [ + self._compile_query(child_query, field, boost) + for child_query in query.get_children() + ] + } + } + + elif isinstance(query, Not): + return { + 'bool': { + 'mustNot': self._compile_query(query.subquery, field, boost) + } + } + + elif isinstance(query, PlainText): + return self._compile_plaintext_query(self.query, [field], boost) + + elif isinstance(query, Filter): + bool_query = { + 'must': self._compile_query(query.query, field, boost), + } + + if query.include: + bool_query['filter'] = self._compile_query(query.include, field, 0.0) + + if query.exclude: + bool_query['mustNot'] = 
self._compile_query(query.exclude, field, 0.0) + + return { + 'bool': bool_query, + } + + elif isinstance(query, Boost): + return self._compile_query(query.subquery, field, boost * query.boost) else: raise NotImplementedError( '`%s` is not supported by the Elasticsearch search backend.' % query.__class__.__name__) - def get_inner_query(self): - return self._compile_query(self.query) + fields = self.remapped_fields or ['_all', '_partials'] + + if len(fields) == 0: + # No fields. Return a query that'll match nothing + return { + 'bool': { + 'mustNot': {'match_all': {}} + } + } + + # Handle MatchAll and PlainText separately as they were supported + # before "search query classes" was implemented and we'd like to + # keep the query the same as before + if isinstance(self.query, MatchAll): + return {'match_all': {}} + + elif isinstance(self.query, PlainText): + return self._compile_plaintext_query(self.query, fields) + + else: + if len(fields) == 1: + return self._compile_query(self.query, fields[0]) + else: + # Compile a query for each field then combine with disjunction + # max (or operator which takes the max score out of each of the + # field queries) + field_queries = [] + for field in fields: + field_queries.append(self._compile_query(self.query, field)) + + return { + 'dis_max': { + 'queries': field_queries + } + } def get_content_type_filter(self): # Query content_type using a "match" query. 
See comment in diff --git a/wagtail/search/tests/test_backends.py b/wagtail/search/tests/test_backends.py index 784902afe..0baba13fd 100644 --- a/wagtail/search/tests/test_backends.py +++ b/wagtail/search/tests/test_backends.py @@ -443,37 +443,32 @@ class QueryAPITestMixin: def test_term(self): # Single word - results = self.backend.search(Term('Javascript'), + results = self.backend.search(Term('javascript'), models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide', 'JavaScript: The good parts'}) - # Multiple word - results = self.backend.search(Term('Definitive Guide'), - models.Book.objects.all()) - self.assertSetEqual({r.title for r in results}, - {'JavaScript: The Definitive Guide'}) - def test_and(self): - results = self.backend.search(And([Term('Javascript'), - Term('Definitive')]), + results = self.backend.search(And([Term('javascript'), + Term('definitive')]), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide'}) - results = self.backend.search(Term('Javascript') & Term('Definitive'), + results = self.backend.search(Term('javascript') & Term('definitive'), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide'}) def test_or(self): - results = self.backend.search(Or([Term('Hobbit'), Term('Towers')]), + results = self.backend.search(Or([Term('hobbit'), Term('towers')]), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'The Hobbit', 'The Two Towers'}) - results = self.backend.search(Term('Hobbit') | Term('Towers'), + results = self.backend.search(Term('hobbit') | Term('towers'), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'The Hobbit', 'The Two Towers'}) @@ -493,19 +488,19 @@ class QueryAPITestMixin: 'Two Scoops of Django 1.11', } - results = self.backend.search(Not(Term('Javascript')), + results = 
self.backend.search(Not(Term('javascript')), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, all_other_titles) - results = self.backend.search(~Term('Javascript'), + results = self.backend.search(~Term('javascript'), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, all_other_titles) def test_operators_combination(self): results = self.backend.search( - ((Term('Javascript') & ~Term('Definitive')) | - Term('Python') | Term('Rust')) | - Term('Two'), + ((Term('javascript') & ~Term('definitive')) | + Term('python') | Term('rust')) | + Term('two'), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'JavaScript: The good parts', @@ -571,84 +566,84 @@ class QueryAPITestMixin: operator='xor') def test_filter_equivalent(self): - filter = Filter(Term('Javascript')) + filter = Filter(Term('javascript')) term = filter.child self.assertIsInstance(term, Term) - self.assertEqual(term.term, 'Javascript') + self.assertEqual(term.term, 'javascript') - filter = Filter(Term('Javascript'), include=Term('Definitive')) + filter = Filter(Term('javascript'), include=Term('definitive')) and_obj = filter.child self.assertIsInstance(and_obj, And) javascript = and_obj.children[0] self.assertIsInstance(javascript, Term) - self.assertEqual(javascript.term, 'Javascript') + self.assertEqual(javascript.term, 'javascript') boost_obj = and_obj.children[1] self.assertIsInstance(boost_obj, Boost) self.assertEqual(boost_obj.boost, 0) definitive = boost_obj.child self.assertIsInstance(definitive, Term) - self.assertEqual(definitive.term, 'Definitive') + self.assertEqual(definitive.term, 'definitive') - filter = Filter(Term('Javascript'), - include=Term('Definitive'), exclude=Term('Guide')) + filter = Filter(Term('javascript'), + include=Term('definitive'), exclude=Term('guide')) and_obj1 = filter.child self.assertIsInstance(and_obj1, And) and_obj2 = and_obj1.children[0] javascript = and_obj2.children[0] 
self.assertIsInstance(javascript, Term) - self.assertEqual(javascript.term, 'Javascript') + self.assertEqual(javascript.term, 'javascript') boost_obj = and_obj2.children[1] self.assertIsInstance(boost_obj, Boost) self.assertEqual(boost_obj.boost, 0) definitive = boost_obj.child self.assertIsInstance(definitive, Term) - self.assertEqual(definitive.term, 'Definitive') + self.assertEqual(definitive.term, 'definitive') boost_obj = and_obj1.children[1] self.assertIsInstance(boost_obj, Boost) self.assertEqual(boost_obj.boost, 0) not_obj = boost_obj.child self.assertIsInstance(not_obj, Not) guide = not_obj.child - self.assertEqual(guide.term, 'Guide') + self.assertEqual(guide.term, 'guide') def test_filter_query(self): - results = self.backend.search(Filter(Term('Javascript')), + results = self.backend.search(Filter(Term('javascript')), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide', 'JavaScript: The good parts'}) - results = self.backend.search(Filter(Term('Javascript'), - include=Term('Definitive')), + results = self.backend.search(Filter(Term('javascript'), + include=Term('definitive')), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide'}) - results = self.backend.search(Filter(Term('Javascript'), - include=Term('Definitive'), - exclude=Term('Guide')), + results = self.backend.search(Filter(Term('javascript'), + include=Term('definitive'), + exclude=Term('guide')), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, set()) def test_boost_equivalent(self): - boost = Boost(Term('Guide'), 5) + boost = Boost(Term('guide'), 5) equivalent = boost.children[0] self.assertIsInstance(equivalent, Term) self.assertAlmostEqual(equivalent.boost, 5) - boost = Boost(Term('Guide', boost=0.5), 5) + boost = Boost(Term('guide', boost=0.5), 5) equivalent = boost.children[0] self.assertIsInstance(equivalent, Term) 
self.assertAlmostEqual(equivalent.boost, 2.5) - boost = Boost(Boost(Term('Guide', 0.1), 3), 5) + boost = Boost(Boost(Term('guide', 0.1), 3), 5) sub_boost = boost.children[0] self.assertIsInstance(sub_boost, Boost) sub_boost = sub_boost.children[0] self.assertIsInstance(sub_boost, Term) self.assertAlmostEqual(sub_boost.boost, 1.5) - boost = Boost(And([Boost(Term('Guide', 0.1), 3), Term('Two', 2)]), 5) + boost = Boost(And([Boost(Term('guide', 0.1), 3), Term('two', 2)]), 5) and_obj = boost.children[0] self.assertIsInstance(and_obj, And) sub_boost = and_obj.children[0] diff --git a/wagtail/search/tests/test_elasticsearch2_backend.py b/wagtail/search/tests/test_elasticsearch2_backend.py index a090ae413..02e3a5264 100644 --- a/wagtail/search/tests/test_elasticsearch2_backend.py +++ b/wagtail/search/tests/test_elasticsearch2_backend.py @@ -123,7 +123,7 @@ class TestElasticsearch2SearchQuery(TestCase): # Check it expected_result = {'filtered': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, - 'query': {'match': {'title': 'Hello'}} + 'query': {'match': {'title': {'query': 'Hello'}}} }} self.assertDictEqual(query_compiler.get_query(), expected_result) diff --git a/wagtail/search/tests/test_elasticsearch5_backend.py b/wagtail/search/tests/test_elasticsearch5_backend.py index ff00efaaa..3fcf00a28 100644 --- a/wagtail/search/tests/test_elasticsearch5_backend.py +++ b/wagtail/search/tests/test_elasticsearch5_backend.py @@ -122,7 +122,7 @@ class TestElasticsearch5SearchQuery(TestCase): # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, - 'must': {'match': {'title': 'Hello'}} + 'query': {'match': {'title': {'query': 'Hello'}}} }} self.assertDictEqual(query_compiler.get_query(), expected_result) From f9cc020ded3689acc1cabf2d42a5a527fe0d8845 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 4 Jan 2018 12:11:09 +0100 Subject: [PATCH 37/40] Fixes an elasticsearch 5 test. 
--- wagtail/search/tests/test_elasticsearch5_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wagtail/search/tests/test_elasticsearch5_backend.py b/wagtail/search/tests/test_elasticsearch5_backend.py index 3fcf00a28..67f737bcc 100644 --- a/wagtail/search/tests/test_elasticsearch5_backend.py +++ b/wagtail/search/tests/test_elasticsearch5_backend.py @@ -122,7 +122,7 @@ class TestElasticsearch5SearchQuery(TestCase): # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, - 'query': {'match': {'title': {'query': 'Hello'}}} + 'must': {'match': {'title': {'query': 'Hello'}}} }} self.assertDictEqual(query_compiler.get_query(), expected_result) From da4312703b03bdf90d27dd8079cb149ececcfbe0 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 4 Jan 2018 12:12:03 +0100 Subject: [PATCH 38/40] Elasticsearch test_delete should no longer fail. --- wagtail/search/tests/elasticsearch_common_tests.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/wagtail/search/tests/elasticsearch_common_tests.py b/wagtail/search/tests/elasticsearch_common_tests.py index f8e4c2451..0262fece4 100644 --- a/wagtail/search/tests/elasticsearch_common_tests.py +++ b/wagtail/search/tests/elasticsearch_common_tests.py @@ -1,4 +1,3 @@ -import unittest from datetime import date from io import StringIO @@ -172,8 +171,3 @@ class ElasticsearchCommonSearchBackendTests(BackendTests): results = self.backend.search(MATCH_ALL, models.Book)[110:] self.assertEqual(len(results), 53) - - # Broken - @unittest.expectedFailure - def test_delete(self): - super().test_delete() From 8e6dedac67fb70f186fd62c52258ec7c9edde46f Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 4 Jan 2018 12:14:45 +0100 Subject: [PATCH 39/40] Tests the search query API on Elasticsearch. 
--- wagtail/search/tests/elasticsearch_common_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wagtail/search/tests/elasticsearch_common_tests.py b/wagtail/search/tests/elasticsearch_common_tests.py index 0262fece4..ef24f28f1 100644 --- a/wagtail/search/tests/elasticsearch_common_tests.py +++ b/wagtail/search/tests/elasticsearch_common_tests.py @@ -5,10 +5,10 @@ from django.core import management from wagtail.tests.search import models from wagtail.search.query import MATCH_ALL -from wagtail.search.tests.test_backends import BackendTests +from wagtail.search.tests.test_backends import BackendTests, QueryAPITestMixin -class ElasticsearchCommonSearchBackendTests(BackendTests): +class ElasticsearchCommonSearchBackendTests(QueryAPITestMixin, BackendTests): def test_search_with_spaces_only(self): # Search for some space characters and hope it doesn't crash results = self.backend.search(" ", models.Book) From 14490978e50c267874e73f420efe3f68ec4c255a Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 4 Jan 2018 12:21:33 +0100 Subject: [PATCH 40/40] Merges again QueryAPITestMixin and BackendTest. 
--- wagtail/contrib/postgres_search/tests/test_backend.py | 4 ++-- wagtail/search/tests/elasticsearch_common_tests.py | 4 ++-- wagtail/search/tests/test_backends.py | 2 -- wagtail/search/tests/test_db_backend.py | 4 ++-- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/wagtail/contrib/postgres_search/tests/test_backend.py b/wagtail/contrib/postgres_search/tests/test_backend.py index 96ceea996..50447365a 100644 --- a/wagtail/contrib/postgres_search/tests/test_backend.py +++ b/wagtail/contrib/postgres_search/tests/test_backend.py @@ -1,11 +1,11 @@ from django.test import TestCase -from wagtail.search.tests.test_backends import QueryAPITestMixin, BackendTests +from wagtail.search.tests.test_backends import BackendTests from ..utils import BOOSTS_WEIGHTS, WEIGHTS_VALUES, determine_boosts_weights, get_weight -class TestPostgresSearchBackend(QueryAPITestMixin, BackendTests, TestCase): +class TestPostgresSearchBackend(BackendTests, TestCase): backend_path = 'wagtail.contrib.postgres_search.backend' def test_weights(self): diff --git a/wagtail/search/tests/elasticsearch_common_tests.py b/wagtail/search/tests/elasticsearch_common_tests.py index ef24f28f1..0262fece4 100644 --- a/wagtail/search/tests/elasticsearch_common_tests.py +++ b/wagtail/search/tests/elasticsearch_common_tests.py @@ -5,10 +5,10 @@ from django.core import management from wagtail.tests.search import models from wagtail.search.query import MATCH_ALL -from wagtail.search.tests.test_backends import BackendTests, QueryAPITestMixin +from wagtail.search.tests.test_backends import BackendTests -class ElasticsearchCommonSearchBackendTests(QueryAPITestMixin, BackendTests): +class ElasticsearchCommonSearchBackendTests(BackendTests): def test_search_with_spaces_only(self): # Search for some space characters and hope it doesn't crash results = self.backend.search(" ", models.Book) diff --git a/wagtail/search/tests/test_backends.py b/wagtail/search/tests/test_backends.py index 0baba13fd..92530ecc2 100644 
--- a/wagtail/search/tests/test_backends.py +++ b/wagtail/search/tests/test_backends.py @@ -431,8 +431,6 @@ class BackendTests(WagtailTestUtils): "The Fellowship of the Ring" # If this item doesn't appear, "Foundation" is still in the index ]) - -class QueryAPITestMixin: # # Basic query classes # diff --git a/wagtail/search/tests/test_db_backend.py b/wagtail/search/tests/test_db_backend.py index 9f47912f6..fba3332d7 100644 --- a/wagtail/search/tests/test_db_backend.py +++ b/wagtail/search/tests/test_db_backend.py @@ -2,10 +2,10 @@ import unittest from django.test import TestCase -from .test_backends import BackendTests, QueryAPITestMixin +from .test_backends import BackendTests -class TestDBBackend(QueryAPITestMixin, BackendTests, TestCase): +class TestDBBackend(BackendTests, TestCase): backend_path = 'wagtail.search.backends.db' # Doesn't support ranking