diff --git a/tox.ini b/tox.ini index 5a75eb82c..b28e8d8d2 100644 --- a/tox.ini +++ b/tox.ini @@ -49,7 +49,7 @@ deps = elasticsearch5: certifi setenv = - postgres: DATABASE_ENGINE=django.db.backends.postgresql_psycopg2 + postgres: DATABASE_ENGINE=django.db.backends.postgresql mysql: DATABASE_ENGINE=django.db.backends.mysql mysql: DATABASE_HOST=localhost diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py index 8d282aff4..e66eea1a2 100644 --- a/wagtail/contrib/postgres_search/backend.py +++ b/wagtail/contrib/postgres_search/backend.py @@ -1,21 +1,23 @@ -# coding: utf-8 +from warnings import warn -from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector +from django.contrib.postgres.search import SearchQuery as PostgresSearchQuery +from django.contrib.postgres.search import SearchRank, SearchVector from django.db import DEFAULT_DB_ALIAS, NotSupportedError, connections, transaction -from django.db.models import F, Manager, TextField, Value +from django.db.models import F, Manager, TextField, Value, Q from django.db.models.constants import LOOKUP_SEP from django.db.models.functions import Cast from django.utils.encoding import force_text from wagtail.search.backends.base import ( - BaseSearchBackend, BaseSearchQuery, BaseSearchResults) + BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) from wagtail.search.index import RelatedFields, SearchField +from wagtail.search.query import And, MatchAll, Not, Or, SearchQueryShortcut, Term +from wagtail.search.utils import ADD, AND, OR from .models import IndexEntry from .utils import ( - ADD, AND, OR, WEIGHTS_VALUES, get_ancestors_content_types_pks, get_content_type_pk, - get_descendants_content_types_pks, get_postgresql_connections, get_weight, keyword_split, - unidecode) + WEIGHTS_VALUES, get_ancestors_content_types_pks, get_content_type_pk, + get_descendants_content_types_pks, get_postgresql_connections, get_weight, unidecode) # TODO: Add 
def build_database_query(self, query=None, config=None):
    """
    Recursively translate a search query tree into a PostgreSQL search query.

    :param query: the query node to translate; when ``None`` the compiler's
        own ``self.query`` (the root of the tree) is used.
    :param config: passed through unchanged as ``config=`` to every
        ``PostgresSearchQuery`` that is created.
    :returns: a ``PostgresSearchQuery`` for a ``Term``, or the combination
        produced by ``~`` / ``AND`` / ``OR`` for operator nodes.
    :raises NotImplementedError: for any node type not handled below.
    """
    if query is None:
        query = self.query

    # Shortcuts (e.g. plain text) are expanded into their equivalent tree
    # first, then compiled like any other node.
    if isinstance(query, SearchQueryShortcut):
        return self.build_database_query(query.get_equivalent(), config)
    if isinstance(query, Term):
        # TODO: Find a way to use the term boosting.
        if query.boost != 1:
            warn('PostgreSQL search backend '
                 'does not support term boosting for now.')
        return PostgresSearchQuery(unidecode(query.term), config=config)
    if isinstance(query, Not):
        return ~self.build_database_query(query.subquery, config)
    if isinstance(query, And):
        return AND(self.build_database_query(subquery, config)
                   for subquery in query.subqueries)
    if isinstance(query, Or):
        return OR(self.build_database_query(subquery, config)
                  for subquery in query.subqueries)
    # Report the node that could not be compiled. The root (`self.query`) may
    # be a perfectly supported And/Or that merely contains the offending node.
    raise NotImplementedError(
        '`%s` is not supported by the PostgreSQL search backend.'
        % query.__class__.__name__)
PostgresSearchBackend(BaseSearchBackend): - query_class = PostgresSearchQuery + query_compiler_class = PostgresSearchQueryCompiler results_class = PostgresSearchResults rebuilder_class = PostgresSearchRebuilder atomic_rebuilder_class = PostgresSearchAtomicRebuilder @@ -307,7 +349,7 @@ class PostgresSearchBackend(BaseSearchBackend): self.get_index_for_object(obj_list[0]).add_items(model, obj_list) def delete(self, obj): - obj.index_entries.all().delete() + self.get_index_for_object(obj).delete_item(obj) SearchBackend = PostgresSearchBackend diff --git a/wagtail/contrib/postgres_search/models.py b/wagtail/contrib/postgres_search/models.py index ce46fd1ff..cbe9cc5da 100644 --- a/wagtail/contrib/postgres_search/models.py +++ b/wagtail/contrib/postgres_search/models.py @@ -12,6 +12,8 @@ from .utils import get_descendants_content_types_pks class TextIDGenericRelation(GenericRelation): + auto_created = True + def get_content_type_lookup(self, alias, remote_alias): field = self.remote_field.model._meta.get_field( self.content_type_field_name) diff --git a/wagtail/contrib/postgres_search/tests/test_backend.py b/wagtail/contrib/postgres_search/tests/test_backend.py index 1f56f8936..50447365a 100644 --- a/wagtail/contrib/postgres_search/tests/test_backend.py +++ b/wagtail/contrib/postgres_search/tests/test_backend.py @@ -1,6 +1,3 @@ -# coding: utf-8 -import unittest - from django.test import TestCase from wagtail.search.tests.test_backends import BackendTests @@ -38,14 +35,3 @@ class TestPostgresSearchBackend(BackendTests, TestCase): [(6, 'A'), (4, 'B'), (2, 'C'), (0, 'D')]) self.assertListEqual(determine_boosts_weights([-2, -1, 0, 1, 2, 3, 4]), [(4, 'A'), (2, 'B'), (0, 'C'), (-2, 'D')]) - - # Broken - # Note: This is applying the filter, but should be raising the FieldError instead - @unittest.expectedFailure - def test_filter_on_non_filterable_field(self): - super().test_filter_on_non_filterable_field() - - # Broken - @unittest.expectedFailure - def 
test_order_by_non_filterable_field(self): - super().test_order_by_non_filterable_field() diff --git a/wagtail/contrib/postgres_search/utils.py b/wagtail/contrib/postgres_search/utils.py index 04effaac6..ef4a5e9c5 100644 --- a/wagtail/contrib/postgres_search/utils.py +++ b/wagtail/contrib/postgres_search/utils.py @@ -1,6 +1,3 @@ -import operator -import re -from functools import partial, reduce from itertools import zip_longest from django.apps import apps @@ -21,33 +18,6 @@ def get_postgresql_connections(): if connection.vendor == 'postgresql'] -# Reduce any iterable to a single value using a logical OR e.g. (a | b | ...) -OR = partial(reduce, operator.or_) -# Reduce any iterable to a single value using a logical AND e.g. (a & b & ...) -AND = partial(reduce, operator.and_) -# Reduce any iterable to a single value using an addition -ADD = partial(reduce, operator.add) - - -def keyword_split(keywords): - """ - Return all the keywords in a keyword string. - - Keeps keywords surrounded by quotes together, removing the surrounding quotes: - - >>> keyword_split('Hello I\\'m looking for "something special"') - ['Hello', "I'm", 'looking', 'for', 'something special'] - - Nested quoted strings are returned as is: - - >>> keyword_split("He said \\"I'm looking for 'something special'\\" so I've given him the 'special item'") - ['He', 'said', "I'm looking for 'something special'", 'so', "I've", 'given', 'him', 'the', 'special item'] - - """ - matches = re.findall(r'"([^"]+)"|\'([^\']+)\'|(\S+)', keywords) - return [match[0] or match[1] or match[2] for match in matches] - - def get_descendant_models(model): """ Returns all descendants of a model, including the model itself. 
diff --git a/wagtail/core/models.py b/wagtail/core/models.py index d43cde7db..1a4a604f9 100644 --- a/wagtail/core/models.py +++ b/wagtail/core/models.py @@ -1033,7 +1033,7 @@ class Page(AbstractPage, index.Indexed, ClusterableModel, metaclass=PageBase): def copy(self, recursive=False, to=None, update_attrs=None, copy_revisions=True, keep_live=True, user=None): # Fill dict with self.specific values specific_self = self.specific - default_exclude_fields = ['id', 'path', 'depth', 'numchild', 'url_path', 'path'] + default_exclude_fields = ['id', 'path', 'depth', 'numchild', 'url_path', 'path', 'index_entries'] exclude_fields = default_exclude_fields + specific_self.exclude_fields_in_copy specific_dict = {} diff --git a/wagtail/core/tests/test_page_queryset.py b/wagtail/core/tests/test_page_queryset.py index 7157a0ea6..fad247ef1 100644 --- a/wagtail/core/tests/test_page_queryset.py +++ b/wagtail/core/tests/test_page_queryset.py @@ -4,6 +4,7 @@ from django.test import TestCase from wagtail.tests.testapp.models import EventPage, SimplePage, SingleEventPage from wagtail.core.models import Page, PageViewRestriction, Site from wagtail.core.signals import page_unpublished +from wagtail.search.query import MATCH_ALL class TestPageQuerySet(TestCase): @@ -591,7 +592,8 @@ class TestSpecificQuery(TestCase): # 1276 - The database search backend didn't return results with the # specific type when searching a specific queryset. - pages = list(Page.objects.specific().live().in_menu().search(None, backend='wagtail.search.backends.db')) + pages = list(Page.objects.specific().live().in_menu().search( + MATCH_ALL, backend='wagtail.search.backends.db')) # Check that each page is in the queryset with the correct type. 
# We don't care about order here diff --git a/wagtail/search/backends/base.py b/wagtail/search/backends/base.py index f7c4863db..716cd4492 100644 --- a/wagtail/search/backends/base.py +++ b/wagtail/search/backends/base.py @@ -1,9 +1,12 @@ +from warnings import warn + from django.db.models.lookups import Lookup from django.db.models.query import QuerySet from django.db.models.sql.where import SubqueryConstraint, WhereNode from wagtail.search.index import class_is_indexed +from wagtail.search.query import MATCH_ALL, PlainText class FilterError(Exception): @@ -28,14 +31,20 @@ class OrderByFieldError(FieldError): pass -class BaseSearchQuery: +class BaseSearchQueryCompiler: DEFAULT_OPERATOR = 'or' - def __init__(self, queryset, query_string, fields=None, operator=None, order_by_relevance=True): + def __init__(self, queryset, query, fields=None, operator=None, order_by_relevance=True): self.queryset = queryset - self.query_string = query_string + if query is None: + warn('Querying `None` is deprecated, use `MATCH_ALL` instead.', + DeprecationWarning) + query = MATCH_ALL + elif isinstance(query, str): + query = PlainText(query, + operator=operator or self.DEFAULT_OPERATOR) + self.query = query self.fields = fields - self.operator = operator or self.DEFAULT_OPERATOR self.order_by_relevance = order_by_relevance def _get_filterable_field(self, field_attname): @@ -153,9 +162,9 @@ class BaseSearchQuery: class BaseSearchResults: - def __init__(self, backend, query, prefetch_related=None): + def __init__(self, backend, query_compiler, prefetch_related=None): self.backend = backend - self.query = query + self.query_compiler = query_compiler self.prefetch_related = prefetch_related self.start = 0 self.stop = None @@ -178,7 +187,8 @@ class BaseSearchResults: def _clone(self): klass = self.__class__ - new = klass(self.backend, self.query, prefetch_related=self.prefetch_related) + new = klass(self.backend, self.query_compiler, + prefetch_related=self.prefetch_related) new.start = 
self.start new.stop = self.stop new._score_field = self._score_field @@ -258,7 +268,7 @@ class EmptySearchResults(BaseSearchResults): class BaseSearchBackend: - query_class = None + query_compiler_class = None results_class = None rebuilder_class = None @@ -289,7 +299,7 @@ class BaseSearchBackend: def delete(self, obj): raise NotImplementedError - def search(self, query_string, model_or_queryset, fields=None, filters=None, + def search(self, query, model_or_queryset, fields=None, filters=None, prefetch_related=None, operator=None, order_by_relevance=True): # Find model/queryset if isinstance(model_or_queryset, QuerySet): @@ -304,7 +314,7 @@ class BaseSearchBackend: return EmptySearchResults() # Check that theres still a query string after the clean up - if query_string == "": + if query == "": return EmptySearchResults() # Apply filters to queryset @@ -316,15 +326,9 @@ class BaseSearchBackend: for prefetch in prefetch_related: queryset = queryset.prefetch_related(prefetch) - # Check operator - if operator is not None: - operator = operator.lower() - if operator not in ['or', 'and']: - raise ValueError("operator must be either 'or' or 'and'") - # Search - search_query = self.query_class( - queryset, query_string, fields=fields, operator=operator, order_by_relevance=order_by_relevance + search_query = self.query_compiler_class( + queryset, query, fields=fields, operator=operator, order_by_relevance=order_by_relevance ) # Check the query diff --git a/wagtail/search/backends/db.py b/wagtail/search/backends/db.py index d1d1e68b5..b25aa669e 100644 --- a/wagtail/search/backends/db.py +++ b/wagtail/search/backends/db.py @@ -1,13 +1,34 @@ +from warnings import warn + from django.db import models from django.db.models.expressions import Value from wagtail.search.backends.base import ( - BaseSearchBackend, BaseSearchQuery, BaseSearchResults) + BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) +from wagtail.search.query import And, MatchAll, Not, Or, 
SearchQueryShortcut, Term +from wagtail.search.utils import AND, OR -class DatabaseSearchQuery(BaseSearchQuery): +class DatabaseSearchQueryCompiler(BaseSearchQueryCompiler): DEFAULT_OPERATOR = 'and' + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.fields_names = list(self.get_fields_names()) + + def get_fields_names(self): + model = self.queryset.model + fields_names = self.fields or [field.field_name for field in + model.get_searchable_search_fields()] + # Check if the field exists (this will filter out indexed callables) + for field_name in fields_names: + try: + model._meta.get_field(field_name) + except models.fields.FieldDoesNotExist: + continue + else: + yield field_name + def _process_lookup(self, field, lookup, value): return models.Q(**{field.get_attname(self.queryset.model) + '__' + lookup: value}) @@ -15,9 +36,7 @@ class DatabaseSearchQuery(BaseSearchQuery): if connector == 'AND': q = models.Q(*filters) elif connector == 'OR': - q = models.Q(filters[0]) - for fil in filters[1:]: - q |= fil + q = OR([models.Q(fil) for fil in filters]) else: return @@ -26,48 +45,47 @@ class DatabaseSearchQuery(BaseSearchQuery): return q - def get_extra_q(self): - # Run _get_filters_from_queryset to test that no fields that are not - # a FilterField have been used in the query. 
def build_single_term_filter(self, term):
    """
    Build a ``Q`` that ORs one ``<field>__icontains=term`` lookup per name in
    ``self.fields_names``.

    With no field names the result is the empty ``models.Q()``.
    """
    term_query = models.Q()
    for field_name in self.fields_names:
        term_query |= models.Q(**{field_name + '__icontains': term})
    return term_query


def build_database_filter(self, query=None):
    """
    Recursively translate a search query tree into a ``models.Q`` filter.

    :param query: the query node to translate; when ``None`` the compiler's
        own ``self.query`` (the root of the tree) is used.
    :returns: a ``models.Q`` object.
    :raises NotImplementedError: for any node type not handled below.
    """
    if query is None:
        query = self.query

    # Only a match-all at the root is handled: it becomes an empty Q.
    # This deliberately tests the root rather than the current node; a
    # MatchAll nested inside another node is not translated and ends up in
    # the NotImplementedError below.
    if isinstance(self.query, MatchAll):
        return models.Q()

    # Shortcuts are expanded into their equivalent tree before compiling.
    if isinstance(query, SearchQueryShortcut):
        return self.build_database_filter(query.get_equivalent())
    if isinstance(query, Term):
        if query.boost != 1:
            warn('Database search backend does not support term boosting.')
        return self.build_single_term_filter(query.term)
    if isinstance(query, Not):
        return ~self.build_database_filter(query.subquery)
    if isinstance(query, And):
        return AND(self.build_database_filter(subquery)
                   for subquery in query.subqueries)
    if isinstance(query, Or):
        return OR(self.build_database_filter(subquery)
                  for subquery in query.subqueries)
    # Name the node that could not be compiled, not the root of the tree.
    raise NotImplementedError(
        '`%s` is not supported by the database search backend.'
        % query.__class__.__name__)
Convert field names into index column names @@ -371,42 +372,151 @@ class Elasticsearch2SearchQuery(BaseSearchQuery): return filter_out - def get_inner_query(self): - if self.query_string is not None: - fields = self.remapped_fields or ['_all', '_partials'] + def _compile_term_query(self, query_type, value, field, boost=1.0, **extra): + term_query = { + 'value': value, + } - if len(fields) == 1: - if self.operator == 'or': - query = { - 'match': { - fields[0]: self.query_string, - } - } - else: - query = { - 'match': { - fields[0]: { - 'query': self.query_string, - 'operator': self.operator, - } - } - } - else: - query = { - 'multi_match': { - 'query': self.query_string, - 'fields': fields, - } + if boost != 1.0: + term_query['boost'] = boost + + return { + query_type: { + field: term_query, + } + } + + def _compile_plaintext_query(self, query, fields, boost=1.0): + match_query = { + 'query': query.query_string + } + + if query.operator != 'or': + match_query['operator'] = query.operator + + if boost != 1.0: + match_query['boost'] = boost + + if len(fields) == 1: + return { + 'match': { + fields[0]: match_query } - - if self.operator != 'or': - query['multi_match']['operator'] = self.operator + } else: - query = { - 'match_all': {} + match_query['fields'] = fields + + return { + 'multi_match': match_query } - return query + def _compile_query(self, query, field, boost=1.0): + if isinstance(query, MatchAll): + match_all_query = {} + + if boost != 1.0: + match_all_query['boost'] = boost + + return {'match_all': match_all_query} + + elif isinstance(query, Term): + return self._compile_term_query('term', query.term, field, query.boost * boost) + + elif isinstance(query, Prefix): + return self._compile_term_query('prefix', query.prefix, field, query.boost * boost) + + elif isinstance(query, Fuzzy): + return self._compile_term_query('fuzzy', query.term, field, query.boost * boost, fuzziness=query.max_distance) + + elif isinstance(query, And): + return { + 'bool': { + 
'must': [ + self._compile_query(child_query, field, boost) + for child_query in query.get_children() + ] + } + } + + elif isinstance(query, Or): + return { + 'bool': { + 'should': [ + self._compile_query(child_query, field, boost) + for child_query in query.get_children() + ] + } + } + + elif isinstance(query, Not): + return { + 'bool': { + 'mustNot': self._compile_query(query.subquery, field, boost) + } + } + + elif isinstance(query, PlainText): + return self._compile_plaintext_query(self.query, [field], boost) + + elif isinstance(query, Filter): + bool_query = { + 'must': self._compile_query(query.query, field, boost), + } + + if query.include: + bool_query['filter'] = self._compile_query(query.include, field, 0.0) + + if query.exclude: + bool_query['mustNot'] = self._compile_query(query.exclude, field, 0.0) + + return { + 'bool': bool_query, + } + + elif isinstance(query, Boost): + return self._compile_query(query.subquery, field, boost * query.boost) + + else: + raise NotImplementedError( + '`%s` is not supported by the Elasticsearch search backend.' + % query.__class__.__name__) + + def get_inner_query(self): + fields = self.remapped_fields or ['_all', '_partials'] + + if len(fields) == 0: + # No fields. 
Return a query that'll match nothing + return { + 'bool': { + 'mustNot': {'match_all': {}} + } + } + + # Handle MatchAll and PlainText separately as they were supported + # before "search query classes" was implemented and we'd like to + # keep the query the same as before + if isinstance(self.query, MatchAll): + return {'match_all': {}} + + elif isinstance(self.query, PlainText): + return self._compile_plaintext_query(self.query, fields) + + else: + if len(fields) == 1: + return self._compile_query(self.query, fields[0]) + else: + # Compile a query for each field then combine with disjunction + # max (or operator which takes the max score out of each of the + # field queries) + field_queries = [] + for field in fields: + field_queries.append(self._compile_query(self.query, field)) + + return { + 'dis_max': { + 'queries': field_queries + } + } def get_content_type_filter(self): # Query content_type using a "match" query. See comment in @@ -486,11 +596,11 @@ class Elasticsearch2SearchResults(BaseSearchResults): def _get_es_body(self, for_count=False): body = { - 'query': self.query.get_query() + 'query': self.query_compiler.get_query() } if not for_count: - sort = self.query.get_sort() + sort = self.query_compiler.get_sort() if sort is not None: body['sort'] = sort @@ -509,7 +619,7 @@ class Elasticsearch2SearchResults(BaseSearchResults): results = {str(pk): None for pk in pks} # Find objects in database and add them to dict - for obj in self.query.queryset.filter(pk__in=pks): + for obj in self.query_compiler.queryset.filter(pk__in=pks): results[str(obj.pk)] = obj if self._score_field: @@ -532,7 +642,7 @@ class Elasticsearch2SearchResults(BaseSearchResults): use_scroll = limit is None or limit > PAGE_SIZE params = { - 'index': self.backend.get_index_for_model(self.query.queryset.model).name, + 'index': self.backend.get_index_for_model(self.query_compiler.queryset.model).name, 'body': self._get_es_body(), '_source': False, self.fields_param_name: 'pk', @@ -601,7 
+711,7 @@ class Elasticsearch2SearchResults(BaseSearchResults): def _do_count(self): # Get count hit_count = self.backend.es.count( - index=self.backend.get_index_for_model(self.query.queryset.model).name, + index=self.backend.get_index_for_model(self.query_compiler.queryset.model).name, body=self._get_es_body(for_count=True), )['count'] @@ -809,7 +919,7 @@ class ElasticsearchAtomicIndexRebuilder(ElasticsearchIndexRebuilder): class Elasticsearch2SearchBackend(BaseSearchBackend): index_class = Elasticsearch2Index - query_class = Elasticsearch2SearchQuery + query_compiler_class = Elasticsearch2SearchQueryCompiler results_class = Elasticsearch2SearchResults mapping_class = Elasticsearch2Mapping basic_rebuilder_class = ElasticsearchIndexRebuilder diff --git a/wagtail/search/backends/elasticsearch5.py b/wagtail/search/backends/elasticsearch5.py index 0c43b16b3..2ce1808bf 100644 --- a/wagtail/search/backends/elasticsearch5.py +++ b/wagtail/search/backends/elasticsearch5.py @@ -1,6 +1,6 @@ from .elasticsearch2 import ( Elasticsearch2Index, Elasticsearch2Mapping, Elasticsearch2SearchBackend, - Elasticsearch2SearchQuery, Elasticsearch2SearchResults) + Elasticsearch2SearchQueryCompiler, Elasticsearch2SearchResults) class Elasticsearch5Mapping(Elasticsearch2Mapping): @@ -13,7 +13,7 @@ class Elasticsearch5Index(Elasticsearch2Index): pass -class Elasticsearch5SearchQuery(Elasticsearch2SearchQuery): +class Elasticsearch5SearchQueryCompiler(Elasticsearch2SearchQueryCompiler): mapping_class = Elasticsearch5Mapping def _process_lookup(self, field, lookup, value): @@ -35,7 +35,7 @@ class Elasticsearch5SearchQuery(Elasticsearch2SearchQuery): return query - return super(Elasticsearch5SearchQuery, self)._process_lookup(field, lookup, value) + return super()._process_lookup(field, lookup, value) def _connect_filters(self, filters, connector, negated): if filters: @@ -96,7 +96,7 @@ class Elasticsearch5SearchResults(Elasticsearch2SearchResults): class 
"""Search query classes that can be combined into a tree with ``&``, ``|`` and ``~``."""
from __future__ import absolute_import, unicode_literals


#
# Base classes
#


class SearchQuery:
    """Base class of every node in a search query tree."""

    def __and__(self, other):
        return And([self, other])

    def __or__(self, other):
        return Or([self, other])

    def __invert__(self):
        return Not(self)

    def apply(self, func):
        """
        Return ``func`` applied to a rebuilt copy of this node whose
        sub-queries have themselves been rebuilt with ``apply(func)``.
        Subclasses must implement it.
        """
        raise NotImplementedError

    def clone(self):
        """Return a copy of the tree (``apply`` with the identity function)."""
        return self.apply(lambda o: o)

    def get_children(self):
        """Return an iterable of the direct sub-queries (none by default)."""
        return ()

    @property
    def children(self):
        return list(self.get_children())

    @property
    def child(self):
        """
        The only child of this node.

        :raises IndexError: when the node does not have exactly one child.
        """
        children = self.children
        if len(children) != 1:
            # Both values must be passed to `%` as one tuple. Formatting with
            # only the class name raises TypeError instead of this IndexError.
            raise IndexError('`%s` object has %d children, not a single child.'
                             % (self.__class__.__name__, len(children)))
        return children[0]


class SearchQueryOperator(SearchQuery):
    """Marker base class for operator nodes."""
    pass


class MultiOperandsSearchQueryOperator(SearchQueryOperator):
    """Operator node holding several sub-queries in ``subqueries``."""

    def __init__(self, subqueries):
        self.subqueries = subqueries

    def apply(self, func):
        return func(self.__class__(
            [subquery.apply(func) for subquery in self.subqueries]))

    def get_children(self):
        yield from self.subqueries


class SearchQueryShortcut(SearchQuery):
    """
    Node that can be rewritten as a tree of other nodes.

    Subclasses implement ``get_equivalent``; that equivalent tree is
    exposed as the node's single child.
    """

    def get_equivalent(self):
        raise NotImplementedError

    def get_children(self):
        yield self.get_equivalent()

#
# Operators
#


class And(MultiOperandsSearchQueryOperator):
    pass


class Or(MultiOperandsSearchQueryOperator):
    pass


class Not(SearchQueryOperator):
    """Operator node wrapping a single sub-query."""

    def __init__(self, subquery: SearchQuery):
        self.subquery = subquery

    def apply(self, func):
        return func(self.__class__(self.subquery.apply(func)))

    def get_children(self):
        yield self.subquery


#
# Basic query classes
#


class MatchAll(SearchQuery):
    def apply(self, func):
        # Pass the copy through `func` like every other node type does, so
        # callers of `apply` get to see (and may replace) MatchAll nodes too.
        return func(self.__class__())


class Term(SearchQuery):
    def __init__(self, term: str, boost: float = 1):
        self.term = term
        self.boost = boost

    def apply(self, func):
        return func(self.__class__(self.term, self.boost))


class Prefix(SearchQuery):
    def __init__(self, prefix: str, boost: float = 1):
        self.prefix = prefix
        self.boost = boost

    def apply(self, func):
        return func(self.__class__(self.prefix, self.boost))


class Fuzzy(SearchQuery):
    def __init__(self, term: str, max_distance: float = 3, boost: float = 1):
        self.term = term
        self.max_distance = max_distance
        self.boost = boost

    def apply(self, func):
        return func(self.__class__(self.term, self.max_distance, self.boost))


#
# Shortcut query classes
#


class PlainText(SearchQueryShortcut):
    """
    A whitespace-separated query string.

    Equivalent to one ``Term`` per word, combined with ``And`` or ``Or``
    according to ``operator``.
    """

    OPERATORS = {
        'and': And,
        'or': Or,
    }
    DEFAULT_OPERATOR = 'and'

    def __init__(self, query_string: str, operator: str = DEFAULT_OPERATOR,
                 boost: float = 1):
        """
        :raises ValueError: when ``operator`` (compared case-insensitively)
            is not a key of ``OPERATORS``.
        """
        self.query_string = query_string
        self.operator = operator.lower()
        if self.operator not in self.OPERATORS:
            raise ValueError("`operator` must be either 'or' or 'and'.")
        self.boost = boost

    def apply(self, func):
        return func(self.__class__(self.query_string, self.operator,
                                   self.boost))

    def get_equivalent(self):
        # Every term inherits this node's boost. A query string with no
        # words produces an operator node with an empty `subqueries` list.
        return self.OPERATORS[self.operator]([
            Term(term, boost=self.boost)
            for term in self.query_string.split()])


class Filter(SearchQueryShortcut):
    """
    ``query`` restricted by an optional ``include`` and/or ``exclude`` query.
    """

    def __init__(self, query: SearchQuery,
                 include: SearchQuery = None, exclude: SearchQuery = None):
        self.query = query
        self.include = include
        self.exclude = exclude

    def apply(self, func):
        # `include` and `exclude` default to None, so only recurse into the
        # ones that are actually set.
        include = None if self.include is None else self.include.apply(func)
        exclude = None if self.exclude is None else self.exclude.apply(func)
        return func(self.__class__(self.query.apply(func), include, exclude))

    def get_equivalent(self):
        # The restrictions are AND-ed in with a boost of 0.
        query = self.query
        if self.include is not None:
            query &= Boost(self.include, 0)
        if self.exclude is not None:
            query &= Boost(~self.exclude, 0)
        return query


class Boost(SearchQueryShortcut):
    """Multiplies the boost of the boostable nodes found in ``subquery``."""

    def __init__(self, subquery: SearchQuery, boost: float):
        self.subquery = subquery
        self.boost = boost

    def apply(self, func):
        return func(self.__class__(self.subquery.apply(func), self.boost))

    def get_equivalent(self):
        def boost_child(child):
            # `apply` hands us freshly built copies, so the nodes of the
            # original `subquery` tree are left untouched.
            if isinstance(child, (PlainText, Fuzzy, Prefix, Term)):
                child.boost *= self.boost
            return child

        return self.subquery.apply(boost_child)


MATCH_ALL = MatchAll()
get_search_backend(backend) - return search_backend.search(query_string, self, fields=fields, + return search_backend.search(query, self, fields=fields, operator=operator, order_by_relevance=order_by_relevance) diff --git a/wagtail/search/tests/elasticsearch_common_tests.py b/wagtail/search/tests/elasticsearch_common_tests.py index 1dc3e6802..0262fece4 100644 --- a/wagtail/search/tests/elasticsearch_common_tests.py +++ b/wagtail/search/tests/elasticsearch_common_tests.py @@ -4,9 +4,11 @@ from io import StringIO from django.core import management from wagtail.tests.search import models +from wagtail.search.query import MATCH_ALL +from wagtail.search.tests.test_backends import BackendTests -class ElasticsearchCommonSearchBackendTests: +class ElasticsearchCommonSearchBackendTests(BackendTests): def test_search_with_spaces_only(self): # Search for some space characters and hope it doesn't crash results = self.backend.search(" ", models.Book) @@ -109,7 +111,7 @@ class ElasticsearchCommonSearchBackendTests: ) # This should not give any results - results = self.backend.search(None, models.Book) + results = self.backend.search(MATCH_ALL, models.Book) self.assertSetEqual(set(results), set()) def test_annotate_score(self): @@ -127,7 +129,7 @@ class ElasticsearchCommonSearchBackendTests: def test_more_than_ten_results(self): # #3431 reported that Elasticsearch only sends back 10 results if the results set is not sliced - results = self.backend.search(None, models.Book) + results = self.backend.search(MATCH_ALL, models.Book) self.assertEqual(len(results), 13) @@ -141,7 +143,7 @@ class ElasticsearchCommonSearchBackendTests: index.add_items(models.Book, books) index.refresh() - results = self.backend.search(None, models.Book) + results = self.backend.search(MATCH_ALL, models.Book) self.assertEqual(len(results), 163) def test_slice_more_than_one_hundred_results(self): @@ -153,7 +155,7 @@ class ElasticsearchCommonSearchBackendTests: index.add_items(models.Book, books) 
index.refresh() - results = self.backend.search(None, models.Book)[10:120] + results = self.backend.search(MATCH_ALL, models.Book)[10:120] self.assertEqual(len(results), 110) def test_slice_to_next_page(self): @@ -167,5 +169,5 @@ class ElasticsearchCommonSearchBackendTests: index.add_items(models.Book, books) index.refresh() - results = self.backend.search(None, models.Book)[110:] + results = self.backend.search(MATCH_ALL, models.Book)[110:] self.assertEqual(len(results), 53) diff --git a/wagtail/search/tests/test_backends.py b/wagtail/search/tests/test_backends.py index 551e2da45..92530ecc2 100644 --- a/wagtail/search/tests/test_backends.py +++ b/wagtail/search/tests/test_backends.py @@ -15,6 +15,7 @@ from wagtail.search.backends import ( InvalidSearchBackendError, get_search_backend, get_search_backends) from wagtail.search.backends.base import FieldError from wagtail.search.backends.db import DatabaseSearchBackend +from wagtail.search.query import MATCH_ALL, And, Boost, Filter, Not, Or, PlainText, Term class BackendTests(WagtailTestUtils): @@ -63,8 +64,7 @@ class BackendTests(WagtailTestUtils): self.assertSetEqual(set(results), set()) def test_search_all(self): - # Searches on None should return everything in the index - results = self.backend.search(None, models.Book) + results = self.backend.search(MATCH_ALL, models.Book) self.assertSetEqual(set(results), set(models.Book.objects.all())) def test_ranking(self): @@ -88,7 +88,7 @@ class BackendTests(WagtailTestUtils): def test_search_on_child_class(self): # Searches on a child class should only return results that have the child class as well # and all results should be instances of the child class - results = self.backend.search(None, models.Novel) + results = self.backend.search(MATCH_ALL, models.Novel) self.assertSetEqual(set(results), set(models.Novel.objects.all())) def test_search_child_class_field_from_parent(self): @@ -160,7 +160,7 @@ class BackendTests(WagtailTestUtils): # FILTERING TESTS def 
test_filter_exact_value(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages=440)) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -168,14 +168,14 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_exact_value_on_parent_model_field(self): - results = self.backend.search(None, models.Novel.objects.filter(number_of_pages=440)) + results = self.backend.search(MATCH_ALL, models.Novel.objects.filter(number_of_pages=440)) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King" ]) def test_filter_lt(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__lt=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__lt=440)) self.assertUnsortedListEqual([r.title for r in results], [ "The Hobbit", @@ -186,7 +186,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_lte(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__lte=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__lte=440)) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -199,7 +199,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_gt(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__gt=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__gt=440)) self.assertUnsortedListEqual([r.title for r in results], [ "JavaScript: The Definitive Guide", @@ -211,7 +211,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_gte(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__gte=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__gte=440)) 
self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -225,7 +225,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_in_list(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__in=[440, 1160])) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__in=[440, 1160])) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -234,7 +234,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_in_iterable(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__in=iter([440, 1160]))) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__in=iter([440, 1160]))) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -244,7 +244,7 @@ class BackendTests(WagtailTestUtils): def test_filter_in_values_list_subquery(self): values = models.Book.objects.filter(number_of_pages__lt=440).values_list('number_of_pages', flat=True) - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__in=values)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__in=values)) self.assertUnsortedListEqual([r.title for r in results], [ "The Hobbit", @@ -256,7 +256,7 @@ class BackendTests(WagtailTestUtils): def test_filter_isnull_true(self): # Note: We don't know the birth dates of any of the programming guide authors - results = self.backend.search(None, models.Author.objects.filter(date_of_birth__isnull=True)) + results = self.backend.search(MATCH_ALL, models.Author.objects.filter(date_of_birth__isnull=True)) self.assertUnsortedListEqual([r.name for r in results], [ "David Ascher", @@ -271,7 +271,7 @@ class BackendTests(WagtailTestUtils): def test_filter_isnull_false(self): # Note: We know the birth dates of all of the novel authors - results = self.backend.search(None, 
models.Author.objects.filter(date_of_birth__isnull=False)) + results = self.backend.search(MATCH_ALL, models.Author.objects.filter(date_of_birth__isnull=False)) self.assertUnsortedListEqual([r.name for r in results], [ "Isaac Asimov", @@ -280,7 +280,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_prefix(self): - results = self.backend.search(None, models.Book.objects.filter(title__startswith="Th")) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(title__startswith="Th")) self.assertUnsortedListEqual([r.title for r in results], [ "The Hobbit", @@ -292,14 +292,14 @@ class BackendTests(WagtailTestUtils): def test_filter_and_operator(self): results = self.backend.search( - None, models.Book.objects.filter(number_of_pages=440) & models.Book.objects.filter(publication_date=date(1955, 10, 20))) + MATCH_ALL, models.Book.objects.filter(number_of_pages=440) & models.Book.objects.filter(publication_date=date(1955, 10, 20))) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King" ]) def test_filter_or_operator(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages=440) | models.Book.objects.filter(number_of_pages=1160)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages=440) | models.Book.objects.filter(number_of_pages=1160)) self.assertUnsortedListEqual([r.title for r in results], [ "Learning Python", @@ -309,12 +309,12 @@ class BackendTests(WagtailTestUtils): def test_filter_on_non_filterable_field(self): with self.assertRaises(FieldError): - list(self.backend.search(None, models.Author.objects.filter(name__startswith="Issac"))) + list(self.backend.search(MATCH_ALL, models.Author.objects.filter(name__startswith="Issac"))) # ORDER BY RELEVANCE def test_order_by_relevance(self): - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, 
models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Ordering should be set to "number_of_pages" self.assertEqual([r.title for r in results], [ @@ -330,19 +330,19 @@ class BackendTests(WagtailTestUtils): def test_order_by_non_filterable_field(self): with self.assertRaises(FieldError): - list(self.backend.search(None, models.Author.objects.order_by('name'), order_by_relevance=False)) + list(self.backend.search(MATCH_ALL, models.Author.objects.order_by('name'), order_by_relevance=False)) # SLICING TESTS def test_single_result(self): - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) self.assertEqual(results[0].title, "Foundation") self.assertEqual(results[1].title, "The Hobbit") def test_limit(self): # Note: we need consistent ordering for this test - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Limit the results results = results[:3] @@ -355,7 +355,7 @@ class BackendTests(WagtailTestUtils): def test_offset(self): # Note: we need consistent ordering for this test - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Offset the results results = results[3:] @@ -370,7 +370,7 @@ class BackendTests(WagtailTestUtils): def test_offset_and_limit(self): # Note: we need consistent ordering for this test - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), 
order_by_relevance=False) # Offset the results results = results[3:6] @@ -419,7 +419,7 @@ class BackendTests(WagtailTestUtils): # instead of three). # Note: we need consistent ordering for this test - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Limit the results results = results[:3] @@ -431,6 +431,228 @@ class BackendTests(WagtailTestUtils): "The Fellowship of the Ring" # If this item doesn't appear, "Foundation" is still in the index ]) + # + # Basic query classes + # + + def test_match_all(self): + results = self.backend.search(MATCH_ALL, models.Book.objects.all()) + self.assertEqual(len(results), 13) + + def test_term(self): + # Single word + results = self.backend.search(Term('javascript'), + models.Book.objects.all()) + + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide', + 'JavaScript: The good parts'}) + + def test_and(self): + results = self.backend.search(And([Term('javascript'), + Term('definitive')]), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + results = self.backend.search(Term('javascript') & Term('definitive'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + def test_or(self): + results = self.backend.search(Or([Term('hobbit'), Term('towers')]), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'The Hobbit', 'The Two Towers'}) + + results = self.backend.search(Term('hobbit') | Term('towers'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'The Hobbit', 'The Two Towers'}) + + def test_not(self): + all_other_titles = { + 'A Clash of Kings', + 'A Game of Thrones', + 'A Storm of Swords', + 'Foundation', + 'Learning 
Python', + 'The Hobbit', + 'The Two Towers', + 'The Fellowship of the Ring', + 'The Return of the King', + 'The Rust Programming Language', + 'Two Scoops of Django 1.11', + } + + results = self.backend.search(Not(Term('javascript')), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, all_other_titles) + + results = self.backend.search(~Term('javascript'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, all_other_titles) + + def test_operators_combination(self): + results = self.backend.search( + ((Term('javascript') & ~Term('definitive')) | + Term('python') | Term('rust')) | + Term('two'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The good parts', + 'Learning Python', + 'The Two Towers', + 'The Rust Programming Language', + 'Two Scoops of Django 1.11'}) + + # + # Shortcut query classes + # + + def test_plain_text_single_word(self): + results = self.backend.search(PlainText('Javascript'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide', + 'JavaScript: The good parts'}) + + def test_plain_text_multiple_words_or(self): + results = self.backend.search(PlainText('Javascript Definitive', + operator='or'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide', + 'JavaScript: The good parts'}) + + def test_plain_text_multiple_words_and(self): + results = self.backend.search(PlainText('Javascript Definitive', + operator='and'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + def test_plain_text_operator_case(self): + results = self.backend.search(PlainText('Guide', operator='AND'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + results = self.backend.search(PlainText('Guide', 
operator='aNd'), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + results = self.backend.search('Guide', models.Book.objects.all(), + operator='AND') + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + results = self.backend.search('Guide', models.Book.objects.all(), + operator='aNd') + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + def test_plain_text_invalid_operator(self): + with self.assertRaises(ValueError): + self.backend.search(PlainText('Guide', operator='xor'), + models.Book.objects.all()) + + with self.assertRaises(ValueError): + self.backend.search('Guide', models.Book.objects.all(), + operator='xor') + + def test_filter_equivalent(self): + filter = Filter(Term('javascript')) + term = filter.child + self.assertIsInstance(term, Term) + self.assertEqual(term.term, 'javascript') + + filter = Filter(Term('javascript'), include=Term('definitive')) + and_obj = filter.child + self.assertIsInstance(and_obj, And) + javascript = and_obj.children[0] + self.assertIsInstance(javascript, Term) + self.assertEqual(javascript.term, 'javascript') + boost_obj = and_obj.children[1] + self.assertIsInstance(boost_obj, Boost) + self.assertEqual(boost_obj.boost, 0) + definitive = boost_obj.child + self.assertIsInstance(definitive, Term) + self.assertEqual(definitive.term, 'definitive') + + filter = Filter(Term('javascript'), + include=Term('definitive'), exclude=Term('guide')) + and_obj1 = filter.child + self.assertIsInstance(and_obj1, And) + and_obj2 = and_obj1.children[0] + javascript = and_obj2.children[0] + self.assertIsInstance(javascript, Term) + self.assertEqual(javascript.term, 'javascript') + boost_obj = and_obj2.children[1] + self.assertIsInstance(boost_obj, Boost) + self.assertEqual(boost_obj.boost, 0) + definitive = boost_obj.child + self.assertIsInstance(definitive, Term) + self.assertEqual(definitive.term, 
'definitive') + boost_obj = and_obj1.children[1] + self.assertIsInstance(boost_obj, Boost) + self.assertEqual(boost_obj.boost, 0) + not_obj = boost_obj.child + self.assertIsInstance(not_obj, Not) + guide = not_obj.child + self.assertEqual(guide.term, 'guide') + + def test_filter_query(self): + results = self.backend.search(Filter(Term('javascript')), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide', + 'JavaScript: The good parts'}) + + results = self.backend.search(Filter(Term('javascript'), + include=Term('definitive')), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, + {'JavaScript: The Definitive Guide'}) + + results = self.backend.search(Filter(Term('javascript'), + include=Term('definitive'), + exclude=Term('guide')), + models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, set()) + + def test_boost_equivalent(self): + boost = Boost(Term('guide'), 5) + equivalent = boost.children[0] + self.assertIsInstance(equivalent, Term) + self.assertAlmostEqual(equivalent.boost, 5) + + boost = Boost(Term('guide', boost=0.5), 5) + equivalent = boost.children[0] + self.assertIsInstance(equivalent, Term) + self.assertAlmostEqual(equivalent.boost, 2.5) + + boost = Boost(Boost(Term('guide', 0.1), 3), 5) + sub_boost = boost.children[0] + self.assertIsInstance(sub_boost, Boost) + sub_boost = sub_boost.children[0] + self.assertIsInstance(sub_boost, Term) + self.assertAlmostEqual(sub_boost.boost, 1.5) + + boost = Boost(And([Boost(Term('guide', 0.1), 3), Term('two', 2)]), 5) + and_obj = boost.children[0] + self.assertIsInstance(and_obj, And) + sub_boost = and_obj.children[0] + self.assertIsInstance(sub_boost, Boost) + guide = sub_boost.children[0] + self.assertIsInstance(guide, Term) + self.assertAlmostEqual(guide.boost, 1.5) + two = and_obj.children[1] + self.assertIsInstance(two, Term) + self.assertAlmostEqual(two.boost, 10) + @override_settings( 
WAGTAILSEARCH_BACKENDS={ diff --git a/wagtail/search/tests/test_elasticsearch2_backend.py b/wagtail/search/tests/test_elasticsearch2_backend.py index ce8aa78e3..02e3a5264 100644 --- a/wagtail/search/tests/test_elasticsearch2_backend.py +++ b/wagtail/search/tests/test_elasticsearch2_backend.py @@ -10,12 +10,12 @@ from elasticsearch.serializer import JSONSerializer from wagtail.tests.search import models from wagtail.search.backends.elasticsearch2 import ( Elasticsearch2SearchBackend, get_model_root) +from wagtail.search.query import MATCH_ALL from .elasticsearch_common_tests import ElasticsearchCommonSearchBackendTests -from .test_backends import BackendTests -class TestElasticsearch2SearchBackend(BackendTests, ElasticsearchCommonSearchBackendTests, TestCase): +class TestElasticsearch2SearchBackend(ElasticsearchCommonSearchBackendTests, TestCase): backend_path = 'wagtail.search.backends.elasticsearch2' @@ -26,55 +26,55 @@ class TestElasticsearch2SearchQuery(TestCase): json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default) ) - query_class = Elasticsearch2SearchBackend.query_class + query_compiler_class = Elasticsearch2SearchBackend.query_compiler_class def test_simple(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello") # Check it expected_result = {'filtered': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) - def test_none_query_string(self): + def test_match_all(self): # Create a query - query = self.query_class(models.Book.objects.all(), None) + query_compiler = self.query_compiler_class(models.Book.objects.all(), MATCH_ALL) # Check it expected_result = {'filtered': { 'filter': {'match': 
{'content_type': 'searchtests.Book'}}, 'query': {'match_all': {}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_and_operator(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", operator='and') + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", operator='and') # Check it expected_result = {'filtered': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials'], 'operator': 'and'}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_filter(self): # Create a query - query = self.query_class(models.Book.objects.filter(title="Test"), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title="Test"), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, {'term': {'title_filter': 'Test'}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_and_filter(self): # Create a query - query = self.query_class(models.Book.objects.filter(title="Test", publication_date=datetime.date(2017, 10, 18)), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title="Test", publication_date=datetime.date(2017, 10, 18)), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ @@ -83,7 +83,7 @@ class TestElasticsearch2SearchQuery(TestCase): ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} # Make sure field filters are sorted (as they can be in any order which may cause false positives) - query = query.get_query() + query = 
query_compiler.get_query() field_filters = query['filtered']['filter']['and'][1]['and'] field_filters[:] = sorted(field_filters, key=lambda f: list(f['term'].keys())[0]) @@ -91,10 +91,10 @@ class TestElasticsearch2SearchQuery(TestCase): def test_or_filter(self): # Create a query - query = self.query_class(models.Book.objects.filter(Q(title="Test") | Q(publication_date=datetime.date(2017, 10, 18))), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(Q(title="Test") | Q(publication_date=datetime.date(2017, 10, 18))), "Hello") # Make sure field filters are sorted (as they can be in any order which may cause false positives) - query = query.get_query() + query = query_compiler.get_query() field_filters = query['filtered']['filter']['and'][1]['or'] field_filters[:] = sorted(field_filters, key=lambda f: list(f['term'].keys())[0]) @@ -107,51 +107,51 @@ class TestElasticsearch2SearchQuery(TestCase): def test_negated_filter(self): # Create a query - query = self.query_class(models.Book.objects.exclude(publication_date=datetime.date(2017, 10, 18)), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.exclude(publication_date=datetime.date(2017, 10, 18)), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, {'not': {'term': {'publication_date_filter': '2017-10-18'}}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_fields(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", fields=['title']) + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title']) # Check it expected_result = {'filtered': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, - 'query': {'match': {'title': 'Hello'}} + 'query': {'match': 
{'title': {'query': 'Hello'}}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_fields_with_and_operator(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", fields=['title'], operator='and') + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title'], operator='and') # Check it expected_result = {'filtered': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'query': {'match': {'title': {'query': 'Hello', 'operator': 'and'}}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_multiple_fields(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", fields=['title', 'content']) + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title', 'content']) # Check it expected_result = {'filtered': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'query': {'multi_match': {'fields': ['title', 'content'], 'query': 'Hello'}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_multiple_fields_with_and_operator(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.all(), "Hello", fields=['title', 'content'], operator='and' ) @@ -160,68 +160,68 @@ class TestElasticsearch2SearchQuery(TestCase): 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'query': {'multi_match': {'fields': ['title', 'content'], 'query': 'Hello', 'operator': 'and'}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_exact_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__exact="Test"), 
"Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__exact="Test"), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, {'term': {'title_filter': 'Test'}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_none_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title=None), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title=None), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, {'missing': {'field': 'title_filter'}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_isnull_true_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__isnull=True), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__isnull=True), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, {'missing': {'field': 'title_filter'}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_isnull_false_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__isnull=False), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__isnull=False), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, 
{'exists': {'field': 'title_filter'}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_startswith_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__startswith="Test"), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__startswith="Test"), "Hello") # Check it expected_result = {'filtered': {'filter': {'and': [ {'match': {'content_type': 'searchtests.Book'}}, {'prefix': {'title_filter': 'Test'}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_gt_lookup(self): # This also tests conversion of python dates to strings # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__gt=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -230,11 +230,11 @@ class TestElasticsearch2SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'gt': '2014-04-29'}}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_lt_lookup(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__lt=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -243,11 +243,11 @@ class TestElasticsearch2SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'lt': '2014-04-29'}}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - 
self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_gte_lookup(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__gte=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -256,11 +256,11 @@ class TestElasticsearch2SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'gte': '2014-04-29'}}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_lte_lookup(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__lte=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -269,14 +269,14 @@ class TestElasticsearch2SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'lte': '2014-04-29'}}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_range_lookup(self): start_date = datetime.datetime(2014, 4, 29) end_date = datetime.datetime(2014, 8, 19) # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__range=(start_date, end_date)), "Hello" ) @@ -285,37 +285,37 @@ class TestElasticsearch2SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'gte': '2014-04-29', 'lte': '2014-08-19'}}} ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + 
self.assertDictEqual(query_compiler.get_query(), expected_result) def test_custom_ordering(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.order_by('publication_date'), "Hello", order_by_relevance=False ) # Check it expected_result = [{'publication_date_filter': 'asc'}] - self.assertDictEqual(query.get_sort(), expected_result) + self.assertDictEqual(query_compiler.get_sort(), expected_result) def test_custom_ordering_reversed(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.order_by('-publication_date'), "Hello", order_by_relevance=False ) # Check it expected_result = [{'publication_date_filter': 'desc'}] - self.assertDictEqual(query.get_sort(), expected_result) + self.assertDictEqual(query_compiler.get_sort(), expected_result) def test_custom_ordering_multiple(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.order_by('publication_date', 'number_of_pages'), "Hello", order_by_relevance=False ) # Check it expected_result = [{'publication_date_filter': 'asc'}, {'number_of_pages_filter': 'asc'}] - self.assertDictEqual(query.get_sort(), expected_result) + self.assertDictEqual(query_compiler.get_sort(), expected_result) class TestElasticsearch2SearchResults(TestCase): @@ -329,11 +329,11 @@ class TestElasticsearch2SearchResults(TestCase): def get_results(self): backend = Elasticsearch2SearchBackend({}) - query = mock.MagicMock() - query.queryset = models.Book.objects.all() - query.get_query.return_value = 'QUERY' - query.get_sort.return_value = None - return backend.results_class(backend, query) + query_compiler = mock.MagicMock() + query_compiler.queryset = models.Book.objects.all() + query_compiler.get_query.return_value = 'QUERY' + query_compiler.get_sort.return_value = None + return backend.results_class(backend, query_compiler) def construct_search_response(self, 
results): return { diff --git a/wagtail/search/tests/test_elasticsearch5_backend.py b/wagtail/search/tests/test_elasticsearch5_backend.py index f32aa7f8e..67f737bcc 100644 --- a/wagtail/search/tests/test_elasticsearch5_backend.py +++ b/wagtail/search/tests/test_elasticsearch5_backend.py @@ -9,12 +9,12 @@ from elasticsearch.serializer import JSONSerializer from wagtail.tests.search import models from wagtail.search.backends.elasticsearch5 import Elasticsearch5SearchBackend +from wagtail.search.query import MATCH_ALL from .elasticsearch_common_tests import ElasticsearchCommonSearchBackendTests -from .test_backends import BackendTests -class TestElasticsearch5SearchBackend(BackendTests, ElasticsearchCommonSearchBackendTests, TestCase): +class TestElasticsearch5SearchBackend(ElasticsearchCommonSearchBackendTests, TestCase): backend_path = 'wagtail.search.backends.elasticsearch5' @@ -25,55 +25,55 @@ class TestElasticsearch5SearchQuery(TestCase): json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default) ) - query_class = Elasticsearch5SearchBackend.query_class + query_compiler_class = Elasticsearch5SearchBackend.query_compiler_class def test_simple(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello") # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) - def test_none_query_string(self): + def test_match_all(self): # Create a query - query = self.query_class(models.Book.objects.all(), None) + query_compiler = self.query_compiler_class(models.Book.objects.all(), MATCH_ALL) # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 
'must': {'match_all': {}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_and_operator(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", operator='and') + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", operator='and') # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials'], 'operator': 'and'}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_filter(self): # Create a query - query = self.query_class(models.Book.objects.filter(title="Test"), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title="Test"), "Hello") # Check it expected_result = {'bool': {'filter': [ {'match': {'content_type': 'searchtests.Book'}}, {'term': {'title_filter': 'Test'}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_and_filter(self): # Create a query - query = self.query_class(models.Book.objects.filter(title="Test", publication_date=datetime.date(2017, 10, 18)), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title="Test", publication_date=datetime.date(2017, 10, 18)), "Hello") # Check it expected_result = {'bool': {'filter': [ @@ -82,7 +82,7 @@ class TestElasticsearch5SearchQuery(TestCase): ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} # Make sure field filters are sorted (as they can be in any order which may cause false positives) - query = query.get_query() + query = query_compiler.get_query() field_filters = query['bool']['filter'][1]['bool']['must'] 
field_filters[:] = sorted(field_filters, key=lambda f: list(f['term'].keys())[0]) @@ -90,10 +90,10 @@ class TestElasticsearch5SearchQuery(TestCase): def test_or_filter(self): # Create a query - query = self.query_class(models.Book.objects.filter(Q(title="Test") | Q(publication_date=datetime.date(2017, 10, 18))), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(Q(title="Test") | Q(publication_date=datetime.date(2017, 10, 18))), "Hello") # Make sure field filters are sorted (as they can be in any order which may cause false positives) - query = query.get_query() + query = query_compiler.get_query() field_filters = query['bool']['filter'][1]['bool']['should'] field_filters[:] = sorted(field_filters, key=lambda f: list(f['term'].keys())[0]) @@ -106,51 +106,51 @@ class TestElasticsearch5SearchQuery(TestCase): def test_negated_filter(self): # Create a query - query = self.query_class(models.Book.objects.exclude(publication_date=datetime.date(2017, 10, 18)), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.exclude(publication_date=datetime.date(2017, 10, 18)), "Hello") # Check it expected_result = {'bool': {'filter': [ {'match': {'content_type': 'searchtests.Book'}}, {'bool': {'mustNot': {'term': {'publication_date_filter': '2017-10-18'}}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_fields(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", fields=['title']) + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title']) # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, - 'must': {'match': {'title': 'Hello'}} + 'must': {'match': {'title': {'query': 'Hello'}}} }} - self.assertDictEqual(query.get_query(), expected_result) + 
self.assertDictEqual(query_compiler.get_query(), expected_result) def test_fields_with_and_operator(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", fields=['title'], operator='and') + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title'], operator='and') # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'must': {'match': {'title': {'query': 'Hello', 'operator': 'and'}}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_multiple_fields(self): # Create a query - query = self.query_class(models.Book.objects.all(), "Hello", fields=['title', 'content']) + query_compiler = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title', 'content']) # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'must': {'multi_match': {'fields': ['title', 'content'], 'query': 'Hello'}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_multiple_fields_with_and_operator(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.all(), "Hello", fields=['title', 'content'], operator='and' ) @@ -159,68 +159,68 @@ class TestElasticsearch5SearchQuery(TestCase): 'filter': {'match': {'content_type': 'searchtests.Book'}}, 'must': {'multi_match': {'fields': ['title', 'content'], 'query': 'Hello', 'operator': 'and'}} }} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_exact_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__exact="Test"), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__exact="Test"), "Hello") 
# Check it expected_result = {'bool': {'filter': [ {'match': {'content_type': 'searchtests.Book'}}, {'term': {'title_filter': 'Test'}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_none_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title=None), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title=None), "Hello") # Check it expected_result = {'bool': {'filter': [ {'match': {'content_type': 'searchtests.Book'}}, {'bool': {'mustNot': {'exists': {'field': 'title_filter'}}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_isnull_true_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__isnull=True), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__isnull=True), "Hello") # Check it expected_result = {'bool': {'filter': [ {'match': {'content_type': 'searchtests.Book'}}, {'bool': {'mustNot': {'exists': {'field': 'title_filter'}}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_isnull_false_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__isnull=False), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__isnull=False), "Hello") # Check it expected_result = {'bool': {'filter': [ {'match': {'content_type': 'searchtests.Book'}}, {'exists': {'field': 'title_filter'}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - 
self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_startswith_lookup(self): # Create a query - query = self.query_class(models.Book.objects.filter(title__startswith="Test"), "Hello") + query_compiler = self.query_compiler_class(models.Book.objects.filter(title__startswith="Test"), "Hello") # Check it expected_result = {'bool': {'filter': [ {'match': {'content_type': 'searchtests.Book'}}, {'prefix': {'title_filter': 'Test'}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_gt_lookup(self): # This also tests conversion of python dates to strings # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__gt=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -229,11 +229,11 @@ class TestElasticsearch5SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'gt': '2014-04-29'}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_lt_lookup(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__lt=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -242,11 +242,11 @@ class TestElasticsearch5SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'lt': '2014-04-29'}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_gte_lookup(self): # 
Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__gte=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -255,11 +255,11 @@ class TestElasticsearch5SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'gte': '2014-04-29'}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_lte_lookup(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__lte=datetime.datetime(2014, 4, 29)), "Hello" ) @@ -268,14 +268,14 @@ class TestElasticsearch5SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'lte': '2014-04-29'}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_range_lookup(self): start_date = datetime.datetime(2014, 4, 29) end_date = datetime.datetime(2014, 8, 19) # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.filter(publication_date__range=(start_date, end_date)), "Hello" ) @@ -284,37 +284,37 @@ class TestElasticsearch5SearchQuery(TestCase): {'match': {'content_type': 'searchtests.Book'}}, {'range': {'publication_date_filter': {'gte': '2014-04-29', 'lte': '2014-08-19'}}} ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} - self.assertDictEqual(query.get_query(), expected_result) + self.assertDictEqual(query_compiler.get_query(), expected_result) def test_custom_ordering(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( 
models.Book.objects.order_by('publication_date'), "Hello", order_by_relevance=False ) # Check it expected_result = [{'publication_date_filter': 'asc'}] - self.assertDictEqual(query.get_sort(), expected_result) + self.assertDictEqual(query_compiler.get_sort(), expected_result) def test_custom_ordering_reversed(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.order_by('-publication_date'), "Hello", order_by_relevance=False ) # Check it expected_result = [{'publication_date_filter': 'desc'}] - self.assertDictEqual(query.get_sort(), expected_result) + self.assertDictEqual(query_compiler.get_sort(), expected_result) def test_custom_ordering_multiple(self): # Create a query - query = self.query_class( + query_compiler = self.query_compiler_class( models.Book.objects.order_by('publication_date', 'number_of_pages'), "Hello", order_by_relevance=False ) # Check it expected_result = [{'publication_date_filter': 'asc'}, {'number_of_pages_filter': 'asc'}] - self.assertDictEqual(query.get_sort(), expected_result) + self.assertDictEqual(query_compiler.get_sort(), expected_result) class TestElasticsearch5SearchResults(TestCase): @@ -328,11 +328,11 @@ class TestElasticsearch5SearchResults(TestCase): def get_results(self): backend = Elasticsearch5SearchBackend({}) - query = mock.MagicMock() - query.queryset = models.Book.objects.all() - query.get_query.return_value = 'QUERY' - query.get_sort.return_value = None - return backend.results_class(backend, query) + query_compiler = mock.MagicMock() + query_compiler.queryset = models.Book.objects.all() + query_compiler.get_query.return_value = 'QUERY' + query_compiler.get_sort.return_value = None + return backend.results_class(backend, query_compiler) def construct_search_response(self, results): return { diff --git a/wagtail/search/utils.py b/wagtail/search/utils.py index 01d428903..1afde61c9 100644 --- a/wagtail/search/utils.py +++ b/wagtail/search/utils.py @@ -1,5 
+1,14 @@ +import operator import re import string +from functools import partial, reduce + +# Reduce any iterable to a single value using a logical OR e.g. (a | b | ...) +OR = partial(reduce, operator.or_) +# Reduce any iterable to a single value using a logical AND e.g. (a & b & ...) +AND = partial(reduce, operator.and_) +# Reduce any iterable to a single value using an addition +ADD = partial(reduce, operator.add) MAX_QUERY_STRING_LENGTH = 255 diff --git a/wagtail/tests/settings.py b/wagtail/tests/settings.py index b04c99483..b2850b919 100644 --- a/wagtail/tests/settings.py +++ b/wagtail/tests/settings.py @@ -168,6 +168,7 @@ if os.environ.get('DATABASE_ENGINE') == 'django.db.backends.postgresql': INSTALLED_APPS += ('wagtail.contrib.postgres_search',) WAGTAILSEARCH_BACKENDS['postgresql'] = { 'BACKEND': 'wagtail.contrib.postgres_search.backend', + 'AUTO_UPDATE': False, } if 'ELASTICSEARCH_URL' in os.environ: