From 2ba532d745cfbc271574e4dd42a9b2d3c5563d36 Mon Sep 17 00:00:00 2001 From: Bertrand Bordage Date: Thu, 23 Nov 2017 15:56:41 +0100 Subject: [PATCH] Adds search query API & implements PlainText & MatchAll. --- wagtail/contrib/postgres_search/backend.py | 13 +++- wagtail/wagtailsearch/backends/base.py | 15 +++- wagtail/wagtailsearch/backends/db.py | 54 ++++++++------ .../wagtailsearch/backends/elasticsearch2.py | 54 +++++++------- wagtail/wagtailsearch/query.py | 74 +++++++++++++++++++ wagtail/wagtailsearch/tests/test_backends.py | 50 ++++++------- 6 files changed, 178 insertions(+), 82 deletions(-) create mode 100644 wagtail/wagtailsearch/query.py diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py index 7288ea139..df8d79df5 100644 --- a/wagtail/contrib/postgres_search/backend.py +++ b/wagtail/contrib/postgres_search/backend.py @@ -12,6 +12,7 @@ from django.utils.encoding import force_text from wagtail.wagtailsearch.backends.base import ( BaseSearchBackend, BaseSearchQuery, BaseSearchResults) from wagtail.wagtailsearch.index import RelatedFields, SearchField +from wagtail.wagtailsearch.query import MatchAll, PlainText from .models import IndexEntry from .utils import ( @@ -174,8 +175,8 @@ class PostgresSearchQuery(BaseSearchQuery): self.search_fields = self.queryset.model.get_search_fields() def get_search_query(self, config): - combine = OR if self.operator == 'or' else AND - search_terms = keyword_split(unidecode(self.query_string)) + combine = OR if self.query.operator == 'or' else AND + search_terms = keyword_split(unidecode(self.query.query_string)) if not search_terms: return SearchQuery('') return combine(SearchQuery(q, config=config) for q in search_terms) @@ -197,8 +198,14 @@ class PostgresSearchQuery(BaseSearchQuery): return field.boost def search(self, config, start, stop): - if self.query_string is None: + if isinstance(self.query, MatchAll): return self.queryset[start:stop] + + if not isinstance(self.query, PlainText): + raise NotImplementedError( + '%s is not supported by the PostgreSQL search backend.' + % self.query.__class__) + search_query = self.get_search_query(config=config) queryset = self.queryset query = queryset.query diff --git a/wagtail/wagtailsearch/backends/base.py b/wagtail/wagtailsearch/backends/base.py index c0fc4cc8a..5b6592360 100644 --- a/wagtail/wagtailsearch/backends/base.py +++ b/wagtail/wagtailsearch/backends/base.py @@ -1,11 +1,14 @@ from __future__ import absolute_import, unicode_literals +from warnings import warn + from django.db.models.lookups import Lookup from django.db.models.query import QuerySet from django.db.models.sql.where import SubqueryConstraint, WhereNode from wagtail.wagtailsearch.index import class_is_indexed +from wagtail.wagtailsearch.query import MATCH_ALL, PlainText class FilterError(Exception): @@ -19,11 +22,17 @@ class FieldError(Exception): class BaseSearchQuery(object): DEFAULT_OPERATOR = 'or' - def __init__(self, queryset, query_string, fields=None, operator=None, order_by_relevance=True): + def __init__(self, queryset, query, fields=None, operator=None, order_by_relevance=True): self.queryset = queryset - self.query_string = query_string + if query is None: + warn('Querying `None` is deprecated, use `MATCH_ALL` instead.', + DeprecationWarning) + query = MATCH_ALL + elif isinstance(query, str): + query = PlainText(query, + operator=operator or self.DEFAULT_OPERATOR) + self.query = query self.fields = fields - self.operator = operator or self.DEFAULT_OPERATOR self.order_by_relevance = order_by_relevance def _get_filterable_field(self, field_attname): diff --git a/wagtail/wagtailsearch/backends/db.py b/wagtail/wagtailsearch/backends/db.py index 90539fad0..1dbcdd325 100644 --- a/wagtail/wagtailsearch/backends/db.py +++ b/wagtail/wagtailsearch/backends/db.py @@ -5,6 +5,7 @@ from django.db.models.expressions import Value from wagtail.wagtailsearch.backends.base import ( BaseSearchBackend, BaseSearchQuery, BaseSearchResults) +from wagtail.wagtailsearch.query import MatchAll, PlainText class DatabaseSearchQuery(BaseSearchQuery): @@ -36,32 +37,41 @@ class DatabaseSearchQuery(BaseSearchQuery): q = models.Q() model = self.queryset.model - if self.query_string is not None: - # Get fields - fields = self.fields or [field.field_name for field in model.get_searchable_search_fields()] + if isinstance(self.query, MatchAll): + return q - # Get terms - terms = self.query_string.split() - if not terms: - return model.objects.none() + if not isinstance(self.query, PlainText): + raise NotImplementedError( + '%s is not supported by the database search backend.' + % self.query.__class__) - # Filter by terms - for term in terms: - term_query = models.Q() - for field_name in fields: - # Check if the field exists (this will filter out indexed callables) - try: - model._meta.get_field(field_name) - except models.fields.FieldDoesNotExist: - continue + # Get fields + fields = self.fields or [field.field_name for field in model.get_searchable_search_fields()] - # Filter on this field - term_query |= models.Q(**{'%s__icontains' % field_name: term}) + # Get terms + terms = self.query.query_string.split() + if not terms: + return model.objects.none() - if self.operator == 'or': - q |= term_query - elif self.operator == 'and': - q &= term_query + # Filter by terms + for term in terms: + term_query = models.Q() + for field_name in fields: + # Check if the field exists (this will filter out indexed callables) + try: + model._meta.get_field(field_name) + except models.fields.FieldDoesNotExist: + continue + + # Filter on this field + term_query |= models.Q(**{'%s__icontains' % field_name: term}) + + operator = self.query.operator + + if operator == 'or': + q |= term_query + elif operator == 'and': + q &= term_query return q diff --git a/wagtail/wagtailsearch/backends/elasticsearch2.py b/wagtail/wagtailsearch/backends/elasticsearch2.py index 207d8155e..35e1af0d1 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch2.py +++ b/wagtail/wagtailsearch/backends/elasticsearch2.py @@ -16,6 +16,7 @@ from wagtail.wagtailsearch.backends.base import ( BaseSearchBackend, BaseSearchQuery, BaseSearchResults) from wagtail.wagtailsearch.index import ( FilterField, Indexed, RelatedFields, SearchField, class_is_indexed) +from wagtail.wagtailsearch.query import MatchAll, PlainText def get_model_root(model): @@ -371,40 +372,35 @@ class Elasticsearch2SearchQuery(BaseSearchQuery): return filter_out def get_inner_query(self): - if self.query_string is not None: - fields = self.fields or ['_all', '_partials'] + if isinstance(self.query, MatchAll): + return {'match_all': {}} - if len(fields) == 1: - if self.operator == 'or': - query = { - 'match': { - fields[0]: self.query_string, - } - } - else: - query = { - 'match': { - fields[0]: { - 'query': self.query_string, - 'operator': self.operator, - } - } - } - else: - query = { - 'multi_match': { - 'query': self.query_string, - 'fields': fields, - } + if not isinstance(self.query, PlainText): + raise NotImplementedError( + '%s is not supported by the Elasticsearch search backend.' + % self.query.__class__) + + fields = self.fields or ['_all', '_partials'] + operator = self.query.operator + + if len(fields) == 1: + field = fields[0] + query = { + 'match': { + field: self.query.query_string, } - - if self.operator != 'or': - query['multi_match']['operator'] = self.operator + } + if operator != 'or': + query['match'][field]['operator'] = operator else: query = { - 'match_all': {} + 'multi_match': { + 'query': self.query.query_string, + 'fields': fields, + } } - + if operator != 'or': + query['multi_match']['operator'] = operator return query def get_content_type_filter(self): diff --git a/wagtail/wagtailsearch/query.py b/wagtail/wagtailsearch/query.py new file mode 100644 index 000000000..fb72b22e1 --- /dev/null +++ b/wagtail/wagtailsearch/query.py @@ -0,0 +1,74 @@ +class SearchQuery: + def __and__(self, other): + return And([self, other]) + + def __or__(self, other): + return Or([self, other]) + + def __invert__(self): + return Not(self) + + +class SearchQueryOperator(SearchQuery): + pass + + +class And(SearchQueryOperator): + def __init__(self, subqueries): + self.subqueries = subqueries + + +class Or(SearchQueryOperator): + def __init__(self, subqueries): + self.subqueries = subqueries + + +class Not(SearchQueryOperator): + def __init__(self, subquery): + self.subquery = subquery + + +class MatchAll(SearchQuery): + pass + + +class PlainText(SearchQuery): + def __init__(self, query_string, operator=None, boost=1.0): + self.query_string = query_string + self.operator = operator + self.boost = boost + + +class Term(SearchQuery): + def __init__(self, term, boost=1.0): + self.term = term + self.boost = boost + + +class Prefix(SearchQuery): + def __init__(self, prefix, boost=1.0): + self.prefix = prefix + self.boost = boost + + +class Fuzzy(SearchQuery): + def __init__(self, term, max_distance=3, boost=1.0): + self.term = term + self.max_distance = max_distance + self.boost = boost + + +class Boost(SearchQuery): + def __init__(self, query, boost): + self.query = query + self.boost = boost + + +class Filter(SearchQuery): + def __init__(self, query, include=None, exclude=None): + self.query = query + self.include = include + self.exclude = exclude + + +MATCH_ALL = MatchAll() diff --git a/wagtail/wagtailsearch/tests/test_backends.py b/wagtail/wagtailsearch/tests/test_backends.py index 7ed7ad056..8e79b4876 100644 --- a/wagtail/wagtailsearch/tests/test_backends.py +++ b/wagtail/wagtailsearch/tests/test_backends.py @@ -17,6 +17,7 @@ from wagtail.wagtailsearch.backends import ( InvalidSearchBackendError, get_search_backend, get_search_backends) from wagtail.wagtailsearch.backends.base import FieldError from wagtail.wagtailsearch.backends.db import DatabaseSearchBackend +from wagtail.wagtailsearch.query import MATCH_ALL class BackendTests(WagtailTestUtils): @@ -65,8 +66,7 @@ class BackendTests(WagtailTestUtils): self.assertSetEqual(set(results), set()) def test_search_all(self): - # Searches on None should return everything in the index - results = self.backend.search(None, models.Book) + results = self.backend.search(MATCH_ALL, models.Book) self.assertSetEqual(set(results), set(models.Book.objects.all())) def test_ranking(self): @@ -90,7 +90,7 @@ class BackendTests(WagtailTestUtils): def test_search_on_child_class(self): # Searches on a child class should only return results that have the child class as well # and all results should be instances of the child class - results = self.backend.search(None, models.Novel) + results = self.backend.search(MATCH_ALL, models.Novel) self.assertSetEqual(set(results), set(models.Novel.objects.all())) def test_search_child_class_field_from_parent(self): @@ -162,7 +162,7 @@ class BackendTests(WagtailTestUtils): # FILTERING TESTS def test_filter_exact_value(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages=440)) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -170,14 +170,14 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_exact_value_on_parent_model_field(self): - results = self.backend.search(None, models.Novel.objects.filter(number_of_pages=440)) + results = self.backend.search(MATCH_ALL, models.Novel.objects.filter(number_of_pages=440)) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King" ]) def test_filter_lt(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__lt=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__lt=440)) self.assertUnsortedListEqual([r.title for r in results], [ "The Hobbit", @@ -188,7 +188,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_lte(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__lte=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__lte=440)) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -201,7 +201,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_gt(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__gt=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__gt=440)) self.assertUnsortedListEqual([r.title for r in results], [ "JavaScript: The Definitive Guide", @@ -213,7 +213,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_gte(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__gte=440)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__gte=440)) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -227,7 +227,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_in_list(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__in=[440, 1160])) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__in=[440, 1160])) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -236,7 +236,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_in_iterable(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__in=iter([440, 1160]))) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__in=iter([440, 1160]))) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King", @@ -246,7 +246,7 @@ class BackendTests(WagtailTestUtils): def test_filter_in_values_list_subquery(self): values = models.Book.objects.filter(number_of_pages__lt=440).values_list('number_of_pages', flat=True) - results = self.backend.search(None, models.Book.objects.filter(number_of_pages__in=values)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__in=values)) self.assertUnsortedListEqual([r.title for r in results], [ "The Hobbit", @@ -258,7 +258,7 @@ class BackendTests(WagtailTestUtils): def test_filter_isnull_true(self): # Note: We don't know the birth dates of any of the programming guide authors - results = self.backend.search(None, models.Author.objects.filter(date_of_birth__isnull=True)) + results = self.backend.search(MATCH_ALL, models.Author.objects.filter(date_of_birth__isnull=True)) self.assertUnsortedListEqual([r.name for r in results], [ "David Ascher", @@ -273,7 +273,7 @@ class BackendTests(WagtailTestUtils): def test_filter_isnull_false(self): # Note: We know the birth dates of all of the novel authors - results = self.backend.search(None, models.Author.objects.filter(date_of_birth__isnull=False)) + results = self.backend.search(MATCH_ALL, models.Author.objects.filter(date_of_birth__isnull=False)) self.assertUnsortedListEqual([r.name for r in results], [ "Isaac Asimov", @@ -282,7 +282,7 @@ class BackendTests(WagtailTestUtils): ]) def test_filter_prefix(self): - results = self.backend.search(None, models.Book.objects.filter(title__startswith="Th")) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(title__startswith="Th")) self.assertUnsortedListEqual([r.title for r in results], [ "The Hobbit", @@ -294,14 +294,14 @@ class BackendTests(WagtailTestUtils): def test_filter_and_operator(self): results = self.backend.search( - None, models.Book.objects.filter(number_of_pages=440) & models.Book.objects.filter(publication_date=date(1955, 10, 20))) + MATCH_ALL, models.Book.objects.filter(number_of_pages=440) & models.Book.objects.filter(publication_date=date(1955, 10, 20))) self.assertUnsortedListEqual([r.title for r in results], [ "The Return of the King" ]) def test_filter_or_operator(self): - results = self.backend.search(None, models.Book.objects.filter(number_of_pages=440) | models.Book.objects.filter(number_of_pages=1160)) + results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages=440) | models.Book.objects.filter(number_of_pages=1160)) self.assertUnsortedListEqual([r.title for r in results], [ "Learning Python", @@ -311,12 +311,12 @@ class BackendTests(WagtailTestUtils): def test_filter_on_non_filterable_field(self): with self.assertRaises(FieldError): - list(self.backend.search(None, models.Author.objects.filter(name__startswith="Issac"))) + list(self.backend.search(MATCH_ALL, models.Author.objects.filter(name__startswith="Issac"))) # ORDER BY RELEVANCE def test_order_by_relevance(self): - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Ordering should be set to "number_of_pages" self.assertEqual([r.title for r in results], [ @@ -332,19 +332,19 @@ class BackendTests(WagtailTestUtils): def test_order_by_non_filterable_field(self): with self.assertRaises(FieldError): - list(self.backend.search(None, models.Author.objects.order_by('name'), order_by_relevance=False)) + list(self.backend.search(MATCH_ALL, models.Author.objects.order_by('name'), order_by_relevance=False)) # SLICING TESTS def test_single_result(self): - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) self.assertEqual(results[0].title, "Foundation") self.assertEqual(results[1].title, "The Hobbit") def test_limit(self): # Note: we need consistent ordering for this test - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Limit the results results = results[:3] @@ -357,7 +357,7 @@ class BackendTests(WagtailTestUtils): def test_offset(self): # Note: we need consistent ordering for this test - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Offset the results results = results[3:] @@ -372,7 +372,7 @@ class BackendTests(WagtailTestUtils): def test_offset_and_limit(self): # Note: we need consistent ordering for this test - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Offset the results results = results[3:6] @@ -419,7 +419,7 @@ class BackendTests(WagtailTestUtils): # instead of three). # Note: we need consistent ordering for this test - results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) + results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False) # Limit the results results = results[:3]