From c737f516e8928ffbba610ad4a4981419e4cf5ba8 Mon Sep 17 00:00:00 2001
From: Bertrand Bordage
Date: Thu, 23 Nov 2017 21:03:00 +0100
Subject: [PATCH] Implements Filter & Boost in database search backends.

---
Reviewer notes (everything between the three dashes above and the diffstat
is skipped when the patch is applied; it is not part of the commit message).

What the patch does
 * query.py: every query node gains apply(func), which rebuilds the node
   from its transformed children and hands the rebuilt node to func;
   clone() is apply() with the identity function, and children / child
   expose the direct sub-nodes.
 * PlainText, Filter and Boost become SearchQueryShortcut subclasses whose
   get_equivalent() spells the same query with the basic classes. The
   database and PostgreSQL compilers now recurse on get_equivalent() for
   any shortcut instead of special-casing PlainText.
 * Boost.get_equivalent() multiplies the boost of PlainText/Fuzzy/Prefix/Term
   nodes on a rebuilt copy of the subquery, so the wrapped query objects are
   not modified.

Fixes folded in during review (hunk line counts are unchanged)
 * SearchQuery.child: the error message was formatted with
   "% name, len(children)", which gives the % operator a single value for
   two placeholders and raises TypeError instead of the intended IndexError.
   The two values are now passed as a tuple.
 * Filter.apply: include and exclude default to None; apply()/clone() no
   longer call .apply() on None.
 * MatchAll.apply: now passes the copy through func like every other node.
 * tests: the local variable "filter" shadowed the builtin; renamed to
   filter_obj.

Open points
 * NOTE(review): the untouched context line in backends/db.py checks
   isinstance(self.query, MatchAll) while the following checks use "query";
   this looks unintended for nested queries - please confirm separately.
 * The "index" blob ids below are those of the original submission and were
   not recomputed after the review fixes.

 wagtail/contrib/postgres_search/backend.py    |   6 +-
 wagtail/wagtailsearch/backends/db.py          |   6 +-
 wagtail/wagtailsearch/query.py                | 168 ++++++++++++++----
 wagtail/wagtailsearch/tests/test_backends.py  | 135 +++++++++++---
 .../tests/test_elasticsearch2_backend.py      |  11 +-
 5 files changed, 261 insertions(+), 65 deletions(-)

diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py
index eb7ca7306..0429ecbd2 100644
--- a/wagtail/contrib/postgres_search/backend.py
+++ b/wagtail/contrib/postgres_search/backend.py
@@ -15,7 +15,7 @@ from django.utils.encoding import force_text
 from wagtail.wagtailsearch.backends.base import (
     BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
 from wagtail.wagtailsearch.index import RelatedFields, SearchField
-from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, PlainText, Term
+from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, SearchQueryShortcut, Term
 from wagtail.wagtailsearch.utils import ADD, AND, OR
 
 from .models import IndexEntry
@@ -181,8 +181,8 @@ class PostgresSearchQueryCompiler(BaseSearchQueryCompiler):
         if query is None:
             query = self.query
 
-        if isinstance(query, PlainText):
-            return self.build_database_query(query.to_combined_terms(), config)
+        if isinstance(query, SearchQueryShortcut):
+            return self.build_database_query(query.get_equivalent(), config)
         if isinstance(query, Term):
             # TODO: Find a way to use the term boosting.
             if query.boost != 1:
diff --git a/wagtail/wagtailsearch/backends/db.py b/wagtail/wagtailsearch/backends/db.py
index bcfd89ce2..20e1fec03 100644
--- a/wagtail/wagtailsearch/backends/db.py
+++ b/wagtail/wagtailsearch/backends/db.py
@@ -7,7 +7,7 @@ from django.db.models.expressions import Value
 
 from wagtail.wagtailsearch.backends.base import (
     BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
-from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, PlainText, Term
+from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, SearchQueryShortcut, Term
 from wagtail.wagtailsearch.utils import AND, OR
 
 
@@ -62,8 +62,8 @@ class DatabaseSearchQueryCompiler(BaseSearchQueryCompiler):
         if isinstance(self.query, MatchAll):
             return models.Q()
 
-        if isinstance(query, PlainText):
-            return self.build_database_filter(query.to_combined_terms())
+        if isinstance(query, SearchQueryShortcut):
+            return self.build_database_filter(query.get_equivalent())
         if isinstance(query, Term):
             if query.boost != 1:
                 warn('Database search backend does not support term boosting.')
diff --git a/wagtail/wagtailsearch/query.py b/wagtail/wagtailsearch/query.py
index de341ce44..276cffe90 100644
--- a/wagtail/wagtailsearch/query.py
+++ b/wagtail/wagtailsearch/query.py
@@ -1,6 +1,11 @@
 from __future__ import absolute_import, unicode_literals
 
 
+#
+# Base classes
+#
+
+
 class SearchQuery:
     def __and__(self, other):
         return And([self, other])
@@ -11,31 +16,119 @@ class SearchQuery:
     def __invert__(self):
         return Not(self)
 
+    def apply(self, func):
+        raise NotImplementedError  # subclasses return func(<rebuilt copy of self>)
+
+    def clone(self):
+        return self.apply(lambda o: o)  # identity transform: every node is rebuilt
+
+    def get_children(self):
+        return ()
+
+    @property
+    def children(self):
+        return list(self.get_children())
+
+    @property
+    def child(self):
+        children = self.children
+        if len(children) != 1:
+            raise IndexError('`%s` object has %d children, not a single child.'
+                             % (self.__class__.__name__, len(children)))
+        return children[0]
+
 
 class SearchQueryOperator(SearchQuery):
     pass
 
 
-class And(SearchQueryOperator):
+class MultiOperandsSearchQueryOperator(SearchQueryOperator):
     def __init__(self, subqueries):
         self.subqueries = subqueries
 
+    def apply(self, func):
+        return func(self.__class__(
+            [subquery.apply(func) for subquery in self.subqueries]))
 
-class Or(SearchQueryOperator):
-    def __init__(self, subqueries):
-        self.subqueries = subqueries
+    def get_children(self):
+        yield from self.subqueries
+
+
+class SearchQueryShortcut(SearchQuery):
+    def get_equivalent(self):
+        raise NotImplementedError
+
+    def get_children(self):
+        return (self.get_equivalent(),)  # a shortcut's only child is its expansion
+
+#
+# Operators
+#
+
+
+class And(MultiOperandsSearchQueryOperator):
+    pass
+
+
+class Or(MultiOperandsSearchQueryOperator):
+    pass
 
 
 class Not(SearchQueryOperator):
     def __init__(self, subquery: SearchQuery):
         self.subquery = subquery
 
+    def apply(self, func):
+        return func(self.__class__(self.subquery.apply(func)))
+
+    def get_children(self):
+        yield self.subquery
+
+
+#
+# Basic query classes
+#
+
 
 class MatchAll(SearchQuery):
-    pass
+    def apply(self, func):
+        return func(self.__class__())  # same contract as the other nodes
 
 
-class PlainText(SearchQuery):
+class Term(SearchQuery):
+    def __init__(self, term: str, boost: float = 1):
+        self.term = term
+        self.boost = boost
+
+    def apply(self, func):
+        return func(self.__class__(self.term, self.boost))
+
+
+class Prefix(SearchQuery):
+    def __init__(self, prefix: str, boost: float = 1):
+        self.prefix = prefix
+        self.boost = boost
+
+    def apply(self, func):
+        return func(self.__class__(self.prefix, self.boost))
+
+
+class Fuzzy(SearchQuery):
+    def __init__(self, term: str, max_distance: float = 3, boost: float = 1):
+        self.term = term
+        self.max_distance = max_distance
+        self.boost = boost
+
+    def apply(self, func):
+        return func(self.__class__(self.term, self.max_distance, self.boost))
+
+
+#
+# Shortcut query classes
+#
+
+
+class PlainText(SearchQueryShortcut):
     OPERATORS = {
         'and': And,
         'or': Or,
@@ -50,43 +143,52 @@ class PlainText(SearchQuery):
         self.operator = operator
         self.boost = boost
 
-    def to_combined_terms(self):
+    def apply(self, func):
+        return func(self.__class__(self.query_string, self.operator,
+                                   self.boost))
+
+    def get_equivalent(self):
         return self.OPERATORS[self.operator]([
             Term(term, boost=self.boost)
             for term in self.query_string.split()])
 
 
-class Term(SearchQuery):
-    def __init__(self, term: str, boost: float = 1):
-        self.term = term
-        self.boost = boost
-
-
-class Prefix(SearchQuery):
-    def __init__(self, prefix: str, boost: float = 1):
-        self.prefix = prefix
-        self.boost = boost
-
-
-class Fuzzy(SearchQuery):
-    def __init__(self, term: str, max_distance: float = 3, boost: float = 1):
-        self.term = term
-        self.max_distance = max_distance
-        self.boost = boost
-
-
-class Boost(SearchQuery):
-    def __init__(self, query: SearchQuery, boost: float):
-        self.query = query
-        self.boost = boost
-
-
-class Filter(SearchQuery):
+class Filter(SearchQueryShortcut):
     def __init__(self, query: SearchQuery,
                  include: SearchQuery = None, exclude: SearchQuery = None):
         self.query = query
         self.include = include
         self.exclude = exclude
 
+    def apply(self, func):
+        include = None if self.include is None else self.include.apply(func)
+        exclude = None if self.exclude is None else self.exclude.apply(func)
+        return func(self.__class__(self.query.apply(func), include, exclude))
+
+    def get_equivalent(self):
+        query = self.query
+        if self.include is not None:
+            query &= self.include
+        if self.exclude is not None:
+            query &= ~self.exclude
+        return query
+
+
+class Boost(SearchQueryShortcut):
+    def __init__(self, subquery: SearchQuery, boost: float):
+        self.subquery = subquery
+        self.boost = boost
+
+    def apply(self, func):
+        return func(self.__class__(self.subquery.apply(func), self.boost))
+
+    def get_equivalent(self):
+        def boost_child(child):
+            if isinstance(child, (PlainText, Fuzzy, Prefix, Term)):
+                child.boost *= self.boost  # child is a copy built by apply()
+            return child
+
+        return self.subquery.apply(boost_child)
+
 
 MATCH_ALL = MatchAll()
diff --git a/wagtail/wagtailsearch/tests/test_backends.py b/wagtail/wagtailsearch/tests/test_backends.py
index 66ffe6156..82cddef95 100644
--- a/wagtail/wagtailsearch/tests/test_backends.py
+++ b/wagtail/wagtailsearch/tests/test_backends.py
@@ -17,7 +17,7 @@ from wagtail.wagtailsearch.backends import (
     InvalidSearchBackendError, get_search_backend, get_search_backends)
 from wagtail.wagtailsearch.backends.base import FieldError
 from wagtail.wagtailsearch.backends.db import DatabaseSearchBackend
-from wagtail.wagtailsearch.query import MATCH_ALL, And, Not, Or, PlainText, Term
+from wagtail.wagtailsearch.query import MATCH_ALL, And, Boost, Filter, Not, Or, PlainText, Term
 
 
 class BackendTests(WagtailTestUtils):
@@ -431,7 +431,7 @@ class BackendTests(WagtailTestUtils):
         ])
 
     #
-    # Query classes
+    # Basic query classes
     #
 
     def test_match_all(self):
@@ -452,29 +452,6 @@ class BackendTests(WagtailTestUtils):
         self.assertSetEqual({r.title for r in results},
                             {'JavaScript: The Definitive Guide'})
 
-    def test_plain_text(self):
-        # Single word
-        results = self.backend.search(PlainText('Javascript'),
-                                      models.Book.objects.all())
-        self.assertSetEqual({r.title for r in results},
-                            {'JavaScript: The Definitive Guide',
-                             'JavaScript: The good parts'})
-
-        # Multiple words (OR operator)
-        results = self.backend.search(PlainText('Javascript Definitive',
-                                                operator='or'),
-                                      models.Book.objects.all())
-        self.assertSetEqual({r.title for r in results},
-                            {'JavaScript: The Definitive Guide',
-                             'JavaScript: The good parts'})
-
-        # Multiple words (AND operator)
-        results = self.backend.search(PlainText('Javascript Definitive',
-                                                operator='and'),
-                                      models.Book.objects.all())
-        self.assertSetEqual({r.title for r in results},
-                            {'JavaScript: The Definitive Guide'})
-
     def test_and(self):
         results = self.backend.search(And([Term('Javascript'),
                                            Term('Definitive')]),
@@ -534,6 +511,114 @@ class BackendTests(WagtailTestUtils):
                              'The Rust Programming Language',
                              'Two Scoops of Django 1.11'})
 
+    #
+    # Shortcut query classes
+    #
+
+    def test_plain_text(self):
+        # Single word
+        results = self.backend.search(PlainText('Javascript'),
+                                      models.Book.objects.all())
+        self.assertSetEqual({r.title for r in results},
+                            {'JavaScript: The Definitive Guide',
+                             'JavaScript: The good parts'})
+
+        # Multiple words (OR operator)
+        results = self.backend.search(PlainText('Javascript Definitive',
+                                                operator='or'),
+                                      models.Book.objects.all())
+        self.assertSetEqual({r.title for r in results},
+                            {'JavaScript: The Definitive Guide',
+                             'JavaScript: The good parts'})
+
+        # Multiple words (AND operator)
+        results = self.backend.search(PlainText('Javascript Definitive',
+                                                operator='and'),
+                                      models.Book.objects.all())
+        self.assertSetEqual({r.title for r in results},
+                            {'JavaScript: The Definitive Guide'})
+
+    def test_filter_equivalent(self):
+        filter_obj = Filter(Term('Javascript'))
+        term = filter_obj.child  # no include/exclude: the equivalent is the bare query
+        self.assertIsInstance(term, Term)
+        self.assertEqual(term.term, 'Javascript')
+
+        filter_obj = Filter(Term('Javascript'), include=Term('Definitive'))
+        and_obj = filter_obj.child
+        self.assertIsInstance(and_obj, And)
+        javascript = and_obj.children[0]
+        self.assertIsInstance(javascript, Term)
+        self.assertEqual(javascript.term, 'Javascript')
+        definitive = and_obj.children[1]
+        self.assertIsInstance(definitive, Term)
+        self.assertEqual(definitive.term, 'Definitive')
+
+        filter_obj = Filter(Term('Javascript'),
+                            include=Term('Definitive'), exclude=Term('Guide'))
+        and_obj1 = filter_obj.child  # And([And([query, include]), Not(exclude)])
+        self.assertIsInstance(and_obj1, And)
+        and_obj2 = and_obj1.children[0]
+        javascript = and_obj2.children[0]
+        self.assertIsInstance(javascript, Term)
+        self.assertEqual(javascript.term, 'Javascript')
+        definitive = and_obj2.children[1]
+        self.assertIsInstance(definitive, Term)
+        self.assertEqual(definitive.term, 'Definitive')
+        not_obj = and_obj1.children[1]
+        self.assertIsInstance(not_obj, Not)
+        guide = not_obj.child
+        self.assertEqual(guide.term, 'Guide')
+
+    def test_filter_query(self):
+        results = self.backend.search(Filter(Term('Javascript')),
+                                      models.Book.objects.all())
+        self.assertSetEqual({r.title for r in results},
+                            {'JavaScript: The Definitive Guide',
+                             'JavaScript: The good parts'})
+
+        results = self.backend.search(Filter(Term('Javascript'),
+                                             include=Term('Definitive')),
+                                      models.Book.objects.all())
+        self.assertSetEqual({r.title for r in results},
+                            {'JavaScript: The Definitive Guide'})
+
+        results = self.backend.search(Filter(Term('Javascript'),
+                                             include=Term('Definitive'),
+                                             exclude=Term('Guide')),
+                                      models.Book.objects.all())
+        self.assertSetEqual({r.title for r in results}, set())  # the only match contains 'Guide'
+
+    def test_boost_equivalent(self):
+        boost = Boost(Term('Guide'), 5)
+        equivalent = boost.children[0]
+        self.assertIsInstance(equivalent, Term)
+        self.assertAlmostEqual(equivalent.boost, 5)
+
+        boost = Boost(Term('Guide', boost=0.5), 5)
+        equivalent = boost.children[0]
+        self.assertIsInstance(equivalent, Term)
+        self.assertAlmostEqual(equivalent.boost, 2.5)
+
+        boost = Boost(Boost(Term('Guide', 0.1), 3), 5)
+        sub_boost = boost.children[0]
+        self.assertIsInstance(sub_boost, Boost)
+        sub_boost = sub_boost.children[0]
+        self.assertIsInstance(sub_boost, Term)
+        self.assertAlmostEqual(sub_boost.boost, 1.5)  # 0.1 * 5 (outer) * 3 (inner)
+
+        boost = Boost(And([Boost(Term('Guide', 0.1), 3), Term('Two', 2)]), 5)
+        and_obj = boost.children[0]
+        self.assertIsInstance(and_obj, And)
+        sub_boost = and_obj.children[0]
+        self.assertIsInstance(sub_boost, Boost)
+        guide = sub_boost.children[0]
+        self.assertIsInstance(guide, Term)
+        self.assertAlmostEqual(guide.boost, 1.5)  # 0.1 * 5 (outer) * 3 (inner)
+        two = and_obj.children[1]
+        self.assertIsInstance(two, Term)
+        self.assertAlmostEqual(two.boost, 10)  # 2 * 5
+
 
 @override_settings(
     WAGTAILSEARCH_BACKENDS={
diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py
index 936a50e4b..e921e7a6f 100644
--- a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py
+++ b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py
@@ -38,7 +38,7 @@ class TestElasticsearch2SearchBackend(BackendTests, ElasticsearchCommonSearchBac
         super(TestElasticsearch2SearchBackend, self).test_delete()
 
     #
-    # Query classes
+    # Basic query classes
     #
 
     # Not implemented yet
@@ -66,6 +66,15 @@ class TestElasticsearch2SearchBackend(BackendTests, ElasticsearchCommonSearchBac
     def test_operators_combination(self):
         super().test_operators_combination()
 
+    #
+    # Shortcut query classes
+    #
+
+    # Not implemented yet
+    @unittest.expectedFailure
+    def test_filter_query(self):
+        super().test_filter_query()
+
 
 class TestElasticsearch2SearchQuery(TestCase):
     def assertDictEqual(self, a, b):