Implements Filter & Boost in database search backends.

This commit is contained in:
Bertrand Bordage 2017-11-23 21:03:00 +01:00
parent b36165fd6a
commit c737f516e8
5 changed files with 261 additions and 65 deletions

View file

@ -15,7 +15,7 @@ from django.utils.encoding import force_text
from wagtail.wagtailsearch.backends.base import (
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
from wagtail.wagtailsearch.index import RelatedFields, SearchField
from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, PlainText, Term
from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, SearchQueryShortcut, Term
from wagtail.wagtailsearch.utils import ADD, AND, OR
from .models import IndexEntry
@ -181,8 +181,8 @@ class PostgresSearchQueryCompiler(BaseSearchQueryCompiler):
if query is None:
query = self.query
if isinstance(query, PlainText):
return self.build_database_query(query.to_combined_terms(), config)
if isinstance(query, SearchQueryShortcut):
return self.build_database_query(query.get_equivalent(), config)
if isinstance(query, Term):
# TODO: Find a way to use the term boosting.
if query.boost != 1:

View file

@ -7,7 +7,7 @@ from django.db.models.expressions import Value
from wagtail.wagtailsearch.backends.base import (
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, PlainText, Term
from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, SearchQueryShortcut, Term
from wagtail.wagtailsearch.utils import AND, OR
@ -62,8 +62,8 @@ class DatabaseSearchQueryCompiler(BaseSearchQueryCompiler):
if isinstance(self.query, MatchAll):
return models.Q()
if isinstance(query, PlainText):
return self.build_database_filter(query.to_combined_terms())
if isinstance(query, SearchQueryShortcut):
return self.build_database_filter(query.get_equivalent())
if isinstance(query, Term):
if query.boost != 1:
warn('Database search backend does not support term boosting.')

View file

@ -1,6 +1,11 @@
from __future__ import absolute_import, unicode_literals
#
# Base classes
#
class SearchQuery:
def __and__(self, other):
return And([self, other])
@ -11,31 +16,119 @@ class SearchQuery:
def __invert__(self):
return Not(self)
def apply(self, func):
    """Abstract hook for rebuilding a query through ``func``.

    The base class provides no implementation; the concrete query
    classes in this module override it.
    """
    raise NotImplementedError
def clone(self):
    """Return what ``apply`` produces when given an identity callback."""
    def _identity(node):
        return node

    return self.apply(_identity)
def get_children(self):
    """Return the direct subqueries of this query; empty by default."""
    return tuple()
@property
def children(self):
    """Direct subqueries as a list, materialised from ``get_children()``."""
    subqueries = self.get_children()
    return list(subqueries)
@property
def child(self):
    """Return the only child of this query.

    Raises:
        IndexError: if the query does not have exactly one child.
    """
    children = self.children
    if len(children) != 1:
        # Both values must be passed to ``%`` as one tuple; with a bare
        # ``% name, count`` the formatting itself fails with TypeError
        # before the IndexError can be raised.
        raise IndexError(
            '`%s` object has %d children, not a single child.'
            % (self.__class__.__name__, len(children)))
    return children[0]
class SearchQueryOperator(SearchQuery):
    """Common base class of the operator query classes in this module."""
class And(SearchQueryOperator):
class MultiOperandsSearchQueryOperator(SearchQueryOperator):
def __init__(self, subqueries):
    """Keep the operand queries; the list is stored as given, not copied."""
    self.subqueries = subqueries
def apply(self, func):
    """Rebuild this operator bottom-up through ``func``.

    Each operand is processed with its own ``apply(func)`` first; a new
    operator of the same class is then built from the results and is
    itself passed to ``func``, whose return value is returned.
    """
    transformed = []
    for subquery in self.subqueries:
        transformed.append(subquery.apply(func))
    rebuilt = self.__class__(transformed)
    return func(rebuilt)
class Or(SearchQueryOperator):
def __init__(self, subqueries):
self.subqueries = subqueries
def get_children(self):
    """Yield the operand queries in their stored order."""
    for subquery in self.subqueries:
        yield subquery
class SearchQueryShortcut(SearchQuery):
    """Query defined through an equivalent query made of other classes.

    Subclasses implement ``get_equivalent()``; that equivalent query is
    exposed as the single child of the shortcut.
    """

    def get_equivalent(self):
        """Return the query this shortcut stands for (abstract here)."""
        raise NotImplementedError

    def get_children(self):
        """Return a one-element tuple holding the equivalent query."""
        equivalent = self.get_equivalent()
        return (equivalent,)
#
# Operators
#
class And(MultiOperandsSearchQueryOperator):
    """Multi-operand operator; this is what ``query_a & query_b`` builds."""
class Or(MultiOperandsSearchQueryOperator):
    """Multi-operand operator selected by ``PlainText`` for ``operator='or'``."""
class Not(SearchQueryOperator):
    """Single-operand operator; this is what ``~query`` builds."""

    def __init__(self, subquery: SearchQuery):
        self.subquery = subquery

    def apply(self, func):
        """Process the operand first, then pass the rebuilt ``Not`` to ``func``."""
        transformed = self.subquery.apply(func)
        rebuilt = self.__class__(transformed)
        return func(rebuilt)

    def get_children(self):
        """Yield the single operand."""
        yield self.subquery
#
# Basic query classes
#
class MatchAll(SearchQuery):
pass
def apply(self, func):
    """Return ``func`` applied to a fresh instance of this class.

    Every other ``apply`` in this module hands its rebuilt node to
    ``func``; doing the same here means a callback also gets to see
    (and possibly replace) match-all nodes instead of silently
    skipping them. With an identity callback, as used by ``clone()``,
    the result is unchanged: a new instance of the same class.
    """
    return func(self.__class__())
class PlainText(SearchQuery):
class Term(SearchQuery):
    """Query holding a single term and its boost factor (1 by default)."""

    def __init__(self, term: str, boost: float = 1):
        self.term, self.boost = term, boost

    def apply(self, func):
        """Pass a copy of this term (same term, same boost) to ``func``."""
        copy = self.__class__(self.term, self.boost)
        return func(copy)
class Prefix(SearchQuery):
    """Query holding a prefix string and its boost factor (1 by default)."""

    def __init__(self, prefix: str, boost: float = 1):
        self.prefix, self.boost = prefix, boost

    def apply(self, func):
        """Pass a copy of this query (same prefix, same boost) to ``func``."""
        copy = self.__class__(self.prefix, self.boost)
        return func(copy)
class Fuzzy(SearchQuery):
    """Query holding a term, a maximum distance (default 3) and a boost (default 1)."""

    def __init__(self, term: str, max_distance: float = 3, boost: float = 1):
        self.term, self.max_distance, self.boost = term, max_distance, boost

    def apply(self, func):
        """Pass a copy of this query (same attributes) to ``func``."""
        copy = self.__class__(self.term, self.max_distance, self.boost)
        return func(copy)
#
# Shortcut query classes
#
class PlainText(SearchQueryShortcut):
OPERATORS = {
'and': And,
'or': Or,
@ -50,43 +143,52 @@ class PlainText(SearchQuery):
self.operator = operator
self.boost = boost
def to_combined_terms(self):
def apply(self, func):
return func(self.__class__(self.query_string, self.operator,
self.boost))
def get_equivalent(self):
return self.OPERATORS[self.operator]([
Term(term, boost=self.boost)
for term in self.query_string.split()])
class Term(SearchQuery):
def __init__(self, term: str, boost: float = 1):
self.term = term
self.boost = boost
class Prefix(SearchQuery):
def __init__(self, prefix: str, boost: float = 1):
self.prefix = prefix
self.boost = boost
class Fuzzy(SearchQuery):
def __init__(self, term: str, max_distance: float = 3, boost: float = 1):
self.term = term
self.max_distance = max_distance
self.boost = boost
class Boost(SearchQuery):
def __init__(self, query: SearchQuery, boost: float):
self.query = query
self.boost = boost
class Filter(SearchQuery):
class Filter(SearchQueryShortcut):
    """Shortcut combining a query with optional include/exclude queries.

    Its equivalent is ``query``, AND-ed with ``include`` when given,
    and AND-ed with ``~exclude`` when given.
    """

    def __init__(self, query: SearchQuery,
                 include: SearchQuery = None, exclude: SearchQuery = None):
        self.query = query
        self.include = include
        self.exclude = exclude

    def apply(self, func):
        """Rebuild the filter from transformed parts and pass it to ``func``.

        ``include`` and ``exclude`` are optional and default to ``None``;
        a missing part is carried over as ``None`` instead of being
        dereferenced, which used to raise ``AttributeError``.
        """
        # Same processing order as before: query, include, exclude.
        query = self.query.apply(func)
        include = (None if self.include is None
                   else self.include.apply(func))
        exclude = (None if self.exclude is None
                   else self.exclude.apply(func))
        return func(self.__class__(query, include, exclude))

    def get_equivalent(self):
        """Return ``query [& include] [& ~exclude]`` built with the operators."""
        query = self.query
        if self.include is not None:
            query &= self.include
        if self.exclude is not None:
            query &= ~self.exclude
        return query
class Boost(SearchQueryShortcut):
    """Shortcut multiplying the boost of the queries found in a subquery."""

    def __init__(self, subquery: SearchQuery, boost: float):
        self.subquery, self.boost = subquery, boost

    def apply(self, func):
        """Process the subquery first, then pass the rebuilt ``Boost`` to ``func``."""
        transformed = self.subquery.apply(func)
        rebuilt = self.__class__(transformed, self.boost)
        return func(rebuilt)

    def get_equivalent(self):
        """Return the subquery rebuilt with boosts multiplied by ``self.boost``.

        Only ``PlainText``, ``Fuzzy``, ``Prefix`` and ``Term`` nodes are
        rescaled; any other node is returned untouched.
        """
        boostable = (PlainText, Fuzzy, Prefix, Term)
        factor_owner = self

        def _scale(node):
            # ``node`` is whatever the subquery's ``apply`` hands over;
            # the rescaling is done in place on that object.
            if isinstance(node, boostable):
                node.boost *= factor_owner.boost
            return node

        return self.subquery.apply(_scale)
# Module-level MatchAll instance, importable as a ready-made query object.
MATCH_ALL = MatchAll()

View file

@ -17,7 +17,7 @@ from wagtail.wagtailsearch.backends import (
InvalidSearchBackendError, get_search_backend, get_search_backends)
from wagtail.wagtailsearch.backends.base import FieldError
from wagtail.wagtailsearch.backends.db import DatabaseSearchBackend
from wagtail.wagtailsearch.query import MATCH_ALL, And, Not, Or, PlainText, Term
from wagtail.wagtailsearch.query import MATCH_ALL, And, Boost, Filter, Not, Or, PlainText, Term
class BackendTests(WagtailTestUtils):
@ -431,7 +431,7 @@ class BackendTests(WagtailTestUtils):
])
#
# Query classes
# Basic query classes
#
def test_match_all(self):
@ -452,29 +452,6 @@ class BackendTests(WagtailTestUtils):
self.assertSetEqual({r.title for r in results},
{'JavaScript: The Definitive Guide'})
def test_plain_text(self):
# Single word
results = self.backend.search(PlainText('Javascript'),
models.Book.objects.all())
self.assertSetEqual({r.title for r in results},
{'JavaScript: The Definitive Guide',
'JavaScript: The good parts'})
# Multiple words (OR operator)
results = self.backend.search(PlainText('Javascript Definitive',
operator='or'),
models.Book.objects.all())
self.assertSetEqual({r.title for r in results},
{'JavaScript: The Definitive Guide',
'JavaScript: The good parts'})
# Multiple words (AND operator)
results = self.backend.search(PlainText('Javascript Definitive',
operator='and'),
models.Book.objects.all())
self.assertSetEqual({r.title for r in results},
{'JavaScript: The Definitive Guide'})
def test_and(self):
results = self.backend.search(And([Term('Javascript'),
Term('Definitive')]),
@ -534,6 +511,114 @@ class BackendTests(WagtailTestUtils):
'The Rust Programming Language',
'Two Scoops of Django 1.11'})
#
# Shortcut query classes
#
def test_plain_text(self):
    """PlainText with one word, then two words under 'or' and 'and'."""
    cases = (
        # Single word
        (('Javascript',), {},
         {'JavaScript: The Definitive Guide',
          'JavaScript: The good parts'}),
        # Multiple words (OR operator)
        (('Javascript Definitive',), {'operator': 'or'},
         {'JavaScript: The Definitive Guide',
          'JavaScript: The good parts'}),
        # Multiple words (AND operator)
        (('Javascript Definitive',), {'operator': 'and'},
         {'JavaScript: The Definitive Guide'}),
    )
    for args, kwargs, expected_titles in cases:
        results = self.backend.search(PlainText(*args, **kwargs),
                                      models.Book.objects.all())
        self.assertSetEqual({r.title for r in results}, expected_titles)
def test_filter_equivalent(self):
    """Check the tree exposed by Filter.child for each argument combination."""
    # Query only: the equivalent is the query itself.
    filter_query = Filter(Term('Javascript'))
    term = filter_query.child
    self.assertIsInstance(term, Term)
    self.assertEqual(term.term, 'Javascript')

    # Query + include: And(query, include).
    filter_query = Filter(Term('Javascript'), include=Term('Definitive'))
    and_obj = filter_query.child
    self.assertIsInstance(and_obj, And)
    javascript = and_obj.children[0]
    self.assertIsInstance(javascript, Term)
    self.assertEqual(javascript.term, 'Javascript')
    definitive = and_obj.children[1]
    self.assertIsInstance(definitive, Term)
    self.assertEqual(definitive.term, 'Definitive')

    # Query + include + exclude: And(And(query, include), Not(exclude)).
    filter_query = Filter(Term('Javascript'),
                          include=Term('Definitive'),
                          exclude=Term('Guide'))
    outer_and = filter_query.child
    self.assertIsInstance(outer_and, And)
    inner_and = outer_and.children[0]
    javascript = inner_and.children[0]
    self.assertIsInstance(javascript, Term)
    self.assertEqual(javascript.term, 'Javascript')
    definitive = inner_and.children[1]
    self.assertIsInstance(definitive, Term)
    self.assertEqual(definitive.term, 'Definitive')
    not_obj = outer_and.children[1]
    self.assertIsInstance(not_obj, Not)
    guide = not_obj.child
    self.assertEqual(guide.term, 'Guide')
def test_filter_query(self):
    """Search with Filter: query only, with include, with include + exclude."""
    cases = (
        ({},
         {'JavaScript: The Definitive Guide',
          'JavaScript: The good parts'}),
        ({'include': Term('Definitive')},
         {'JavaScript: The Definitive Guide'}),
        ({'include': Term('Definitive'), 'exclude': Term('Guide')},
         set()),
    )
    for filter_kwargs, expected_titles in cases:
        query = Filter(Term('Javascript'), **filter_kwargs)
        results = self.backend.search(query, models.Book.objects.all())
        self.assertSetEqual({r.title for r in results}, expected_titles)
def test_boost_equivalent(self):
    """Check the boosts exposed through Boost.children for several trees."""
    # Unboosted term: 1 * 5.
    term = Boost(Term('Guide'), 5).children[0]
    self.assertIsInstance(term, Term)
    self.assertAlmostEqual(term.boost, 5)

    # Already boosted term: 0.5 * 5.
    term = Boost(Term('Guide', boost=0.5), 5).children[0]
    self.assertIsInstance(term, Term)
    self.assertAlmostEqual(term.boost, 2.5)

    # Nested boosts: the inner Boost is kept, and 0.1 * 3 * 5 = 1.5.
    inner_boost = Boost(Boost(Term('Guide', 0.1), 3), 5).children[0]
    self.assertIsInstance(inner_boost, Boost)
    term = inner_boost.children[0]
    self.assertIsInstance(term, Term)
    self.assertAlmostEqual(term.boost, 1.5)

    # Boost over an And mixing a nested Boost and a plain term.
    outer = Boost(And([Boost(Term('Guide', 0.1), 3), Term('Two', 2)]), 5)
    and_obj = outer.children[0]
    self.assertIsInstance(and_obj, And)
    inner_boost = and_obj.children[0]
    self.assertIsInstance(inner_boost, Boost)
    guide = inner_boost.children[0]
    self.assertIsInstance(guide, Term)
    self.assertAlmostEqual(guide.boost, 1.5)
    two = and_obj.children[1]
    self.assertIsInstance(two, Term)
    self.assertAlmostEqual(two.boost, 10)
@override_settings(
WAGTAILSEARCH_BACKENDS={

View file

@ -38,7 +38,7 @@ class TestElasticsearch2SearchBackend(BackendTests, ElasticsearchCommonSearchBac
super(TestElasticsearch2SearchBackend, self).test_delete()
#
# Query classes
# Basic query classes
#
# Not implemented yet
@ -66,6 +66,15 @@ class TestElasticsearch2SearchBackend(BackendTests, ElasticsearchCommonSearchBac
def test_operators_combination(self):
super().test_operators_combination()
#
# Shortcut query classes
#
# Not implemented yet
@unittest.expectedFailure
def test_filter_query(self):
    """Run the shared Filter search test, currently expected to fail here."""
    super(TestElasticsearch2SearchBackend, self).test_filter_query()
class TestElasticsearch2SearchQuery(TestCase):
def assertDictEqual(self, a, b):