mirror of
https://github.com/Hopiu/wagtail.git
synced 2026-05-11 16:53:10 +00:00
Implements Filter & Boost in database search backends.
This commit is contained in:
parent
b36165fd6a
commit
c737f516e8
5 changed files with 261 additions and 65 deletions
|
|
@ -15,7 +15,7 @@ from django.utils.encoding import force_text
|
|||
from wagtail.wagtailsearch.backends.base import (
|
||||
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
|
||||
from wagtail.wagtailsearch.index import RelatedFields, SearchField
|
||||
from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, PlainText, Term
|
||||
from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, SearchQueryShortcut, Term
|
||||
from wagtail.wagtailsearch.utils import ADD, AND, OR
|
||||
|
||||
from .models import IndexEntry
|
||||
|
|
@ -181,8 +181,8 @@ class PostgresSearchQueryCompiler(BaseSearchQueryCompiler):
|
|||
if query is None:
|
||||
query = self.query
|
||||
|
||||
if isinstance(query, PlainText):
|
||||
return self.build_database_query(query.to_combined_terms(), config)
|
||||
if isinstance(query, SearchQueryShortcut):
|
||||
return self.build_database_query(query.get_equivalent(), config)
|
||||
if isinstance(query, Term):
|
||||
# TODO: Find a way to use the term boosting.
|
||||
if query.boost != 1:
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from django.db.models.expressions import Value
|
|||
|
||||
from wagtail.wagtailsearch.backends.base import (
|
||||
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
|
||||
from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, PlainText, Term
|
||||
from wagtail.wagtailsearch.query import And, MatchAll, Not, Or, SearchQueryShortcut, Term
|
||||
from wagtail.wagtailsearch.utils import AND, OR
|
||||
|
||||
|
||||
|
|
@ -62,8 +62,8 @@ class DatabaseSearchQueryCompiler(BaseSearchQueryCompiler):
|
|||
if isinstance(self.query, MatchAll):
|
||||
return models.Q()
|
||||
|
||||
if isinstance(query, PlainText):
|
||||
return self.build_database_filter(query.to_combined_terms())
|
||||
if isinstance(query, SearchQueryShortcut):
|
||||
return self.build_database_filter(query.get_equivalent())
|
||||
if isinstance(query, Term):
|
||||
if query.boost != 1:
|
||||
warn('Database search backend does not support term boosting.')
|
||||
|
|
|
|||
|
|
@ -1,6 +1,11 @@
|
|||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
|
||||
#
|
||||
# Base classes
|
||||
#
|
||||
|
||||
|
||||
class SearchQuery:
|
||||
def __and__(self, other):
|
||||
return And([self, other])
|
||||
|
|
@ -11,31 +16,119 @@ class SearchQuery:
|
|||
def __invert__(self):
|
||||
return Not(self)
|
||||
|
||||
def apply(self, func):
|
||||
raise NotImplementedError
|
||||
|
||||
def clone(self):
|
||||
return self.apply(lambda o: o)
|
||||
|
||||
def get_children(self):
|
||||
return ()
|
||||
|
||||
@property
|
||||
def children(self):
|
||||
return list(self.get_children())
|
||||
|
||||
@property
def child(self):
    """Return the single child of this query node.

    Raises:
        IndexError: if the node does not have exactly one child.
    """
    children = self.children
    if len(children) != 1:
        # Both values must reach ``%`` as one tuple. With a bare comma the
        # count would become a second argument of ``IndexError`` and the
        # formatting itself would fail with ``TypeError``.
        raise IndexError(
            '`%s` object has %d children, not a single child.'
            % (self.__class__.__name__, len(children)))
    return children[0]
|
||||
|
||||
|
||||
class SearchQueryOperator(SearchQuery):
|
||||
pass
|
||||
|
||||
|
||||
class And(SearchQueryOperator):
|
||||
class MultiOperandsSearchQueryOperator(SearchQueryOperator):
|
||||
def __init__(self, subqueries):
|
||||
self.subqueries = subqueries
|
||||
|
||||
def apply(self, func):
|
||||
return func(self.__class__(
|
||||
[subquery.apply(func) for subquery in self.subqueries]))
|
||||
|
||||
class Or(SearchQueryOperator):
|
||||
def __init__(self, subqueries):
|
||||
self.subqueries = subqueries
|
||||
def get_children(self):
|
||||
yield from self.subqueries
|
||||
|
||||
|
||||
class SearchQueryShortcut(SearchQuery):
    """Base class for queries that are expressed through another query.

    Subclasses provide ``get_equivalent``; the query it returns is exposed
    as this node's only child.
    """

    def get_equivalent(self):
        """Return the query this shortcut stands for; must be overridden."""
        raise NotImplementedError

    def get_children(self):
        """Return a one-element tuple holding the equivalent query."""
        equivalent = self.get_equivalent()
        return (equivalent,)
|
||||
|
||||
#
|
||||
# Operators
|
||||
#
|
||||
|
||||
|
||||
class And(MultiOperandsSearchQueryOperator):
|
||||
pass
|
||||
|
||||
|
||||
class Or(MultiOperandsSearchQueryOperator):
|
||||
pass
|
||||
|
||||
|
||||
class Not(SearchQueryOperator):
|
||||
def __init__(self, subquery: SearchQuery):
|
||||
self.subquery = subquery
|
||||
|
||||
def apply(self, func):
|
||||
return func(self.__class__(self.subquery.apply(func)))
|
||||
|
||||
def get_children(self):
|
||||
yield self.subquery
|
||||
|
||||
|
||||
#
|
||||
# Basic query classes
|
||||
#
|
||||
|
||||
|
||||
class MatchAll(SearchQuery):
|
||||
pass
|
||||
def apply(self, func):
    """Return ``func`` applied to a fresh instance of this node's class.

    The new instance is passed through ``func`` so that a transformation
    given to ``apply`` also reaches this node type instead of silently
    skipping it.
    """
    return func(self.__class__())
|
||||
|
||||
|
||||
class PlainText(SearchQuery):
|
||||
class Term(SearchQuery):
|
||||
def __init__(self, term: str, boost: float = 1):
|
||||
self.term = term
|
||||
self.boost = boost
|
||||
|
||||
def apply(self, func):
|
||||
return func(self.__class__(self.term, self.boost))
|
||||
|
||||
|
||||
class Prefix(SearchQuery):
|
||||
def __init__(self, prefix: str, boost: float = 1):
|
||||
self.prefix = prefix
|
||||
self.boost = boost
|
||||
|
||||
def apply(self, func):
|
||||
return func(self.__class__(self.prefix, self.boost))
|
||||
|
||||
|
||||
class Fuzzy(SearchQuery):
|
||||
def __init__(self, term: str, max_distance: float = 3, boost: float = 1):
|
||||
self.term = term
|
||||
self.max_distance = max_distance
|
||||
self.boost = boost
|
||||
|
||||
def apply(self, func):
|
||||
return func(self.__class__(self.term, self.max_distance, self.boost))
|
||||
|
||||
|
||||
#
|
||||
# Shortcut query classes
|
||||
#
|
||||
|
||||
|
||||
class PlainText(SearchQueryShortcut):
|
||||
OPERATORS = {
|
||||
'and': And,
|
||||
'or': Or,
|
||||
|
|
@ -50,43 +143,52 @@ class PlainText(SearchQuery):
|
|||
self.operator = operator
|
||||
self.boost = boost
|
||||
|
||||
def to_combined_terms(self):
|
||||
def apply(self, func):
|
||||
return func(self.__class__(self.query_string, self.operator,
|
||||
self.boost))
|
||||
|
||||
def get_equivalent(self):
|
||||
return self.OPERATORS[self.operator]([
|
||||
Term(term, boost=self.boost)
|
||||
for term in self.query_string.split()])
|
||||
|
||||
|
||||
class Term(SearchQuery):
|
||||
def __init__(self, term: str, boost: float = 1):
|
||||
self.term = term
|
||||
self.boost = boost
|
||||
|
||||
|
||||
class Prefix(SearchQuery):
|
||||
def __init__(self, prefix: str, boost: float = 1):
|
||||
self.prefix = prefix
|
||||
self.boost = boost
|
||||
|
||||
|
||||
class Fuzzy(SearchQuery):
|
||||
def __init__(self, term: str, max_distance: float = 3, boost: float = 1):
|
||||
self.term = term
|
||||
self.max_distance = max_distance
|
||||
self.boost = boost
|
||||
|
||||
|
||||
class Boost(SearchQuery):
|
||||
def __init__(self, query: SearchQuery, boost: float):
|
||||
self.query = query
|
||||
self.boost = boost
|
||||
|
||||
|
||||
class Filter(SearchQuery):
|
||||
class Filter(SearchQueryShortcut):
|
||||
def __init__(self, query: SearchQuery,
|
||||
include: SearchQuery = None, exclude: SearchQuery = None):
|
||||
self.query = query
|
||||
self.include = include
|
||||
self.exclude = exclude
|
||||
|
||||
def apply(self, func):
    """Rebuild this filter from transformed parts and pass it to ``func``.

    ``include`` and ``exclude`` default to ``None`` in ``__init__``; an
    unset clause is carried over as ``None`` instead of being dereferenced,
    which would raise ``AttributeError``.
    """
    # Transform the parts in the order query, include, exclude.
    query = self.query.apply(func)
    include = None if self.include is None else self.include.apply(func)
    exclude = None if self.exclude is None else self.exclude.apply(func)
    return func(self.__class__(query, include, exclude))
|
||||
|
||||
def get_equivalent(self):
    """Return the query this filter stands for.

    Starts from ``query``, AND-combines ``include`` when it is set, then
    AND-combines the negation of ``exclude`` when it is set.
    """
    combined = self.query
    must_match, must_not_match = self.include, self.exclude
    if must_match is not None:
        combined &= must_match
    if must_not_match is not None:
        combined &= ~must_not_match
    return combined
|
||||
|
||||
|
||||
class Boost(SearchQueryShortcut):
|
||||
def __init__(self, subquery: SearchQuery, boost: float):
|
||||
self.subquery = subquery
|
||||
self.boost = boost
|
||||
|
||||
def apply(self, func):
|
||||
return func(self.__class__(self.subquery.apply(func), self.boost))
|
||||
|
||||
def get_equivalent(self):
    """Return the subquery with this boost multiplied into its boostable nodes.

    The subquery is walked with ``apply``; every node that is a
    ``PlainText``, ``Fuzzy``, ``Prefix`` or ``Term`` has its ``boost``
    multiplied by this object's ``boost``. Other nodes are returned as-is.
    """
    factor = self.boost

    def scale(node):
        # Only these node types are scaled; anything else passes through.
        if isinstance(node, (PlainText, Fuzzy, Prefix, Term)):
            node.boost *= factor
        return node

    return self.subquery.apply(scale)
|
||||
|
||||
|
||||
MATCH_ALL = MatchAll()
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ from wagtail.wagtailsearch.backends import (
|
|||
InvalidSearchBackendError, get_search_backend, get_search_backends)
|
||||
from wagtail.wagtailsearch.backends.base import FieldError
|
||||
from wagtail.wagtailsearch.backends.db import DatabaseSearchBackend
|
||||
from wagtail.wagtailsearch.query import MATCH_ALL, And, Not, Or, PlainText, Term
|
||||
from wagtail.wagtailsearch.query import MATCH_ALL, And, Boost, Filter, Not, Or, PlainText, Term
|
||||
|
||||
|
||||
class BackendTests(WagtailTestUtils):
|
||||
|
|
@ -431,7 +431,7 @@ class BackendTests(WagtailTestUtils):
|
|||
])
|
||||
|
||||
#
|
||||
# Query classes
|
||||
# Basic query classes
|
||||
#
|
||||
|
||||
def test_match_all(self):
|
||||
|
|
@ -452,29 +452,6 @@ class BackendTests(WagtailTestUtils):
|
|||
self.assertSetEqual({r.title for r in results},
|
||||
{'JavaScript: The Definitive Guide'})
|
||||
|
||||
def test_plain_text(self):
|
||||
# Single word
|
||||
results = self.backend.search(PlainText('Javascript'),
|
||||
models.Book.objects.all())
|
||||
self.assertSetEqual({r.title for r in results},
|
||||
{'JavaScript: The Definitive Guide',
|
||||
'JavaScript: The good parts'})
|
||||
|
||||
# Multiple words (OR operator)
|
||||
results = self.backend.search(PlainText('Javascript Definitive',
|
||||
operator='or'),
|
||||
models.Book.objects.all())
|
||||
self.assertSetEqual({r.title for r in results},
|
||||
{'JavaScript: The Definitive Guide',
|
||||
'JavaScript: The good parts'})
|
||||
|
||||
# Multiple words (AND operator)
|
||||
results = self.backend.search(PlainText('Javascript Definitive',
|
||||
operator='and'),
|
||||
models.Book.objects.all())
|
||||
self.assertSetEqual({r.title for r in results},
|
||||
{'JavaScript: The Definitive Guide'})
|
||||
|
||||
def test_and(self):
|
||||
results = self.backend.search(And([Term('Javascript'),
|
||||
Term('Definitive')]),
|
||||
|
|
@ -534,6 +511,114 @@ class BackendTests(WagtailTestUtils):
|
|||
'The Rust Programming Language',
|
||||
'Two Scoops of Django 1.11'})
|
||||
|
||||
#
|
||||
# Shortcut query classes
|
||||
#
|
||||
|
||||
def test_plain_text(self):
|
||||
# Single word
|
||||
results = self.backend.search(PlainText('Javascript'),
|
||||
models.Book.objects.all())
|
||||
self.assertSetEqual({r.title for r in results},
|
||||
{'JavaScript: The Definitive Guide',
|
||||
'JavaScript: The good parts'})
|
||||
|
||||
# Multiple words (OR operator)
|
||||
results = self.backend.search(PlainText('Javascript Definitive',
|
||||
operator='or'),
|
||||
models.Book.objects.all())
|
||||
self.assertSetEqual({r.title for r in results},
|
||||
{'JavaScript: The Definitive Guide',
|
||||
'JavaScript: The good parts'})
|
||||
|
||||
# Multiple words (AND operator)
|
||||
results = self.backend.search(PlainText('Javascript Definitive',
|
||||
operator='and'),
|
||||
models.Book.objects.all())
|
||||
self.assertSetEqual({r.title for r in results},
|
||||
{'JavaScript: The Definitive Guide'})
|
||||
|
||||
def test_filter_equivalent(self):
    """Check the query tree that ``Filter`` exposes through ``child``."""
    # Query only: the child is a Term carrying the query's text.
    # (The local is not called ``filter`` so the builtin stays usable.)
    filter_query = Filter(Term('Javascript'))
    term = filter_query.child
    self.assertIsInstance(term, Term)
    self.assertEqual(term.term, 'Javascript')

    # Query + include: the child is an And of both terms.
    filter_query = Filter(Term('Javascript'), include=Term('Definitive'))
    and_obj = filter_query.child
    self.assertIsInstance(and_obj, And)
    javascript = and_obj.children[0]
    self.assertIsInstance(javascript, Term)
    self.assertEqual(javascript.term, 'Javascript')
    definitive = and_obj.children[1]
    self.assertIsInstance(definitive, Term)
    self.assertEqual(definitive.term, 'Definitive')

    # Query + include + exclude: an outer And whose first child is the
    # And from the previous case and whose second child is Not(exclude).
    filter_query = Filter(Term('Javascript'),
                          include=Term('Definitive'), exclude=Term('Guide'))
    and_obj1 = filter_query.child
    self.assertIsInstance(and_obj1, And)
    and_obj2 = and_obj1.children[0]
    self.assertIsInstance(and_obj2, And)
    javascript = and_obj2.children[0]
    self.assertIsInstance(javascript, Term)
    self.assertEqual(javascript.term, 'Javascript')
    definitive = and_obj2.children[1]
    self.assertIsInstance(definitive, Term)
    self.assertEqual(definitive.term, 'Definitive')
    not_obj = and_obj1.children[1]
    self.assertIsInstance(not_obj, Not)
    guide = not_obj.child
    self.assertIsInstance(guide, Term)
    self.assertEqual(guide.term, 'Guide')
|
||||
|
||||
def test_filter_query(self):
|
||||
results = self.backend.search(Filter(Term('Javascript')),
|
||||
models.Book.objects.all())
|
||||
self.assertSetEqual({r.title for r in results},
|
||||
{'JavaScript: The Definitive Guide',
|
||||
'JavaScript: The good parts'})
|
||||
|
||||
results = self.backend.search(Filter(Term('Javascript'),
|
||||
include=Term('Definitive')),
|
||||
models.Book.objects.all())
|
||||
self.assertSetEqual({r.title for r in results},
|
||||
{'JavaScript: The Definitive Guide'})
|
||||
|
||||
results = self.backend.search(Filter(Term('Javascript'),
|
||||
include=Term('Definitive'),
|
||||
exclude=Term('Guide')),
|
||||
models.Book.objects.all())
|
||||
self.assertSetEqual({r.title for r in results}, set())
|
||||
|
||||
def test_boost_equivalent(self):
|
||||
boost = Boost(Term('Guide'), 5)
|
||||
equivalent = boost.children[0]
|
||||
self.assertIsInstance(equivalent, Term)
|
||||
self.assertAlmostEqual(equivalent.boost, 5)
|
||||
|
||||
boost = Boost(Term('Guide', boost=0.5), 5)
|
||||
equivalent = boost.children[0]
|
||||
self.assertIsInstance(equivalent, Term)
|
||||
self.assertAlmostEqual(equivalent.boost, 2.5)
|
||||
|
||||
boost = Boost(Boost(Term('Guide', 0.1), 3), 5)
|
||||
sub_boost = boost.children[0]
|
||||
self.assertIsInstance(sub_boost, Boost)
|
||||
sub_boost = sub_boost.children[0]
|
||||
self.assertIsInstance(sub_boost, Term)
|
||||
self.assertAlmostEqual(sub_boost.boost, 1.5)
|
||||
|
||||
boost = Boost(And([Boost(Term('Guide', 0.1), 3), Term('Two', 2)]), 5)
|
||||
and_obj = boost.children[0]
|
||||
self.assertIsInstance(and_obj, And)
|
||||
sub_boost = and_obj.children[0]
|
||||
self.assertIsInstance(sub_boost, Boost)
|
||||
guide = sub_boost.children[0]
|
||||
self.assertIsInstance(guide, Term)
|
||||
self.assertAlmostEqual(guide.boost, 1.5)
|
||||
two = and_obj.children[1]
|
||||
self.assertIsInstance(two, Term)
|
||||
self.assertAlmostEqual(two.boost, 10)
|
||||
|
||||
|
||||
@override_settings(
|
||||
WAGTAILSEARCH_BACKENDS={
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ class TestElasticsearch2SearchBackend(BackendTests, ElasticsearchCommonSearchBac
|
|||
super(TestElasticsearch2SearchBackend, self).test_delete()
|
||||
|
||||
#
|
||||
# Query classes
|
||||
# Basic query classes
|
||||
#
|
||||
|
||||
# Not implemented yet
|
||||
|
|
@ -66,6 +66,15 @@ class TestElasticsearch2SearchBackend(BackendTests, ElasticsearchCommonSearchBac
|
|||
def test_operators_combination(self):
|
||||
super().test_operators_combination()
|
||||
|
||||
#
|
||||
# Shortcut query classes
|
||||
#
|
||||
|
||||
# Not implemented yet
|
||||
@unittest.expectedFailure
|
||||
def test_filter_query(self):
|
||||
super().test_filter_query()
|
||||
|
||||
|
||||
class TestElasticsearch2SearchQuery(TestCase):
|
||||
def assertDictEqual(self, a, b):
|
||||
|
|
|
|||
Loading…
Reference in a new issue