Implemented search query classes for Elasticsearch

# Conflicts:
#	wagtail/search/backends/elasticsearch2.py
#	wagtail/search/tests/test_backends.py
This commit is contained in:
Karl Hobley 2017-12-15 13:29:53 +00:00 committed by Bertrand Bordage
parent 458ba92f05
commit bd6cb6dad7
4 changed files with 167 additions and 70 deletions

View file

@ -14,7 +14,7 @@ from wagtail.search.backends.base import (
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
from wagtail.search.index import (
FilterField, Indexed, RelatedFields, SearchField, class_is_indexed)
from wagtail.search.query import MatchAll, PlainText
from wagtail.search.query import MatchAll, Term, Prefix, Fuzzy, And, Or, Not, PlainText, Filter, Boost
def get_model_root(model):
@ -372,49 +372,151 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler):
return filter_out
def _compile_query(self, query):
if isinstance(query, MatchAll):
return {'match_all': {}}
def _compile_term_query(self, query_type, value, field, boost=1.0, **extra):
term_query = {
'value': value,
}
elif isinstance(query, PlainText):
fields = self.remapped_fields or ['_all', '_partials']
operator = query.operator
if boost != 1.0:
term_query['boost'] = boost
if len(fields) == 1:
if operator == 'or':
return {
'match': {
fields[0]: query.query_string,
}
}
return {
'match': {
fields[0]: {
'query': query.query_string,
'operator': operator,
}
}
}
return {
query_type: {
field: term_query,
}
}
query = {
'multi_match': {
'query': query.query_string,
'fields': fields,
def _compile_plaintext_query(self, query, fields, boost=1.0):
match_query = {
'query': query.query_string
}
if query.operator != 'or':
match_query['operator'] = query.operator
if boost != 1.0:
match_query['boost'] = boost
if len(fields) == 1:
return {
'match': {
fields[0]: match_query
}
}
if operator != 'or':
query['multi_match']['operator'] = operator
else:
match_query['fields'] = fields
return query
return {
'multi_match': match_query
}
def _compile_query(self, query, field, boost=1.0):
    """
    Recursively compile a search query object into a query clause
    targeting a single index field.

    ``query`` is the node of the query tree being compiled, ``field``
    the name of the index field to search and ``boost`` the boost
    accumulated from the enclosing nodes (it is multiplied with the
    node's own boost where the node has one).

    Raises ``NotImplementedError`` for query classes this backend does
    not know how to compile.
    """
    # NOTE(review): the clause name documented for bool queries is
    # `must_not`; the camelCase `mustNot` spelling is kept unchanged
    # below - confirm the targeted Elasticsearch versions accept it.
    if isinstance(query, MatchAll):
        match_all_query = {}

        if boost != 1.0:
            match_all_query['boost'] = boost

        return {'match_all': match_all_query}

    elif isinstance(query, Term):
        return self._compile_term_query('term', query.term, field, query.boost * boost)

    elif isinstance(query, Prefix):
        return self._compile_term_query('prefix', query.prefix, field, query.boost * boost)

    elif isinstance(query, Fuzzy):
        return self._compile_term_query('fuzzy', query.term, field, query.boost * boost, fuzziness=query.max_distance)

    elif isinstance(query, And):
        return {
            'bool': {
                'must': [
                    self._compile_query(child_query, field, boost)
                    for child_query in query.get_children()
                ]
            }
        }

    elif isinstance(query, Or):
        return {
            'bool': {
                'should': [
                    self._compile_query(child_query, field, boost)
                    for child_query in query.get_children()
                ]
            }
        }

    elif isinstance(query, Not):
        return {
            'bool': {
                'mustNot': self._compile_query(query.subquery, field, boost)
            }
        }

    elif isinstance(query, PlainText):
        # Compile the node being visited, not the root query: the node
        # may be nested inside And/Or/Not/Filter/Boost, in which case
        # the root has no query string to compile.
        return self._compile_plaintext_query(query, [field], boost)

    elif isinstance(query, Filter):
        bool_query = {
            'must': self._compile_query(query.query, field, boost),
        }

        # Include/exclude sub-queries are compiled with a boost of 0.0
        if query.include:
            bool_query['filter'] = self._compile_query(query.include, field, 0.0)

        if query.exclude:
            bool_query['mustNot'] = self._compile_query(query.exclude, field, 0.0)

        return {
            'bool': bool_query,
        }

    elif isinstance(query, Boost):
        return self._compile_query(query.subquery, field, boost * query.boost)

    else:
        raise NotImplementedError(
            '`%s` is not supported by the Elasticsearch search backend.'
            % query.__class__.__name__)
def get_inner_query(self):
    """
    Compile ``self.query`` into the query clause used for matching.

    The fields searched are ``self.remapped_fields`` or, when that is
    empty or unset, the '_all' and '_partials' fields. ``MatchAll`` and
    ``PlainText`` root queries are compiled directly; any other query is
    compiled once per field with ``_compile_query`` and, when several
    fields are involved, the per-field clauses are combined in a
    ``dis_max`` clause.
    """
    fields = self.remapped_fields or ['_all', '_partials']

    if not fields:
        # No fields. Return a query that'll match nothing
        # NOTE(review): because of the `or` fallback above `fields` is
        # never empty at this point - confirm whether an explicitly
        # empty field list was meant to reach this branch.
        return {
            'bool': {
                'mustNot': {'match_all': {}}
            }
        }

    # Handle MatchAll and PlainText separately as they were supported
    # before "search query classes" was implemented and we'd like to
    # keep the query the same as before
    if isinstance(self.query, MatchAll):
        return {'match_all': {}}

    if isinstance(self.query, PlainText):
        return self._compile_plaintext_query(self.query, fields)

    if len(fields) == 1:
        return self._compile_query(self.query, fields[0])

    # Compile a query for each field then combine with disjunction
    # max (or operator which takes the max score out of each of the
    # field queries)
    return {
        'dis_max': {
            'queries': [
                self._compile_query(self.query, field)
                for field in fields
            ]
        }
    }
def get_content_type_filter(self):
# Query content_type using a "match" query. See comment in

View file

@ -443,37 +443,32 @@ class QueryAPITestMixin:
def test_term(self):
    """A single-word term finds every book whose title contains it."""
    # NOTE(review): the term is given in lowercase and a multi-word
    # term is not exercised - presumably because a term is looked up
    # as one exact indexed token; confirm against the backends that
    # use this mixin.
    results = self.backend.search(Term('javascript'),
                                  models.Book.objects.all())

    self.assertSetEqual({r.title for r in results},
                        {'JavaScript: The Definitive Guide',
                         'JavaScript: The good parts'})
def test_and(self):
    """Both the ``And`` class and the ``&`` operator require every term."""
    expected_titles = {'JavaScript: The Definitive Guide'}

    # Explicit And() node
    results = self.backend.search(And([Term('javascript'),
                                       Term('definitive')]),
                                  models.Book.objects.all())
    self.assertSetEqual({r.title for r in results}, expected_titles)

    # Operator shorthand
    results = self.backend.search(Term('javascript') & Term('definitive'),
                                  models.Book.objects.all())
    self.assertSetEqual({r.title for r in results}, expected_titles)
def test_or(self):
    """Both the ``Or`` class and the ``|`` operator match either term."""
    expected_titles = {'The Hobbit', 'The Two Towers'}

    # Explicit Or() node
    results = self.backend.search(Or([Term('hobbit'), Term('towers')]),
                                  models.Book.objects.all())
    self.assertSetEqual({r.title for r in results}, expected_titles)

    # Operator shorthand
    results = self.backend.search(Term('hobbit') | Term('towers'),
                                  models.Book.objects.all())
    self.assertSetEqual({r.title for r in results}, expected_titles)
@ -493,19 +488,19 @@ class QueryAPITestMixin:
'Two Scoops of Django 1.11',
}
results = self.backend.search(Not(Term('Javascript')),
results = self.backend.search(Not(Term('javascript')),
models.Book.objects.all())
self.assertSetEqual({r.title for r in results}, all_other_titles)
results = self.backend.search(~Term('Javascript'),
results = self.backend.search(~Term('javascript'),
models.Book.objects.all())
self.assertSetEqual({r.title for r in results}, all_other_titles)
def test_operators_combination(self):
results = self.backend.search(
((Term('Javascript') & ~Term('Definitive')) |
Term('Python') | Term('Rust')) |
Term('Two'),
((Term('javascript') & ~Term('definitive')) |
Term('python') | Term('rust')) |
Term('two'),
models.Book.objects.all())
self.assertSetEqual({r.title for r in results},
{'JavaScript: The good parts',
@ -571,84 +566,84 @@ class QueryAPITestMixin:
operator='xor')
def test_filter_equivalent(self):
    """
    Check the equivalent query tree exposed by ``Filter.child``:
    the bare query when there is no include/exclude, otherwise nested
    ``And`` nodes whose include/exclude parts are wrapped in a zero
    ``Boost`` (and the exclude part additionally in a ``Not``).
    """
    # Query only
    filter_query = Filter(Term('javascript'))
    term = filter_query.child
    self.assertIsInstance(term, Term)
    self.assertEqual(term.term, 'javascript')

    # Query + include
    filter_query = Filter(Term('javascript'), include=Term('definitive'))
    and_obj = filter_query.child
    self.assertIsInstance(and_obj, And)
    javascript = and_obj.children[0]
    self.assertIsInstance(javascript, Term)
    self.assertEqual(javascript.term, 'javascript')
    boost_obj = and_obj.children[1]
    self.assertIsInstance(boost_obj, Boost)
    self.assertEqual(boost_obj.boost, 0)
    definitive = boost_obj.child
    self.assertIsInstance(definitive, Term)
    self.assertEqual(definitive.term, 'definitive')

    # Query + include + exclude
    filter_query = Filter(Term('javascript'),
                          include=Term('definitive'), exclude=Term('guide'))
    and_obj1 = filter_query.child
    self.assertIsInstance(and_obj1, And)
    and_obj2 = and_obj1.children[0]
    javascript = and_obj2.children[0]
    self.assertIsInstance(javascript, Term)
    self.assertEqual(javascript.term, 'javascript')
    boost_obj = and_obj2.children[1]
    self.assertIsInstance(boost_obj, Boost)
    self.assertEqual(boost_obj.boost, 0)
    definitive = boost_obj.child
    self.assertIsInstance(definitive, Term)
    self.assertEqual(definitive.term, 'definitive')
    boost_obj = and_obj1.children[1]
    self.assertIsInstance(boost_obj, Boost)
    self.assertEqual(boost_obj.boost, 0)
    not_obj = boost_obj.child
    self.assertIsInstance(not_obj, Not)
    guide = not_obj.child
    self.assertEqual(guide.term, 'guide')
def test_filter_query(self):
    """Search with ``Filter`` queries: bare, with include, with both."""
    # Query only: behaves like the wrapped term
    results = self.backend.search(Filter(Term('javascript')),
                                  models.Book.objects.all())
    self.assertSetEqual({r.title for r in results},
                        {'JavaScript: The Definitive Guide',
                         'JavaScript: The good parts'})

    # The include term narrows the results
    results = self.backend.search(Filter(Term('javascript'),
                                         include=Term('definitive')),
                                  models.Book.objects.all())
    self.assertSetEqual({r.title for r in results},
                        {'JavaScript: The Definitive Guide'})

    # The exclude term removes the only remaining match
    results = self.backend.search(Filter(Term('javascript'),
                                         include=Term('definitive'),
                                         exclude=Term('guide')),
                                  models.Book.objects.all())
    self.assertSetEqual({r.title for r in results}, set())
def test_boost_equivalent(self):
boost = Boost(Term('Guide'), 5)
boost = Boost(Term('guide'), 5)
equivalent = boost.children[0]
self.assertIsInstance(equivalent, Term)
self.assertAlmostEqual(equivalent.boost, 5)
boost = Boost(Term('Guide', boost=0.5), 5)
boost = Boost(Term('guide', boost=0.5), 5)
equivalent = boost.children[0]
self.assertIsInstance(equivalent, Term)
self.assertAlmostEqual(equivalent.boost, 2.5)
boost = Boost(Boost(Term('Guide', 0.1), 3), 5)
boost = Boost(Boost(Term('guide', 0.1), 3), 5)
sub_boost = boost.children[0]
self.assertIsInstance(sub_boost, Boost)
sub_boost = sub_boost.children[0]
self.assertIsInstance(sub_boost, Term)
self.assertAlmostEqual(sub_boost.boost, 1.5)
boost = Boost(And([Boost(Term('Guide', 0.1), 3), Term('Two', 2)]), 5)
boost = Boost(And([Boost(Term('guide', 0.1), 3), Term('two', 2)]), 5)
and_obj = boost.children[0]
self.assertIsInstance(and_obj, And)
sub_boost = and_obj.children[0]

View file

@ -123,7 +123,7 @@ class TestElasticsearch2SearchQuery(TestCase):
# Check it
expected_result = {'filtered': {
'filter': {'match': {'content_type': 'searchtests.Book'}},
'query': {'match': {'title': 'Hello'}}
'query': {'match': {'title': {'query': 'Hello'}}}
}}
self.assertDictEqual(query_compiler.get_query(), expected_result)

View file

@ -122,7 +122,7 @@ class TestElasticsearch5SearchQuery(TestCase):
# Check it
expected_result = {'bool': {
'filter': {'match': {'content_type': 'searchtests.Book'}},
'must': {'match': {'title': 'Hello'}}
'query': {'match': {'title': {'query': 'Hello'}}}
}}
self.assertDictEqual(query_compiler.get_query(), expected_result)