From bd6cb6dad779e58fa9f9f7e0ce28b891db8105f2 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 15 Dec 2017 13:29:53 +0000 Subject: [PATCH] Implemented search query classes for Elasticsearch # Conflicts: # wagtail/search/backends/elasticsearch2.py # wagtail/search/tests/test_backends.py --- wagtail/search/backends/elasticsearch2.py | 165 ++++++++++++++---- wagtail/search/tests/test_backends.py | 69 ++++---- .../tests/test_elasticsearch2_backend.py | 2 +- .../tests/test_elasticsearch5_backend.py | 2 +- 4 files changed, 169 insertions(+), 69 deletions(-) diff --git a/wagtail/search/backends/elasticsearch2.py b/wagtail/search/backends/elasticsearch2.py index af9d3aafe..05e544b34 100644 --- a/wagtail/search/backends/elasticsearch2.py +++ b/wagtail/search/backends/elasticsearch2.py @@ -14,7 +14,7 @@ from wagtail.search.backends.base import ( BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) from wagtail.search.index import ( FilterField, Indexed, RelatedFields, SearchField, class_is_indexed) -from wagtail.search.query import MatchAll, PlainText +from wagtail.search.query import MatchAll, Term, Prefix, Fuzzy, And, Or, Not, PlainText, Filter, Boost def get_model_root(model): @@ -372,49 +372,154 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler): return filter_out - def _compile_query(self, query): - if isinstance(query, MatchAll): - return {'match_all': {}} + def _compile_term_query(self, query_type, value, field, boost=1.0, **extra): + term_query = { + 'value': value, + } + # Forward query-specific options (e.g. fuzziness) passed as keyword arguments + term_query.update(extra) - elif isinstance(query, PlainText): - fields = self.remapped_fields or ['_all', '_partials'] - operator = query.operator + if boost != 1.0: + term_query['boost'] = boost - if len(fields) == 1: - if operator == 'or': - return { - 'match': { - fields[0]: query.query_string, - } - } - return { - 'match': { - fields[0]: { - 'query': query.query_string, - 'operator': operator, - } - } - } + return { + query_type: { + field: term_query, + } + } - query = { - 'multi_match': { 
- 'query': query.query_string, - 'fields': fields, + def _compile_plaintext_query(self, query, fields, boost=1.0): + match_query = { + 'query': query.query_string + } + + if query.operator != 'or': + match_query['operator'] = query.operator + + if boost != 1.0: + match_query['boost'] = boost + + if len(fields) == 1: + return { + 'match': { + fields[0]: match_query } } - if operator != 'or': - query['multi_match']['operator'] = operator + else: + match_query['fields'] = fields - return query + return { + 'multi_match': match_query + } + + def _compile_query(self, query, field, boost=1.0): + if isinstance(query, MatchAll): + match_all_query = {} + + if boost != 1.0: + match_all_query['boost'] = boost + + return {'match_all': match_all_query} + + elif isinstance(query, Term): + return self._compile_term_query('term', query.term, field, query.boost * boost) + + elif isinstance(query, Prefix): + return self._compile_term_query('prefix', query.prefix, field, query.boost * boost) + + elif isinstance(query, Fuzzy): + return self._compile_term_query('fuzzy', query.term, field, query.boost * boost, fuzziness=query.max_distance) + + elif isinstance(query, And): + return { + 'bool': { + 'must': [ + self._compile_query(child_query, field, boost) + for child_query in query.get_children() + ] + } + } + + elif isinstance(query, Or): + return { + 'bool': { + 'should': [ + self._compile_query(child_query, field, boost) + for child_query in query.get_children() + ] + } + } + + elif isinstance(query, Not): + return { + 'bool': { + 'mustNot': self._compile_query(query.subquery, field, boost) + } + } + + elif isinstance(query, PlainText): + return self._compile_plaintext_query(query, [field], boost) + + elif isinstance(query, Filter): + bool_query = { + 'must': self._compile_query(query.query, field, boost), + } + + if query.include: + bool_query['filter'] = self._compile_query(query.include, field, 0.0) + + if query.exclude: + bool_query['mustNot'] = 
self._compile_query(query.exclude, field, 0.0) + + return { + 'bool': bool_query, + } + + elif isinstance(query, Boost): + return self._compile_query(query.subquery, field, boost * query.boost) else: raise NotImplementedError( '`%s` is not supported by the Elasticsearch search backend.' % query.__class__.__name__) def get_inner_query(self): - return self._compile_query(self.query) + fields = self.remapped_fields or ['_all', '_partials'] + + if len(fields) == 0: + # No fields. Return a query that'll match nothing + return { + 'bool': { + 'mustNot': {'match_all': {}} + } + } + + # Handle MatchAll and PlainText separately as they were supported + # before "search query classes" was implemented and we'd like to + # keep the query the same as before + if isinstance(self.query, MatchAll): + return {'match_all': {}} + + elif isinstance(self.query, PlainText): + return self._compile_plaintext_query(self.query, fields) + + else: + if len(fields) == 1: + return self._compile_query(self.query, fields[0]) + else: + # Compile a query for each field then combine with disjunction + # max (or operator which takes the max score out of each of the + # field queries) + field_queries = [] + for field in fields: + field_queries.append(self._compile_query(self.query, field)) + + return { + 'dis_max': { + 'queries': field_queries + } + } def get_content_type_filter(self): # Query content_type using a "match" query. 
See comment in diff --git a/wagtail/search/tests/test_backends.py b/wagtail/search/tests/test_backends.py index 784902afe..0baba13fd 100644 --- a/wagtail/search/tests/test_backends.py +++ b/wagtail/search/tests/test_backends.py @@ -443,37 +443,32 @@ class QueryAPITestMixin: def test_term(self): # Single word - results = self.backend.search(Term('Javascript'), + results = self.backend.search(Term('javascript'), models.Book.objects.all()) + self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide', 'JavaScript: The good parts'}) - # Multiple word - results = self.backend.search(Term('Definitive Guide'), - models.Book.objects.all()) - self.assertSetEqual({r.title for r in results}, - {'JavaScript: The Definitive Guide'}) - def test_and(self): - results = self.backend.search(And([Term('Javascript'), - Term('Definitive')]), + results = self.backend.search(And([Term('javascript'), + Term('definitive')]), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide'}) - results = self.backend.search(Term('Javascript') & Term('Definitive'), + results = self.backend.search(Term('javascript') & Term('definitive'), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide'}) def test_or(self): - results = self.backend.search(Or([Term('Hobbit'), Term('Towers')]), + results = self.backend.search(Or([Term('hobbit'), Term('towers')]), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'The Hobbit', 'The Two Towers'}) - results = self.backend.search(Term('Hobbit') | Term('Towers'), + results = self.backend.search(Term('hobbit') | Term('towers'), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'The Hobbit', 'The Two Towers'}) @@ -493,19 +488,19 @@ class QueryAPITestMixin: 'Two Scoops of Django 1.11', } - results = self.backend.search(Not(Term('Javascript')), + results = 
self.backend.search(Not(Term('javascript')), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, all_other_titles) - results = self.backend.search(~Term('Javascript'), + results = self.backend.search(~Term('javascript'), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, all_other_titles) def test_operators_combination(self): results = self.backend.search( - ((Term('Javascript') & ~Term('Definitive')) | - Term('Python') | Term('Rust')) | - Term('Two'), + ((Term('javascript') & ~Term('definitive')) | + Term('python') | Term('rust')) | + Term('two'), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'JavaScript: The good parts', @@ -571,84 +566,84 @@ class QueryAPITestMixin: operator='xor') def test_filter_equivalent(self): - filter = Filter(Term('Javascript')) + filter = Filter(Term('javascript')) term = filter.child self.assertIsInstance(term, Term) - self.assertEqual(term.term, 'Javascript') + self.assertEqual(term.term, 'javascript') - filter = Filter(Term('Javascript'), include=Term('Definitive')) + filter = Filter(Term('javascript'), include=Term('definitive')) and_obj = filter.child self.assertIsInstance(and_obj, And) javascript = and_obj.children[0] self.assertIsInstance(javascript, Term) - self.assertEqual(javascript.term, 'Javascript') + self.assertEqual(javascript.term, 'javascript') boost_obj = and_obj.children[1] self.assertIsInstance(boost_obj, Boost) self.assertEqual(boost_obj.boost, 0) definitive = boost_obj.child self.assertIsInstance(definitive, Term) - self.assertEqual(definitive.term, 'Definitive') + self.assertEqual(definitive.term, 'definitive') - filter = Filter(Term('Javascript'), - include=Term('Definitive'), exclude=Term('Guide')) + filter = Filter(Term('javascript'), + include=Term('definitive'), exclude=Term('guide')) and_obj1 = filter.child self.assertIsInstance(and_obj1, And) and_obj2 = and_obj1.children[0] javascript = and_obj2.children[0] 
self.assertIsInstance(javascript, Term) - self.assertEqual(javascript.term, 'Javascript') + self.assertEqual(javascript.term, 'javascript') boost_obj = and_obj2.children[1] self.assertIsInstance(boost_obj, Boost) self.assertEqual(boost_obj.boost, 0) definitive = boost_obj.child self.assertIsInstance(definitive, Term) - self.assertEqual(definitive.term, 'Definitive') + self.assertEqual(definitive.term, 'definitive') boost_obj = and_obj1.children[1] self.assertIsInstance(boost_obj, Boost) self.assertEqual(boost_obj.boost, 0) not_obj = boost_obj.child self.assertIsInstance(not_obj, Not) guide = not_obj.child - self.assertEqual(guide.term, 'Guide') + self.assertEqual(guide.term, 'guide') def test_filter_query(self): - results = self.backend.search(Filter(Term('Javascript')), + results = self.backend.search(Filter(Term('javascript')), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide', 'JavaScript: The good parts'}) - results = self.backend.search(Filter(Term('Javascript'), - include=Term('Definitive')), + results = self.backend.search(Filter(Term('javascript'), + include=Term('definitive')), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, {'JavaScript: The Definitive Guide'}) - results = self.backend.search(Filter(Term('Javascript'), - include=Term('Definitive'), - exclude=Term('Guide')), + results = self.backend.search(Filter(Term('javascript'), + include=Term('definitive'), + exclude=Term('guide')), models.Book.objects.all()) self.assertSetEqual({r.title for r in results}, set()) def test_boost_equivalent(self): - boost = Boost(Term('Guide'), 5) + boost = Boost(Term('guide'), 5) equivalent = boost.children[0] self.assertIsInstance(equivalent, Term) self.assertAlmostEqual(equivalent.boost, 5) - boost = Boost(Term('Guide', boost=0.5), 5) + boost = Boost(Term('guide', boost=0.5), 5) equivalent = boost.children[0] self.assertIsInstance(equivalent, Term) 
self.assertAlmostEqual(equivalent.boost, 2.5) - boost = Boost(Boost(Term('Guide', 0.1), 3), 5) + boost = Boost(Boost(Term('guide', 0.1), 3), 5) sub_boost = boost.children[0] self.assertIsInstance(sub_boost, Boost) sub_boost = sub_boost.children[0] self.assertIsInstance(sub_boost, Term) self.assertAlmostEqual(sub_boost.boost, 1.5) - boost = Boost(And([Boost(Term('Guide', 0.1), 3), Term('Two', 2)]), 5) + boost = Boost(And([Boost(Term('guide', 0.1), 3), Term('two', 2)]), 5) and_obj = boost.children[0] self.assertIsInstance(and_obj, And) sub_boost = and_obj.children[0] diff --git a/wagtail/search/tests/test_elasticsearch2_backend.py b/wagtail/search/tests/test_elasticsearch2_backend.py index a090ae413..02e3a5264 100644 --- a/wagtail/search/tests/test_elasticsearch2_backend.py +++ b/wagtail/search/tests/test_elasticsearch2_backend.py @@ -123,7 +123,7 @@ class TestElasticsearch2SearchQuery(TestCase): # Check it expected_result = {'filtered': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, - 'query': {'match': {'title': 'Hello'}} + 'query': {'match': {'title': {'query': 'Hello'}}} }} self.assertDictEqual(query_compiler.get_query(), expected_result) diff --git a/wagtail/search/tests/test_elasticsearch5_backend.py b/wagtail/search/tests/test_elasticsearch5_backend.py index ff00efaaa..3fcf00a28 100644 --- a/wagtail/search/tests/test_elasticsearch5_backend.py +++ b/wagtail/search/tests/test_elasticsearch5_backend.py @@ -122,7 +122,7 @@ class TestElasticsearch5SearchQuery(TestCase): # Check it expected_result = {'bool': { 'filter': {'match': {'content_type': 'searchtests.Book'}}, - 'must': {'match': {'title': 'Hello'}} + 'must': {'match': {'title': {'query': 'Hello'}}} }} self.assertDictEqual(query_compiler.get_query(), expected_result)