Adds search query API & implements PlainText & MatchAll.

This commit is contained in:
Bertrand Bordage 2017-11-23 15:56:41 +01:00
parent 8d31cd4707
commit 2ba532d745
6 changed files with 178 additions and 82 deletions

View file

@ -12,6 +12,7 @@ from django.utils.encoding import force_text
from wagtail.wagtailsearch.backends.base import (
BaseSearchBackend, BaseSearchQuery, BaseSearchResults)
from wagtail.wagtailsearch.index import RelatedFields, SearchField
from wagtail.wagtailsearch.query import MatchAll, PlainText
from .models import IndexEntry
from .utils import (
@ -174,8 +175,8 @@ class PostgresSearchQuery(BaseSearchQuery):
self.search_fields = self.queryset.model.get_search_fields()
def get_search_query(self, config):
combine = OR if self.operator == 'or' else AND
search_terms = keyword_split(unidecode(self.query_string))
combine = OR if self.query.operator == 'or' else AND
search_terms = keyword_split(unidecode(self.query.query_string))
if not search_terms:
return SearchQuery('')
return combine(SearchQuery(q, config=config) for q in search_terms)
@ -197,8 +198,14 @@ class PostgresSearchQuery(BaseSearchQuery):
return field.boost
def search(self, config, start, stop):
if self.query_string is None:
if isinstance(self.query, MatchAll):
return self.queryset[start:stop]
if not isinstance(self.query, PlainText):
raise NotImplementedError(
'%s is not supported by the PostgreSQL search backend.'
% self.query.__class__)
search_query = self.get_search_query(config=config)
queryset = self.queryset
query = queryset.query

View file

@ -1,11 +1,14 @@
from __future__ import absolute_import, unicode_literals
from warnings import warn
from django.db.models.lookups import Lookup
from django.db.models.query import QuerySet
from django.db.models.sql.where import SubqueryConstraint, WhereNode
from wagtail.wagtailsearch.index import class_is_indexed
from wagtail.wagtailsearch.query import MATCH_ALL, PlainText
class FilterError(Exception):
@ -19,11 +22,17 @@ class FieldError(Exception):
class BaseSearchQuery(object):
DEFAULT_OPERATOR = 'or'
def __init__(self, queryset, query_string, fields=None, operator=None, order_by_relevance=True):
def __init__(self, queryset, query, fields=None, operator=None, order_by_relevance=True):
self.queryset = queryset
self.query_string = query_string
if query is None:
warn('Querying `None` is deprecated, use `MATCH_ALL` instead.',
DeprecationWarning)
query = MATCH_ALL
elif isinstance(query, str):
query = PlainText(query,
operator=operator or self.DEFAULT_OPERATOR)
self.query = query
self.fields = fields
self.operator = operator or self.DEFAULT_OPERATOR
self.order_by_relevance = order_by_relevance
def _get_filterable_field(self, field_attname):

View file

@ -5,6 +5,7 @@ from django.db.models.expressions import Value
from wagtail.wagtailsearch.backends.base import (
BaseSearchBackend, BaseSearchQuery, BaseSearchResults)
from wagtail.wagtailsearch.query import MatchAll, PlainText
class DatabaseSearchQuery(BaseSearchQuery):
@ -36,32 +37,41 @@ class DatabaseSearchQuery(BaseSearchQuery):
q = models.Q()
model = self.queryset.model
if self.query_string is not None:
# Get fields
fields = self.fields or [field.field_name for field in model.get_searchable_search_fields()]
if isinstance(self.query, MatchAll):
return q
# Get terms
terms = self.query_string.split()
if not terms:
return model.objects.none()
if not isinstance(self.query, PlainText):
raise NotImplementedError(
'%s is not supported by the database search backend.'
% self.query.__class__)
# Filter by terms
for term in terms:
term_query = models.Q()
for field_name in fields:
# Check if the field exists (this will filter out indexed callables)
try:
model._meta.get_field(field_name)
except models.fields.FieldDoesNotExist:
continue
# Get fields
fields = self.fields or [field.field_name for field in model.get_searchable_search_fields()]
# Filter on this field
term_query |= models.Q(**{'%s__icontains' % field_name: term})
# Get terms
terms = self.query.query_string.split()
if not terms:
return model.objects.none()
if self.operator == 'or':
q |= term_query
elif self.operator == 'and':
q &= term_query
# Filter by terms
for term in terms:
term_query = models.Q()
for field_name in fields:
# Check if the field exists (this will filter out indexed callables)
try:
model._meta.get_field(field_name)
except models.fields.FieldDoesNotExist:
continue
# Filter on this field
term_query |= models.Q(**{'%s__icontains' % field_name: term})
operator = self.query.operator
if operator == 'or':
q |= term_query
elif operator == 'and':
q &= term_query
return q

View file

@ -16,6 +16,7 @@ from wagtail.wagtailsearch.backends.base import (
BaseSearchBackend, BaseSearchQuery, BaseSearchResults)
from wagtail.wagtailsearch.index import (
FilterField, Indexed, RelatedFields, SearchField, class_is_indexed)
from wagtail.wagtailsearch.query import MatchAll, PlainText
def get_model_root(model):
@ -371,40 +372,35 @@ class Elasticsearch2SearchQuery(BaseSearchQuery):
return filter_out
def get_inner_query(self):
    """
    Build the Elasticsearch query clause for ``self.query``.

    Returns a ``match_all`` clause for ``MatchAll`` queries. For
    ``PlainText`` queries, returns a ``match`` clause when exactly one
    field is targeted and a ``multi_match`` clause otherwise.

    Raises ``NotImplementedError`` for any other query type.
    """
    if isinstance(self.query, MatchAll):
        return {'match_all': {}}

    if not isinstance(self.query, PlainText):
        raise NotImplementedError(
            '%s is not supported by the Elasticsearch search backend.'
            % self.query.__class__)

    fields = self.fields or ['_all', '_partials']
    query_string = self.query.query_string
    # A PlainText built without an explicit operator carries ``None``;
    # fall back to the operator resolved for this search query.
    operator = self.query.operator or self.operator

    if len(fields) == 1:
        field = fields[0]
        if operator == 'or':
            # Short form: the field maps directly to the query string.
            return {'match': {field: query_string}}
        # An operator can only be attached in the object form. Assigning
        # ``['operator']`` on the short form would index into a string
        # and raise TypeError.
        return {
            'match': {
                field: {
                    'query': query_string,
                    'operator': operator,
                }
            }
        }

    multi_match = {
        'query': query_string,
        'fields': fields,
    }
    if operator != 'or':
        multi_match['operator'] = operator
    return {'multi_match': multi_match}
def get_content_type_filter(self):

View file

@ -0,0 +1,74 @@
class SearchQuery(object):
    """
    Base class of every node in a backend-agnostic search query tree.

    Nodes can be combined with ``&`` (``And``), ``|`` (``Or``) and
    negated with ``~`` (``Not``).
    """

    def __and__(self, other):
        return And([self, other])

    def __or__(self, other):
        return Or([self, other])

    def __invert__(self):
        return Not(self)

    def __repr__(self):
        # Attribute names mirror the constructor keyword names in every
        # subclass, so this reads like the call that built the node.
        attrs = ', '.join(
            '%s=%r' % item for item in sorted(vars(self).items()))
        return '%s(%s)' % (type(self).__name__, attrs)


class SearchQueryOperator(SearchQuery):
    """Marker base class for nodes that combine or modify other nodes."""
    pass


class And(SearchQueryOperator):
    """Matches only when every one of ``subqueries`` matches."""

    def __init__(self, subqueries):
        # Copy so a generator argument is not exhausted after one pass
        # and later mutation of the caller's list does not leak in.
        self.subqueries = list(subqueries)


class Or(SearchQueryOperator):
    """Matches when at least one of ``subqueries`` matches."""

    def __init__(self, subqueries):
        # Copy for the same reason as in ``And``.
        self.subqueries = list(subqueries)


class Not(SearchQueryOperator):
    """Negation of ``subquery``."""

    def __init__(self, subquery):
        self.subquery = subquery


class MatchAll(SearchQuery):
    """Query that places no constraint on the searched text."""
    pass


class PlainText(SearchQuery):
    """
    Free-text query string, split into terms by the backend.

    ``operator`` is ``'and'``, ``'or'`` (case-insensitive) or ``None``.
    ``None`` is stored unchanged, leaving the choice to whoever consumes
    the query. Any other value raises ``ValueError``.
    """

    OPERATORS = ('and', 'or')

    def __init__(self, query_string, operator=None, boost=1.0):
        if operator is not None:
            operator = operator.lower()
            if operator not in self.OPERATORS:
                # Reject early: consumers compare against 'and'/'or'
                # literally and would silently mishandle anything else.
                raise ValueError(
                    "`operator` must be either 'or' or 'and'.")
        self.query_string = query_string
        self.operator = operator
        self.boost = boost


class Term(SearchQuery):
    """A single term, with an optional ``boost`` factor."""

    def __init__(self, term, boost=1.0):
        self.term = term
        self.boost = boost


class Prefix(SearchQuery):
    """A term prefix, with an optional ``boost`` factor."""

    def __init__(self, prefix, boost=1.0):
        self.prefix = prefix
        self.boost = boost


class Fuzzy(SearchQuery):
    """A term with a ``max_distance`` tolerance and optional ``boost``."""

    def __init__(self, term, max_distance=3, boost=1.0):
        self.term = term
        self.max_distance = max_distance
        self.boost = boost


class Boost(SearchQuery):
    """Wraps ``query`` together with a ``boost`` factor."""

    def __init__(self, query, boost):
        self.query = query
        self.boost = boost


class Filter(SearchQuery):
    """Wraps ``query`` together with ``include`` / ``exclude`` queries."""

    def __init__(self, query, include=None, exclude=None):
        self.query = query
        self.include = include
        self.exclude = exclude


# Shared instance to pass where "match everything" is wanted.
MATCH_ALL = MatchAll()

View file

@ -17,6 +17,7 @@ from wagtail.wagtailsearch.backends import (
InvalidSearchBackendError, get_search_backend, get_search_backends)
from wagtail.wagtailsearch.backends.base import FieldError
from wagtail.wagtailsearch.backends.db import DatabaseSearchBackend
from wagtail.wagtailsearch.query import MATCH_ALL
class BackendTests(WagtailTestUtils):
@ -65,8 +66,7 @@ class BackendTests(WagtailTestUtils):
self.assertSetEqual(set(results), set())
def test_search_all(self):
# Searches on None should return everything in the index
results = self.backend.search(None, models.Book)
results = self.backend.search(MATCH_ALL, models.Book)
self.assertSetEqual(set(results), set(models.Book.objects.all()))
def test_ranking(self):
@ -90,7 +90,7 @@ class BackendTests(WagtailTestUtils):
def test_search_on_child_class(self):
# Searches on a child class should only return results that have the child class as well
# and all results should be instances of the child class
results = self.backend.search(None, models.Novel)
results = self.backend.search(MATCH_ALL, models.Novel)
self.assertSetEqual(set(results), set(models.Novel.objects.all()))
def test_search_child_class_field_from_parent(self):
@ -162,7 +162,7 @@ class BackendTests(WagtailTestUtils):
# FILTERING TESTS
def test_filter_exact_value(self):
results = self.backend.search(None, models.Book.objects.filter(number_of_pages=440))
results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages=440))
self.assertUnsortedListEqual([r.title for r in results], [
"The Return of the King",
@ -170,14 +170,14 @@ class BackendTests(WagtailTestUtils):
])
def test_filter_exact_value_on_parent_model_field(self):
results = self.backend.search(None, models.Novel.objects.filter(number_of_pages=440))
results = self.backend.search(MATCH_ALL, models.Novel.objects.filter(number_of_pages=440))
self.assertUnsortedListEqual([r.title for r in results], [
"The Return of the King"
])
def test_filter_lt(self):
results = self.backend.search(None, models.Book.objects.filter(number_of_pages__lt=440))
results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__lt=440))
self.assertUnsortedListEqual([r.title for r in results], [
"The Hobbit",
@ -188,7 +188,7 @@ class BackendTests(WagtailTestUtils):
])
def test_filter_lte(self):
results = self.backend.search(None, models.Book.objects.filter(number_of_pages__lte=440))
results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__lte=440))
self.assertUnsortedListEqual([r.title for r in results], [
"The Return of the King",
@ -201,7 +201,7 @@ class BackendTests(WagtailTestUtils):
])
def test_filter_gt(self):
results = self.backend.search(None, models.Book.objects.filter(number_of_pages__gt=440))
results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__gt=440))
self.assertUnsortedListEqual([r.title for r in results], [
"JavaScript: The Definitive Guide",
@ -213,7 +213,7 @@ class BackendTests(WagtailTestUtils):
])
def test_filter_gte(self):
results = self.backend.search(None, models.Book.objects.filter(number_of_pages__gte=440))
results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__gte=440))
self.assertUnsortedListEqual([r.title for r in results], [
"The Return of the King",
@ -227,7 +227,7 @@ class BackendTests(WagtailTestUtils):
])
def test_filter_in_list(self):
results = self.backend.search(None, models.Book.objects.filter(number_of_pages__in=[440, 1160]))
results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__in=[440, 1160]))
self.assertUnsortedListEqual([r.title for r in results], [
"The Return of the King",
@ -236,7 +236,7 @@ class BackendTests(WagtailTestUtils):
])
def test_filter_in_iterable(self):
results = self.backend.search(None, models.Book.objects.filter(number_of_pages__in=iter([440, 1160])))
results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__in=iter([440, 1160])))
self.assertUnsortedListEqual([r.title for r in results], [
"The Return of the King",
@ -246,7 +246,7 @@ class BackendTests(WagtailTestUtils):
def test_filter_in_values_list_subquery(self):
values = models.Book.objects.filter(number_of_pages__lt=440).values_list('number_of_pages', flat=True)
results = self.backend.search(None, models.Book.objects.filter(number_of_pages__in=values))
results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages__in=values))
self.assertUnsortedListEqual([r.title for r in results], [
"The Hobbit",
@ -258,7 +258,7 @@ class BackendTests(WagtailTestUtils):
def test_filter_isnull_true(self):
# Note: We don't know the birth dates of any of the programming guide authors
results = self.backend.search(None, models.Author.objects.filter(date_of_birth__isnull=True))
results = self.backend.search(MATCH_ALL, models.Author.objects.filter(date_of_birth__isnull=True))
self.assertUnsortedListEqual([r.name for r in results], [
"David Ascher",
@ -273,7 +273,7 @@ class BackendTests(WagtailTestUtils):
def test_filter_isnull_false(self):
# Note: We know the birth dates of all of the novel authors
results = self.backend.search(None, models.Author.objects.filter(date_of_birth__isnull=False))
results = self.backend.search(MATCH_ALL, models.Author.objects.filter(date_of_birth__isnull=False))
self.assertUnsortedListEqual([r.name for r in results], [
"Isaac Asimov",
@ -282,7 +282,7 @@ class BackendTests(WagtailTestUtils):
])
def test_filter_prefix(self):
results = self.backend.search(None, models.Book.objects.filter(title__startswith="Th"))
results = self.backend.search(MATCH_ALL, models.Book.objects.filter(title__startswith="Th"))
self.assertUnsortedListEqual([r.title for r in results], [
"The Hobbit",
@ -294,14 +294,14 @@ class BackendTests(WagtailTestUtils):
def test_filter_and_operator(self):
results = self.backend.search(
None, models.Book.objects.filter(number_of_pages=440) & models.Book.objects.filter(publication_date=date(1955, 10, 20)))
MATCH_ALL, models.Book.objects.filter(number_of_pages=440) & models.Book.objects.filter(publication_date=date(1955, 10, 20)))
self.assertUnsortedListEqual([r.title for r in results], [
"The Return of the King"
])
def test_filter_or_operator(self):
results = self.backend.search(None, models.Book.objects.filter(number_of_pages=440) | models.Book.objects.filter(number_of_pages=1160))
results = self.backend.search(MATCH_ALL, models.Book.objects.filter(number_of_pages=440) | models.Book.objects.filter(number_of_pages=1160))
self.assertUnsortedListEqual([r.title for r in results], [
"Learning Python",
@ -311,12 +311,12 @@ class BackendTests(WagtailTestUtils):
def test_filter_on_non_filterable_field(self):
with self.assertRaises(FieldError):
list(self.backend.search(None, models.Author.objects.filter(name__startswith="Issac")))
list(self.backend.search(MATCH_ALL, models.Author.objects.filter(name__startswith="Issac")))
# ORDER BY RELEVANCE
def test_order_by_relevance(self):
results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False)
results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False)
# Ordering should be set to "number_of_pages"
self.assertEqual([r.title for r in results], [
@ -332,19 +332,19 @@ class BackendTests(WagtailTestUtils):
def test_order_by_non_filterable_field(self):
with self.assertRaises(FieldError):
list(self.backend.search(None, models.Author.objects.order_by('name'), order_by_relevance=False))
list(self.backend.search(MATCH_ALL, models.Author.objects.order_by('name'), order_by_relevance=False))
# SLICING TESTS
def test_single_result(self):
results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False)
results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False)
self.assertEqual(results[0].title, "Foundation")
self.assertEqual(results[1].title, "The Hobbit")
def test_limit(self):
# Note: we need consistent ordering for this test
results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False)
results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False)
# Limit the results
results = results[:3]
@ -357,7 +357,7 @@ class BackendTests(WagtailTestUtils):
def test_offset(self):
# Note: we need consistent ordering for this test
results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False)
results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False)
# Offset the results
results = results[3:]
@ -372,7 +372,7 @@ class BackendTests(WagtailTestUtils):
def test_offset_and_limit(self):
# Note: we need consistent ordering for this test
results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False)
results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False)
# Offset the results
results = results[3:6]
@ -419,7 +419,7 @@ class BackendTests(WagtailTestUtils):
# instead of three).
# Note: we need consistent ordering for this test
results = self.backend.search(None, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False)
results = self.backend.search(MATCH_ALL, models.Novel.objects.order_by('number_of_pages'), order_by_relevance=False)
# Limit the results
results = results[:3]