Started work in refactor

This commit is contained in:
David Sauve 2009-10-21 08:41:27 -04:00
parent 817943bfc7
commit ed4ef56225
3 changed files with 96 additions and 163 deletions

View file

@ -18,4 +18,4 @@ import warnings
warnings.simplefilter('ignore', Warning)
from xapian_tests.tests.xapian_query import *
from xapian_tests.tests.xapian_backend import *
# from xapian_tests.tests.xapian_backend import *

View file

@ -21,6 +21,7 @@ from django.conf import settings
from django.test import TestCase
from haystack.backends.xapian_backend import SearchBackend, SearchQuery
from haystack.query import SQ
from core.models import MockModel, AnotherMockModel
@ -49,78 +50,78 @@ class XapianSearchQueryTestCase(TestCase):
super(XapianSearchQueryTestCase, self).tearDown()
def test_build_query_all(self):
self.assertEqual(self.sq.build_query(), '*')
self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query(<alldocuments>)')
def test_build_query_single_word(self):
self.sq.add_filter('content', 'hello')
self.assertEqual(self.sq.build_query(), 'hello')
self.sq.add_filter(SQ(content='hello'))
self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query(hello)')
def test_build_query_multiple_words_and(self):
self.sq.add_filter('content', 'hello')
self.sq.add_filter('content', 'world')
self.assertEqual(self.sq.build_query(), 'hello AND world')
self.sq.add_filter(SQ(content='hello'))
self.sq.add_filter(SQ(content='world'))
self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((hello AND world))')
def test_build_query_multiple_words_not(self):
self.sq.add_filter('content', 'hello', use_not=True)
self.sq.add_filter('content', 'world', use_not=True)
self.assertEqual(self.sq.build_query(), 'NOT hello NOT world')
def test_build_query_multiple_words_or(self):
self.sq.add_filter('content', 'hello', use_or=True)
self.sq.add_filter('content', 'world', use_or=True)
self.assertEqual(self.sq.build_query(), 'hello OR world')
def test_build_query_multiple_words_mixed(self):
self.sq.add_filter('content', 'why')
self.sq.add_filter('content', 'hello', use_or=True)
self.sq.add_filter('content', 'world', use_not=True)
self.assertEqual(self.sq.build_query(), 'why OR hello NOT world')
def test_build_query_phrase(self):
self.sq.add_filter('content', 'hello world')
self.assertEqual(self.sq.build_query(), '"hello world"')
def test_build_query_multiple_filter_types(self):
self.sq.add_filter('content', 'why')
self.sq.add_filter('pub_date__lte', datetime.datetime(2009, 2, 10, 1, 59))
self.sq.add_filter('author__gt', 'david')
self.sq.add_filter('created__lt', datetime.datetime(2009, 2, 12, 12, 13))
self.sq.add_filter('title__gte', 'B')
self.sq.add_filter('id__in', [1, 2, 3])
self.assertEqual(self.sq.build_query(), 'why AND pub_date:..20090210015900 AND NOT author:..david AND NOT created:20090212121300..* AND title:B..* AND (id:1 OR id:2 OR id:3)')
def test_build_query_multiple_exclude_types(self):
self.sq.add_filter('content', 'why', use_not=True)
self.sq.add_filter('pub_date__lte', datetime.datetime(2009, 2, 10, 1, 59), use_not=True)
self.sq.add_filter('author__gt', 'david', use_not=True)
self.sq.add_filter('created__lt', datetime.datetime(2009, 2, 12, 12, 13), use_not=True)
self.sq.add_filter('title__gte', 'B', use_not=True)
self.sq.add_filter('id__in', [1, 2, 3], use_not=True)
self.assertEqual(self.sq.build_query(), 'NOT why AND NOT pub_date:..20090210015900 AND author:..david AND created:20090212121300..* AND NOT title:B..* AND NOT id:1 NOT id:2 NOT id:3')
def test_build_query_wildcard_filter_types(self):
self.sq.add_filter('content', 'why')
self.sq.add_filter('title__startswith', 'haystack')
self.assertEqual(self.sq.build_query(), 'why AND title:haystack*')
def test_clean(self):
self.assertEqual(self.sq.clean('hello world'), 'hello world')
self.assertEqual(self.sq.clean('hello AND world'), 'hello and world')
self.assertEqual(self.sq.clean('hello AND OR NOT + - && || ! ( ) { } [ ] ^ " ~ * ? : \ world'), 'hello and or not \\+ \\- \\&& \\|| \\! \\( \\) \\{ \\} \\[ \\] \\^ \\" \\~ \\* \\? \\: \\\\ world')
self.assertEqual(self.sq.clean('so please NOTe i am in a bAND and bORed'), 'so please NOTe i am in a bAND and bORed')
def test_build_query_with_models(self):
self.sq.add_filter('content', 'hello')
self.sq.add_model(MockModel)
self.assertEqual(self.sq.build_query(), u'(hello) django_ct:core.mockmodel')
self.sq.add_model(AnotherMockModel)
self.assertEqual(self.sq.build_query(), u'(hello) django_ct:core.anothermockmodel django_ct:core.mockmodel')
def test_build_query_with_datetime(self):
self.sq.add_filter('pub_date', datetime.datetime(2009, 5, 9, 16, 20))
self.assertEqual(self.sq.build_query(), u'pub_date:20090509162000')
def test_build_query_with_sequence_and_filter_not_in(self):
self.sq.add_filter('id__exact', [1, 2, 3])
self.assertEqual(self.sq.build_query(), u'id:[1, 2, 3]')
self.sq.add_filter(~SQ(content='hello'))
self.sq.add_filter(~SQ(content='world'))
self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((NOT hello NOT world))')
# def test_build_query_multiple_words_or(self):
# self.sq.add_filter('content', 'hello', use_or=True)
# self.sq.add_filter('content', 'world', use_or=True)
# self.assertEqual(self.sq.build_query(), 'hello OR world')
#
# def test_build_query_multiple_words_mixed(self):
# self.sq.add_filter('content', 'why')
# self.sq.add_filter('content', 'hello', use_or=True)
# self.sq.add_filter('content', 'world', use_not=True)
# self.assertEqual(self.sq.build_query(), 'why OR hello NOT world')
#
# def test_build_query_phrase(self):
# self.sq.add_filter('content', 'hello world')
# self.assertEqual(self.sq.build_query(), '"hello world"')
#
# def test_build_query_multiple_filter_types(self):
# self.sq.add_filter('content', 'why')
# self.sq.add_filter('pub_date__lte', datetime.datetime(2009, 2, 10, 1, 59))
# self.sq.add_filter('author__gt', 'david')
# self.sq.add_filter('created__lt', datetime.datetime(2009, 2, 12, 12, 13))
# self.sq.add_filter('title__gte', 'B')
# self.sq.add_filter('id__in', [1, 2, 3])
# self.assertEqual(self.sq.build_query(), 'why AND pub_date:..20090210015900 AND NOT author:..david AND NOT created:20090212121300..* AND title:B..* AND (id:1 OR id:2 OR id:3)')
#
# def test_build_query_multiple_exclude_types(self):
# self.sq.add_filter('content', 'why', use_not=True)
# self.sq.add_filter('pub_date__lte', datetime.datetime(2009, 2, 10, 1, 59), use_not=True)
# self.sq.add_filter('author__gt', 'david', use_not=True)
# self.sq.add_filter('created__lt', datetime.datetime(2009, 2, 12, 12, 13), use_not=True)
# self.sq.add_filter('title__gte', 'B', use_not=True)
# self.sq.add_filter('id__in', [1, 2, 3], use_not=True)
# self.assertEqual(self.sq.build_query(), 'NOT why AND NOT pub_date:..20090210015900 AND author:..david AND created:20090212121300..* AND NOT title:B..* AND NOT id:1 NOT id:2 NOT id:3')
#
# def test_build_query_wildcard_filter_types(self):
# self.sq.add_filter('content', 'why')
# self.sq.add_filter('title__startswith', 'haystack')
# self.assertEqual(self.sq.build_query(), 'why AND title:haystack*')
#
# def test_clean(self):
# self.assertEqual(self.sq.clean('hello world'), 'hello world')
# self.assertEqual(self.sq.clean('hello AND world'), 'hello and world')
# self.assertEqual(self.sq.clean('hello AND OR NOT + - && || ! ( ) { } [ ] ^ " ~ * ? : \ world'), 'hello and or not \\+ \\- \\&& \\|| \\! \\( \\) \\{ \\} \\[ \\] \\^ \\" \\~ \\* \\? \\: \\\\ world')
# self.assertEqual(self.sq.clean('so please NOTe i am in a bAND and bORed'), 'so please NOTe i am in a bAND and bORed')
#
# def test_build_query_with_models(self):
# self.sq.add_filter('content', 'hello')
# self.sq.add_model(MockModel)
# self.assertEqual(self.sq.build_query(), u'(hello) django_ct:core.mockmodel')
#
# self.sq.add_model(AnotherMockModel)
# self.assertEqual(self.sq.build_query(), u'(hello) django_ct:core.anothermockmodel django_ct:core.mockmodel')
#
# def test_build_query_with_datetime(self):
# self.sq.add_filter('pub_date', datetime.datetime(2009, 5, 9, 16, 20))
# self.assertEqual(self.sq.build_query(), u'pub_date:20090509162000')
#
# def test_build_query_with_sequence_and_filter_not_in(self):
# self.sq.add_filter('id__exact', [1, 2, 3])
# self.assertEqual(self.sq.build_query(), u'id:[1, 2, 3]')

View file

@ -919,10 +919,9 @@ class SearchBackend(BaseSearchBackend):
class SearchQuery(BaseSearchQuery):
"""
`SearchQuery` is responsible for converting search queries into a format
that Xapian can understand.
Most of the work is done by :meth:`build_query`.
This class is the Xapian specific version of the SearchQuery class.
It acts as an intermediary between the ``SearchQuerySet`` and the
``SearchBackend`` itself.
"""
def __init__(self, backend=None):
"""
@ -930,103 +929,33 @@ class SearchQuery(BaseSearchQuery):
specified. If no backend is set, will use the Xapian `SearchBackend`.
Optional arguments:
`backend` -- The `SearchBackend` to use (default = None)
``backend`` -- The ``SearchBackend`` to use (default = None)
"""
super(SearchQuery, self).__init__(backend=backend)
self.backend = backend or SearchBackend()
def build_query(self):
"""
Builds a search query from previously set values, returning a query
string in a format ready for use by the Xapian `SearchBackend`.
if not self.query_filter:
return xapian.Query('')
values = []
Returns:
A query string suitable for parsing by Xapian.
"""
query = ''
if not self.query_filters:
query = '*'
else:
query_chunks = []
for the_filter in self.query_filters:
if the_filter.is_and():
query_chunks.append('AND')
if the_filter.is_or():
query_chunks.append('OR')
if the_filter.is_not() and the_filter.field == 'content':
query_chunks.append('NOT')
value = the_filter.value
for child in self.query_filter.children:
if isinstance(child, self.query_filter.__class__):
print 'SQ: ', child # TODO: Recursive call down tree...
else:
expression, value = child
field, filter_type = self.query_filter.split_expression(expression)
values.append(value)
if not isinstance(value, (list, tuple)):
# Convert whatever we find to what xapian wants.
value = self.backend._marshal_value(value)
# Check to see if it's a phrase for an exact match.
if ' ' in value:
value = '"%s"' % value
# 'content' is a special reserved word, much like 'pk' in
# Django's ORM layer. It indicates 'no special field'.
if the_filter.field == 'content':
query_chunks.append(value)
else:
if the_filter.is_not():
query_chunks.append('AND')
filter_types = {
'exact': 'NOT %s:%s',
'gte': 'NOT %s:%s..*',
'gt': '%s:..%s',
'lte': 'NOT %s:..%s',
'lt': '%s:%s..*',
'startswith': 'NOT %s:%s*',
}
else:
filter_types = {
'exact': '%s:%s',
'gte': '%s:%s..*',
'gt': 'NOT %s:..%s',
'lte': '%s:..%s',
'lt': 'NOT %s:%s..*',
'startswith': '%s:%s*',
}
if the_filter.filter_type != 'in':
query_chunks.append(filter_types[the_filter.filter_type] % (the_filter.field, value))
else:
in_options = []
if the_filter.is_not():
for possible_value in value:
in_options.append('%s:%s' % (the_filter.field, possible_value))
query_chunks.append('NOT %s' % ' NOT '.join(in_options))
else:
for possible_value in value:
in_options.append('%s:%s' % (the_filter.field, possible_value))
query_chunks.append('(%s)' % ' OR '.join(in_options))
if query_chunks[0] in ('AND', 'OR'):
# Pull off an undesirable leading "AND" or "OR".
del(query_chunks[0])
query = ' '.join(query_chunks)
if len(self.models):
models = ['django_ct:%s.%s' % (model._meta.app_label, model._meta.module_name) for model in self.models]
models_clause = ' '.join(models)
final_query = '(%s) %s' % (query, models_clause)
else:
final_query = query
return final_query
return xapian.Query(xapian.Query.OP_AND, values)
def run(self, spelling_query=None):
"""
Builds and executes the query. Returns a list of search results.
Returns:
List of search results
"""
final_query = self.build_query()
kwargs = {
@ -1069,6 +998,9 @@ class SearchQuery(BaseSearchQuery):
def run_mlt(self):
"""
Builds and executes the "More Like This" query. Returns a list of search results.
Returns:
List of search results
"""
if self._more_like_this is False or self._mlt_instance is None:
raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.")