From ed4ef56225308b0408e49a12dc321403255bb87c Mon Sep 17 00:00:00 2001 From: David Sauve Date: Wed, 21 Oct 2009 08:41:27 -0400 Subject: [PATCH 1/9] Started work in refactor --- tests/xapian_tests/tests/__init__.py | 2 +- tests/xapian_tests/tests/xapian_query.py | 145 ++++++++++++----------- xapian_backend.py | 112 ++++------------- 3 files changed, 96 insertions(+), 163 deletions(-) diff --git a/tests/xapian_tests/tests/__init__.py b/tests/xapian_tests/tests/__init__.py index 25b3a6f..5b721c7 100644 --- a/tests/xapian_tests/tests/__init__.py +++ b/tests/xapian_tests/tests/__init__.py @@ -18,4 +18,4 @@ import warnings warnings.simplefilter('ignore', Warning) from xapian_tests.tests.xapian_query import * -from xapian_tests.tests.xapian_backend import * +# from xapian_tests.tests.xapian_backend import * diff --git a/tests/xapian_tests/tests/xapian_query.py b/tests/xapian_tests/tests/xapian_query.py index cc1fdc5..b3568cd 100644 --- a/tests/xapian_tests/tests/xapian_query.py +++ b/tests/xapian_tests/tests/xapian_query.py @@ -21,6 +21,7 @@ from django.conf import settings from django.test import TestCase from haystack.backends.xapian_backend import SearchBackend, SearchQuery +from haystack.query import SQ from core.models import MockModel, AnotherMockModel @@ -49,78 +50,78 @@ class XapianSearchQueryTestCase(TestCase): super(XapianSearchQueryTestCase, self).tearDown() def test_build_query_all(self): - self.assertEqual(self.sq.build_query(), '*') - + self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query()') + def test_build_query_single_word(self): - self.sq.add_filter('content', 'hello') - self.assertEqual(self.sq.build_query(), 'hello') - + self.sq.add_filter(SQ(content='hello')) + self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query(hello)') + def test_build_query_multiple_words_and(self): - self.sq.add_filter('content', 'hello') - self.sq.add_filter('content', 'world') - self.assertEqual(self.sq.build_query(), 'hello AND world') - + self.sq.add_filter(SQ(content='hello')) + self.sq.add_filter(SQ(content='world')) + self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((hello AND world))') + def test_build_query_multiple_words_not(self): - self.sq.add_filter('content', 'hello', use_not=True) - self.sq.add_filter('content', 'world', use_not=True) - self.assertEqual(self.sq.build_query(), 'NOT hello NOT world') - - def test_build_query_multiple_words_or(self): - self.sq.add_filter('content', 'hello', use_or=True) - self.sq.add_filter('content', 'world', use_or=True) - self.assertEqual(self.sq.build_query(), 'hello OR world') - - def test_build_query_multiple_words_mixed(self): - self.sq.add_filter('content', 'why') - self.sq.add_filter('content', 'hello', use_or=True) - self.sq.add_filter('content', 'world', use_not=True) - self.assertEqual(self.sq.build_query(), 'why OR hello NOT world') - - def test_build_query_phrase(self): - self.sq.add_filter('content', 'hello world') - self.assertEqual(self.sq.build_query(), '"hello world"') - - def test_build_query_multiple_filter_types(self): - self.sq.add_filter('content', 'why') - self.sq.add_filter('pub_date__lte', datetime.datetime(2009, 2, 10, 1, 59)) - self.sq.add_filter('author__gt', 'david') - self.sq.add_filter('created__lt', datetime.datetime(2009, 2, 12, 12, 13)) - self.sq.add_filter('title__gte', 'B') - self.sq.add_filter('id__in', [1, 2, 3]) - self.assertEqual(self.sq.build_query(), 'why AND pub_date:..20090210015900 AND NOT author:..david AND NOT created:20090212121300..* AND title:B..* AND (id:1 OR id:2 OR id:3)') - - def test_build_query_multiple_exclude_types(self): - self.sq.add_filter('content', 'why', use_not=True) - self.sq.add_filter('pub_date__lte', datetime.datetime(2009, 2, 10, 1, 59), use_not=True) - self.sq.add_filter('author__gt', 'david', use_not=True) - self.sq.add_filter('created__lt', datetime.datetime(2009, 2, 12, 12, 13), use_not=True) - self.sq.add_filter('title__gte', 'B', use_not=True) - self.sq.add_filter('id__in', [1, 2, 3], use_not=True) - self.assertEqual(self.sq.build_query(), 'NOT why AND NOT pub_date:..20090210015900 AND author:..david AND created:20090212121300..* AND NOT title:B..* AND NOT id:1 NOT id:2 NOT id:3') - - def test_build_query_wildcard_filter_types(self): - self.sq.add_filter('content', 'why') - self.sq.add_filter('title__startswith', 'haystack') - self.assertEqual(self.sq.build_query(), 'why AND title:haystack*') - - def test_clean(self): - self.assertEqual(self.sq.clean('hello world'), 'hello world') - self.assertEqual(self.sq.clean('hello AND world'), 'hello and world') - self.assertEqual(self.sq.clean('hello AND OR NOT + - && || ! ( ) { } [ ] ^ " ~ * ? : \ world'), 'hello and or not \\+ \\- \\&& \\|| \\! \\( \\) \\{ \\} \\[ \\] \\^ \\" \\~ \\* \\? \\: \\\\ world') - self.assertEqual(self.sq.clean('so please NOTe i am in a bAND and bORed'), 'so please NOTe i am in a bAND and bORed') - - def test_build_query_with_models(self): - self.sq.add_filter('content', 'hello') - self.sq.add_model(MockModel) - self.assertEqual(self.sq.build_query(), u'(hello) django_ct:core.mockmodel') - - self.sq.add_model(AnotherMockModel) - self.assertEqual(self.sq.build_query(), u'(hello) django_ct:core.anothermockmodel django_ct:core.mockmodel') - - def test_build_query_with_datetime(self): - self.sq.add_filter('pub_date', datetime.datetime(2009, 5, 9, 16, 20)) - self.assertEqual(self.sq.build_query(), u'pub_date:20090509162000') - - def test_build_query_with_sequence_and_filter_not_in(self): - self.sq.add_filter('id__exact', [1, 2, 3]) - self.assertEqual(self.sq.build_query(), u'id:[1, 2, 3]') \ No newline at end of file + self.sq.add_filter(~SQ(content='hello')) + self.sq.add_filter(~SQ(content='world')) + self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((NOT hello NOT world))') + + # def test_build_query_multiple_words_or(self): + # self.sq.add_filter('content', 'hello', use_or=True) + # self.sq.add_filter('content', 'world', use_or=True) + # self.assertEqual(self.sq.build_query(), 'hello OR world') + # + # def test_build_query_multiple_words_mixed(self): + # self.sq.add_filter('content', 'why') + # self.sq.add_filter('content', 'hello', use_or=True) + # self.sq.add_filter('content', 'world', use_not=True) + # self.assertEqual(self.sq.build_query(), 'why OR hello NOT world') + # + # def test_build_query_phrase(self): + # self.sq.add_filter('content', 'hello world') + # self.assertEqual(self.sq.build_query(), '"hello world"') + # + # def test_build_query_multiple_filter_types(self): + # self.sq.add_filter('content', 'why') + # self.sq.add_filter('pub_date__lte', datetime.datetime(2009, 2, 10, 1, 59)) + # self.sq.add_filter('author__gt', 'david') + # self.sq.add_filter('created__lt', datetime.datetime(2009, 2, 12, 12, 13)) + # self.sq.add_filter('title__gte', 'B') + # self.sq.add_filter('id__in', [1, 2, 3]) + # self.assertEqual(self.sq.build_query(), 'why AND pub_date:..20090210015900 AND NOT author:..david AND NOT created:20090212121300..* AND title:B..* AND (id:1 OR id:2 OR id:3)') + # + # def test_build_query_multiple_exclude_types(self): + # self.sq.add_filter('content', 'why', use_not=True) + # self.sq.add_filter('pub_date__lte', datetime.datetime(2009, 2, 10, 1, 59), use_not=True) + # self.sq.add_filter('author__gt', 'david', use_not=True) + # self.sq.add_filter('created__lt', datetime.datetime(2009, 2, 12, 12, 13), use_not=True) + # self.sq.add_filter('title__gte', 'B', use_not=True) + # self.sq.add_filter('id__in', [1, 2, 3], use_not=True) + # self.assertEqual(self.sq.build_query(), 'NOT why AND NOT pub_date:..20090210015900 AND author:..david AND created:20090212121300..* AND NOT title:B..* AND NOT id:1 NOT id:2 NOT id:3') + # + # def test_build_query_wildcard_filter_types(self): + # self.sq.add_filter('content', 'why') + # self.sq.add_filter('title__startswith', 'haystack') + # self.assertEqual(self.sq.build_query(), 'why AND title:haystack*') + # + # def test_clean(self): + # self.assertEqual(self.sq.clean('hello world'), 'hello world') + # self.assertEqual(self.sq.clean('hello AND world'), 'hello and world') + # self.assertEqual(self.sq.clean('hello AND OR NOT + - && || ! ( ) { } [ ] ^ " ~ * ? : \ world'), 'hello and or not \\+ \\- \\&& \\|| \\! \\( \\) \\{ \\} \\[ \\] \\^ \\" \\~ \\* \\? \\: \\\\ world') + # self.assertEqual(self.sq.clean('so please NOTe i am in a bAND and bORed'), 'so please NOTe i am in a bAND and bORed') + # + # def test_build_query_with_models(self): + # self.sq.add_filter('content', 'hello') + # self.sq.add_model(MockModel) + # self.assertEqual(self.sq.build_query(), u'(hello) django_ct:core.mockmodel') + # + # self.sq.add_model(AnotherMockModel) + # self.assertEqual(self.sq.build_query(), u'(hello) django_ct:core.anothermockmodel django_ct:core.mockmodel') + # + # def test_build_query_with_datetime(self): + # self.sq.add_filter('pub_date', datetime.datetime(2009, 5, 9, 16, 20)) + # self.assertEqual(self.sq.build_query(), u'pub_date:20090509162000') + # + # def test_build_query_with_sequence_and_filter_not_in(self): + # self.sq.add_filter('id__exact', [1, 2, 3]) + # self.assertEqual(self.sq.build_query(), u'id:[1, 2, 3]') \ No newline at end of file diff --git a/xapian_backend.py b/xapian_backend.py index ab4f4ad..d18f048 100755 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -919,10 +919,9 @@ class SearchBackend(BaseSearchBackend): class SearchQuery(BaseSearchQuery): """ - `SearchQuery` is responsible for converting search queries into a format - that Xapian can understand. - - Most of the work is done by the :method:`build_query`. + This class is the Xapian specific version of the SearchQuery class. + It acts as an intermediary between the ``SearchQuerySet`` and the + ``SearchBackend`` itself. """ def __init__(self, backend=None): """ @@ -930,103 +929,33 @@ class SearchQuery(BaseSearchQuery): specified. If no backend is set, will use the Xapian `SearchBackend`. Optional arguments: - `backend` -- The `SearchBackend` to use (default = None) + ``backend`` -- The ``SearchBackend`` to use (default = None) """ super(SearchQuery, self).__init__(backend=backend) self.backend = backend or SearchBackend() def build_query(self): - """ - Builds a search query from previously set values, returning a query - string in a format ready for use by the Xapian `SearchBackend`. + if not self.query_filter: + return xapian.Query('') + + values = [] - Returns: - A query string suitable for parsing by Xapian. - """ - query = '' - - if not self.query_filters: - query = '*' - else: - query_chunks = [] - - for the_filter in self.query_filters: - if the_filter.is_and(): - query_chunks.append('AND') - - if the_filter.is_or(): - query_chunks.append('OR') - - if the_filter.is_not() and the_filter.field == 'content': - query_chunks.append('NOT') - - value = the_filter.value + for child in self.query_filter.children: + if isinstance(child, self.query_filter.__class__): + print 'SQ: ', child # TODO: Recursive call down tree... + else: + expression, value = child + field, filter_type = self.query_filter.split_expression(expression) + values.append(value) - if not isinstance(value, (list, tuple)): - # Convert whatever we find to what xapian wants. - value = self.backend._marshal_value(value) - - # Check to see if it's a phrase for an exact match. - if ' ' in value: - value = '"%s"' % value - - # 'content' is a special reserved word, much like 'pk' in - # Django's ORM layer. It indicates 'no special field'. - if the_filter.field == 'content': - query_chunks.append(value) - else: - if the_filter.is_not(): - query_chunks.append('AND') - filter_types = { - 'exact': 'NOT %s:%s', - 'gte': 'NOT %s:%s..*', - 'gt': '%s:..%s', - 'lte': 'NOT %s:..%s', - 'lt': '%s:%s..*', - 'startswith': 'NOT %s:%s*', - } - else: - filter_types = { - 'exact': '%s:%s', - 'gte': '%s:%s..*', - 'gt': 'NOT %s:..%s', - 'lte': '%s:..%s', - 'lt': 'NOT %s:%s..*', - 'startswith': '%s:%s*', - } - - if the_filter.filter_type != 'in': - query_chunks.append(filter_types[the_filter.filter_type] % (the_filter.field, value)) - else: - in_options = [] - if the_filter.is_not(): - for possible_value in value: - in_options.append('%s:%s' % (the_filter.field, possible_value)) - query_chunks.append('NOT %s' % ' NOT '.join(in_options)) - else: - for possible_value in value: - in_options.append('%s:%s' % (the_filter.field, possible_value)) - query_chunks.append('(%s)' % ' OR '.join(in_options)) - - if query_chunks[0] in ('AND', 'OR'): - # Pull off an undesirable leading "AND" or "OR". - del(query_chunks[0]) - - query = ' '.join(query_chunks) - - if len(self.models): - models = ['django_ct:%s.%s' % (model._meta.app_label, model._meta.module_name) for model in self.models] - models_clause = ' '.join(models) - final_query = '(%s) %s' % (query, models_clause) - - else: - final_query = query - - return final_query + return xapian.Query(xapian.Query.OP_AND, values) def run(self, spelling_query=None): """ Builds and executes the query. Returns a list of search results. + + Returns: + List of search results """ final_query = self.build_query() kwargs = { @@ -1069,6 +998,9 @@ class SearchQuery(BaseSearchQuery): def run_mlt(self): """ Builds and executes the query. Returns a list of search results. + + Returns: + List of search results """ if self._more_like_this is False or self._mlt_instance is None: raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.") From 0a63686593aa8a6a68262ee306d0025ed9296340 Mon Sep 17 00:00:00 2001 From: David Sauve Date: Wed, 21 Oct 2009 16:20:17 -0400 Subject: [PATCH 2/9] More changes to build_query --- xapian_backend.py | 50 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/xapian_backend.py b/xapian_backend.py index d18f048..12774d4 100755 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -935,11 +935,9 @@ class SearchQuery(BaseSearchQuery): self.backend = backend or SearchBackend() def build_query(self): - if not self.query_filter: - return xapian.Query('') - values = [] - + + return final_query for child in self.query_filter.children: if isinstance(child, self.query_filter.__class__): print 'SQ: ', child # TODO: Recursive call down tree... @@ -947,9 +945,49 @@ class SearchQuery(BaseSearchQuery): expression, value = child field, filter_type = self.query_filter.split_expression(expression) values.append(value) - + return xapian.Query(xapian.Query.OP_AND, values) - + + def build_query_fragment(self, field, filter_type, value): + """ + Builds a search query fragment from a field, filter type and value. + Returns: + A query string fragment suitable for parsing by Xapian. + """ + result = '' + + if not isinstance(value, (list, tuple)): + # Convert whatever we find to what xapian wants. + value = self.backend._marshal_value(value) + + # Check to see if it's a phrase for an exact match. + if ' ' in value: + value = '"%s"' % value + + # 'content' is a special reserved word, much like 'pk' in + # Django's ORM layer. It indicates 'no special field'. + if field == 'content': + result = value + else: + filter_types = { + 'exact': '%s:%s', + 'gte': '%s:%s..*', + 'gt': 'NOT %s:..%s', + 'lte': '%s:..%s', + 'lt': 'NOT %s:%s..*', + 'startswith': '%s:%s*', + } + + if filter_type != 'in': + result = filter_types[filter_type] % (field, value) + else: + in_options = [] + for possible_value in value: + in_options.append('%s:%s' % (field, possible_value)) + result = '(%s)' % ' OR '.join(in_options) + + return result + def run(self, spelling_query=None): """ Builds and executes the query. Returns a list of search results. From 7da4ea8fd19794102409664f75b9b13018ba18fb Mon Sep 17 00:00:00 2001 From: David Sauve Date: Tue, 27 Oct 2009 22:03:04 -0400 Subject: [PATCH 3/9] Work on refactoring. Eliminated a lot of useless code and started to implement build_query using xapian.Query --- xapian_backend.py | 175 +++++++++++++++------------------------------- 1 file changed, 57 insertions(+), 118 deletions(-) diff --git a/xapian_backend.py b/xapian_backend.py index 12774d4..5534591 100755 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -933,125 +933,64 @@ class SearchQuery(BaseSearchQuery): """ super(SearchQuery, self).__init__(backend=backend) self.backend = backend or SearchBackend() - + def build_query(self): - values = [] - - return final_query - for child in self.query_filter.children: - if isinstance(child, self.query_filter.__class__): - print 'SQ: ', child # TODO: Recursive call down tree... - else: - expression, value = child - field, filter_type = self.query_filter.split_expression(expression) - values.append(value) - - return xapian.Query(xapian.Query.OP_AND, values) - - def build_query_fragment(self, field, filter_type, value): - """ - Builds a search query fragment from a field, filter type and value. - Returns: - A query string fragment suitable for parsing by Xapian. - """ - result = '' - - if not isinstance(value, (list, tuple)): - # Convert whatever we find to what xapian wants. - value = self.backend._marshal_value(value) - - # Check to see if it's a phrase for an exact match. - if ' ' in value: - value = '"%s"' % value - - # 'content' is a special reserved word, much like 'pk' in - # Django's ORM layer. It indicates 'no special field'. - if field == 'content': - result = value + if not self.query_filter.children: + return xapian.Query('') else: - filter_types = { - 'exact': '%s:%s', - 'gte': '%s:%s..*', - 'gt': 'NOT %s:..%s', - 'lte': '%s:..%s', - 'lt': 'NOT %s:%s..*', - 'startswith': '%s:%s*', - } + query_list = [] + + for child in self.query_filter.children: + if isinstance(child, self.query_filter.__class__): + pass + else: + expression, value = child + field, filter_type = self.query_filter.split_expression(expression) + query_list.append(xapian.Query(value)) + + return xapian.Query(xapian.Query.OP_AND, query_list) + - if filter_type != 'in': - result = filter_types[filter_type] % (field, value) - else: - in_options = [] - for possible_value in value: - in_options.append('%s:%s' % (field, possible_value)) - result = '(%s)' % ' OR '.join(in_options) - - return result - - def run(self, spelling_query=None): - """ - Builds and executes the query. Returns a list of search results. - - Returns: - List of search results - """ - final_query = self.build_query() - kwargs = { - 'start_offset': self.start_offset, - } - - if self.order_by: - kwargs['sort_by'] = self.order_by - - if self.end_offset is not None: - kwargs['end_offset'] = self.end_offset - self.start_offset - - if self.highlight: - kwargs['highlight'] = self.highlight - - if self.facets: - kwargs['facets'] = list(self.facets) - - if self.date_facets: - kwargs['date_facets'] = self.date_facets - - if self.query_facets: - kwargs['query_facets'] = self.query_facets - - if self.narrow_queries: - kwargs['narrow_queries'] = self.narrow_queries - - if spelling_query: - kwargs['spelling_query'] = spelling_query - - if self.boost: - kwargs['boost'] = self.boost - - results = self.backend.search(final_query, **kwargs) - self._results = results.get('results', []) - self._hit_count = results.get('hits', 0) - self._facet_counts = results.get('facets', {}) - self._spelling_suggestion = results.get('spelling_suggestion', None) + # def build_query_fragment(self, field, filter_type, value): + # print 'field: ', field + # print 'filter_type: ', filter_type + # print 'value: ', value - def run_mlt(self): - """ - Builds and executes the query. Returns a list of search results. - - Returns: - List of search results - """ - if self._more_like_this is False or self._mlt_instance is None: - raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.") - - additional_query_string = self.build_query() - kwargs = { - 'start_offset': self.start_offset, - } - - if self.end_offset is not None: - kwargs['end_offset'] = self.end_offset - self.start_offset - - results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **kwargs) - self._results = results.get('results', []) - self._hit_count = results.get('hits', 0) - + # """ + # Builds a search query fragment from a field, filter type and value. + # Returns: + # A query string fragment suitable for parsing by Xapian. + # """ + # result = '' + # + # if not isinstance(value, (list, tuple)): + # # Convert whatever we find to what xapian wants. + # value = self.backend._marshal_value(value) + # + # # Check to see if it's a phrase for an exact match. + # if ' ' in value: + # value = '"%s"' % value + # + # # 'content' is a special reserved word, much like 'pk' in + # # Django's ORM layer. It indicates 'no special field'. + # if field == 'content': + # result = value + # else: + # filter_types = { + # 'exact': '%s:%s', + # 'gte': '%s:%s..*', + # 'gt': 'NOT %s:..%s', + # 'lte': '%s:..%s', + # 'lt': 'NOT %s:%s..*', + # 'startswith': '%s:%s*', + # } + # + # if filter_type != 'in': + # result = filter_types[filter_type] % (field, value) + # else: + # in_options = [] + # for possible_value in value: + # in_options.append('%s:%s' % (field, possible_value)) + # result = '(%s)' % ' OR '.join(in_options) + # + # return result From 29ae7c58854f2284476a8b736c9b7b3705af8dba Mon Sep 17 00:00:00 2001 From: David Sauve Date: Mon, 9 Nov 2009 20:01:20 -0500 Subject: [PATCH 4/9] More refactor work --- tests/xapian_tests/tests/xapian_query.py | 22 +++++------ xapian_backend.py | 48 ++++++++++++++++-------- 2 files changed, 44 insertions(+), 26 deletions(-) diff --git a/tests/xapian_tests/tests/xapian_query.py b/tests/xapian_tests/tests/xapian_query.py index b3568cd..4ec3ee5 100644 --- a/tests/xapian_tests/tests/xapian_query.py +++ b/tests/xapian_tests/tests/xapian_query.py @@ -49,22 +49,22 @@ class XapianSearchQueryTestCase(TestCase): settings.HAYSTACK_XAPIAN_PATH = self.old_xapian_path super(XapianSearchQueryTestCase, self).tearDown() - def test_build_query_all(self): - self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query()') + # def test_build_query_all(self): + # self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query()') def test_build_query_single_word(self): self.sq.add_filter(SQ(content='hello')) self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query(hello)') - def test_build_query_multiple_words_and(self): - self.sq.add_filter(SQ(content='hello')) - self.sq.add_filter(SQ(content='world')) - self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((hello AND world))') - - def test_build_query_multiple_words_not(self): - self.sq.add_filter(~SQ(content='hello')) - self.sq.add_filter(~SQ(content='world')) - self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((NOT hello NOT world))') + # def test_build_query_multiple_words_and(self): + # self.sq.add_filter(SQ(content='hello')) + # self.sq.add_filter(SQ(content='world')) + # self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((hello AND world))') + # + # def test_build_query_multiple_words_not(self): + # self.sq.add_filter(~SQ(content='hello')) + # self.sq.add_filter(~SQ(content='world')) + # self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((NOT hello NOT world))') # def test_build_query_multiple_words_or(self): # self.sq.add_filter('content', 'hello', use_or=True) diff --git a/xapian_backend.py b/xapian_backend.py index 5534591..16725f6 100755 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -933,24 +933,42 @@ class SearchQuery(BaseSearchQuery): """ super(SearchQuery, self).__init__(backend=backend) self.backend = backend or SearchBackend() + + def as_xapian_query(self, parent, query_fragment_callback): + query_list = [] + + for child in parent.children: + if hasattr(child, 'as_query_string'): + query_list.append(self.as_xapian_query(child, query_fragment_callback)) + else: + expression, value = child + field, filter_type = self.query_filter.split_expression(expression) + query_list.append(query_fragment_callback(field, filter_type, value)) + + return xapian.Query(xapian.Query.OP_AND, query_list) def build_query(self): - if not self.query_filter.children: - return xapian.Query('') - else: - query_list = [] - - for child in self.query_filter.children: - if isinstance(child, self.query_filter.__class__): - pass - else: - expression, value = child - field, filter_type = self.query_filter.split_expression(expression) - query_list.append(xapian.Query(value)) - - return xapian.Query(xapian.Query.OP_AND, query_list) - + query = self.as_xapian_query(self.query_filter, self.build_query_fragment) + def build_query_fragment(self, field, filter_type, value): + return xapian.Query(value) + + # + # if not self.query_filter.children: + # return xapian.Query('') + # else: + # query_list = [] + # + # for child in self.query_filter.children: + # if isinstance(child, self.query_filter.__class__): + # query_list.append(self.build_query(child)) + # else: + # expression, value = child + # field, filter_type = self.query_filter.split_expression(expression) + # query_list.append(xapian.Query(value)) + # + # return xapian.Query(xapian.Query.OP_AND, query_list) + # def build_query_fragment(self, field, filter_type, value): # print 'field: ', field # print 'filter_type: ', filter_type From c06277188781c52ffa95de1b377b0f248d8056ef Mon Sep 17 00:00:00 2001 From: David Sauve Date: Tue, 10 Nov 2009 20:45:37 -0500 Subject: [PATCH 5/9] Passing first two tests... --- tests/xapian_tests/tests/xapian_query.py | 4 ++-- xapian_backend.py | 22 ++++++++-------------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/tests/xapian_tests/tests/xapian_query.py b/tests/xapian_tests/tests/xapian_query.py index 4ec3ee5..4affe50 100644 --- a/tests/xapian_tests/tests/xapian_query.py +++ b/tests/xapian_tests/tests/xapian_query.py @@ -49,8 +49,8 @@ class XapianSearchQueryTestCase(TestCase): settings.HAYSTACK_XAPIAN_PATH = self.old_xapian_path super(XapianSearchQueryTestCase, self).tearDown() - # def test_build_query_all(self): - # self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query()') + def test_build_query_all(self): + self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query()') def test_build_query_single_word(self): self.sq.add_filter(SQ(content='hello')) diff --git a/xapian_backend.py b/xapian_backend.py index 16725f6..bee95c9 100755 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -934,21 +934,15 @@ class SearchQuery(BaseSearchQuery): super(SearchQuery, self).__init__(backend=backend) self.backend = backend or SearchBackend() - def as_xapian_query(self, parent, query_fragment_callback): - query_list = [] - - for child in parent.children: - if hasattr(child, 'as_query_string'): - query_list.append(self.as_xapian_query(child, query_fragment_callback)) - else: - expression, value = child - field, filter_type = self.query_filter.split_expression(expression) - query_list.append(query_fragment_callback(field, filter_type, value)) - - return xapian.Query(xapian.Query.OP_AND, query_list) - def build_query(self): - query = self.as_xapian_query(self.query_filter, self.build_query_fragment) + if not self.query_filter: + query = xapian.Query('') + else: + for child in self.query_filter.children: + expression, value = child + query = xapian.Query(value) + + return query def build_query_fragment(self, field, filter_type, value): return xapian.Query(value) From 048e296d651781782b906ed2beb2c101680828f4 Mon Sep 17 00:00:00 2001 From: David Sauve Date: Tue, 10 Nov 2009 20:47:59 -0500 Subject: [PATCH 6/9] Passing three tests. Empty query, single content value, multi-content values --- tests/xapian_tests/tests/xapian_query.py | 10 +++++----- xapian_backend.py | 6 +++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/xapian_tests/tests/xapian_query.py b/tests/xapian_tests/tests/xapian_query.py index 4affe50..da09a0b 100644 --- a/tests/xapian_tests/tests/xapian_query.py +++ b/tests/xapian_tests/tests/xapian_query.py @@ -56,11 +56,11 @@ class XapianSearchQueryTestCase(TestCase): self.sq.add_filter(SQ(content='hello')) self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query(hello)') - # def test_build_query_multiple_words_and(self): - # self.sq.add_filter(SQ(content='hello')) - # self.sq.add_filter(SQ(content='world')) - # self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((hello AND world))') - # + def test_build_query_multiple_words_and(self): + self.sq.add_filter(SQ(content='hello')) + self.sq.add_filter(SQ(content='world')) + self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((hello AND world))') + # def test_build_query_multiple_words_not(self): # self.sq.add_filter(~SQ(content='hello')) # self.sq.add_filter(~SQ(content='world')) diff --git a/xapian_backend.py b/xapian_backend.py index bee95c9..b4e4b73 100755 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -938,9 +938,13 @@ class SearchQuery(BaseSearchQuery): if not self.query_filter: query = xapian.Query('') else: + query_list = [] + for child in self.query_filter.children: expression, value = child - query = xapian.Query(value) + query_list.append(value) + + query = xapian.Query(xapian.Query.OP_AND, query_list) return query From 5c67f5476a91b302c900614723f8cb84a4d97d95 Mon Sep 17 00:00:00 2001 From: David Sauve Date: Tue, 10 Nov 2009 21:31:25 -0500 Subject: [PATCH 7/9] Four tests passing now. Recursively parsing the search nodes and negated on NOT as required. --- tests/xapian_tests/tests/xapian_query.py | 8 ++--- xapian_backend.py | 39 ++++++++++++++++-------- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/tests/xapian_tests/tests/xapian_query.py b/tests/xapian_tests/tests/xapian_query.py index da09a0b..54d4e57 100644 --- a/tests/xapian_tests/tests/xapian_query.py +++ b/tests/xapian_tests/tests/xapian_query.py @@ -61,10 +61,10 @@ class XapianSearchQueryTestCase(TestCase): self.sq.add_filter(SQ(content='world')) self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((hello AND world))') - # def test_build_query_multiple_words_not(self): - # self.sq.add_filter(~SQ(content='hello')) - # self.sq.add_filter(~SQ(content='world')) - # self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((NOT hello NOT world))') + def test_build_query_multiple_words_not(self): + self.sq.add_filter(~SQ(content='hello')) + self.sq.add_filter(~SQ(content='world')) + self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((( AND_NOT hello) AND ( AND_NOT world)))') # def test_build_query_multiple_words_or(self): # self.sq.add_filter('content', 'hello', use_or=True) diff --git a/xapian_backend.py b/xapian_backend.py index b4e4b73..6585225 100755 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -15,7 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. __author__ = 'David Sauve' -__version__ = (1, 0, 0, 'beta') +__version__ = (2, 0, 0, 'alpha') import datetime import cPickle as pickle @@ -29,7 +29,7 @@ from django.conf import settings from django.core.exceptions import ImproperlyConfigured from django.utils.encoding import smart_unicode, force_unicode -from haystack.backends import BaseSearchBackend, BaseSearchQuery, log_query +from haystack.backends import BaseSearchBackend, BaseSearchQuery, SearchNode, log_query from haystack.exceptions import MissingDependency from haystack.fields import DateField, DateTimeField, IntegerField, FloatField, BooleanField, MultiValueField from haystack.models import SearchResult @@ -936,19 +936,32 @@ class SearchQuery(BaseSearchQuery): def build_query(self): if not self.query_filter: - query = xapian.Query('') + return xapian.Query('') else: - query_list = [] - - for child in self.query_filter.children: - expression, value = child - query_list.append(value) - - query = xapian.Query(xapian.Query.OP_AND, query_list) - - return query + return self._query_from_search_node(self.query_filter) - def build_query_fragment(self, field, filter_type, value): + def _query_from_search_node(self, search_node, is_not=False): + query_list = [] + + for child in search_node.children: + if isinstance(child, SearchNode): + query_list.append( + xapian.Query( + xapian.Query.OP_AND, + self._query_from_search_node(child, child.negated) + ) + ) + else: + expression, value = child + if is_not: + # DS_TODO: This can almost definitely be improved. + query_list.append(xapian.Query(xapian.Query.OP_AND_NOT, '', value)) + else: + query_list.append(xapian.Query(value)) + + return xapian.Query(xapian.Query.OP_AND, query_list) + + def build_sub_query(self, value): return xapian.Query(value) # From 132e13e66688cb20ae02db163a1ec89a3124f70a Mon Sep 17 00:00:00 2001 From: David Sauve Date: Tue, 10 Nov 2009 21:45:49 -0500 Subject: [PATCH 8/9] Five tests. OR operator now working --- tests/xapian_tests/tests/xapian_query.py | 9 ++++----- xapian_backend.py | 9 +++++++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/xapian_tests/tests/xapian_query.py b/tests/xapian_tests/tests/xapian_query.py index 54d4e57..db6e7b0 100644 --- a/tests/xapian_tests/tests/xapian_query.py +++ b/tests/xapian_tests/tests/xapian_query.py @@ -66,11 +66,10 @@ class XapianSearchQueryTestCase(TestCase): self.sq.add_filter(~SQ(content='world')) self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((( AND_NOT hello) AND ( AND_NOT world)))') - # def test_build_query_multiple_words_or(self): - # self.sq.add_filter('content', 'hello', use_or=True) - # self.sq.add_filter('content', 'world', use_or=True) - # self.assertEqual(self.sq.build_query(), 'hello OR world') - # + def test_build_query_multiple_words_or(self): + self.sq.add_filter(SQ(content='hello') | SQ(content='world')) + self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((hello OR world))') + # def test_build_query_multiple_words_mixed(self): # self.sq.add_filter('content', 'why') # self.sq.add_filter('content', 'hello', use_or=True) diff --git a/xapian_backend.py b/xapian_backend.py index 6585225..f87bd70 100755 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -948,7 +948,9 @@ class SearchQuery(BaseSearchQuery): query_list.append( xapian.Query( xapian.Query.OP_AND, - self._query_from_search_node(child, child.negated) + self._query_from_search_node( + child, child.negated + ) ) ) else: @@ -959,7 +961,10 @@ class SearchQuery(BaseSearchQuery): else: query_list.append(xapian.Query(value)) - return xapian.Query(xapian.Query.OP_AND, query_list) + if search_node.connector == 'OR': + return xapian.Query(xapian.Query.OP_OR, query_list) + else: + return xapian.Query(xapian.Query.OP_AND, query_list) def build_sub_query(self, value): return xapian.Query(value) From b740066f43d6e2edac13cfffc156a021ff239d07 Mon Sep 17 00:00:00 2001 From: David Sauve Date: Tue, 10 Nov 2009 21:53:32 -0500 Subject: [PATCH 9/9] Six passing tests. Combining AND, OR, NOT works. --- tests/xapian_tests/tests/xapian_query.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/xapian_tests/tests/xapian_query.py b/tests/xapian_tests/tests/xapian_query.py index db6e7b0..ea5eb46 100644 --- a/tests/xapian_tests/tests/xapian_query.py +++ b/tests/xapian_tests/tests/xapian_query.py @@ -70,12 +70,11 @@ class XapianSearchQueryTestCase(TestCase): self.sq.add_filter(SQ(content='hello') | SQ(content='world')) self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query((hello OR world))') - # def test_build_query_multiple_words_mixed(self): - # self.sq.add_filter('content', 'why') - # self.sq.add_filter('content', 'hello', use_or=True) - # self.sq.add_filter('content', 'world', use_not=True) - # self.assertEqual(self.sq.build_query(), 'why OR hello NOT world') - # + def test_build_query_multiple_words_mixed(self): + self.sq.add_filter(SQ(content='why') | SQ(content='hello')) + self.sq.add_filter(~SQ(content='world')) + self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query(((why OR hello) AND ( AND_NOT world)))') + # def test_build_query_phrase(self): # self.sq.add_filter('content', 'hello world') # self.assertEqual(self.sq.build_query(), '"hello world"')