From 500664a2e48b954fcedf6292b172580b59ebf304 Mon Sep 17 00:00:00 2001 From: David Sauve Date: Sat, 28 Nov 2009 12:31:03 -0500 Subject: [PATCH] Merged master changes into next --- tests/xapian_tests/tests/xapian_backend.py | 21 +++++----- xapian_backend.py | 46 ++++++++++++---------- 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/tests/xapian_tests/tests/xapian_backend.py b/tests/xapian_tests/tests/xapian_backend.py index 80fa3e1..ffbfdee 100644 --- a/tests/xapian_tests/tests/xapian_backend.py +++ b/tests/xapian_tests/tests/xapian_backend.py @@ -26,7 +26,7 @@ from django.utils.encoding import force_unicode from django.test import TestCase from haystack import indexes, sites -from haystack.backends.xapian_backend import SearchBackend, DEFAULT_MAX_RESULTS +from haystack.backends.xapian_backend import SearchBackend from core.models import MockTag, AnotherMockModel @@ -78,10 +78,6 @@ class XapianSearchBackendTestCase(TestCase): def setUp(self): super(XapianSearchBackendTestCase, self).setUp() - temp_path = os.path.join('tmp', 'test_xapian_query') - self.old_xapian_path = getattr(settings, 'HAYSTACK_XAPIAN_PATH', temp_path) - settings.HAYSTACK_XAPIAN_PATH = temp_path - self.site = XapianSearchSite() self.sb = SearchBackend(site=self.site) self.msi = XapianMockSearchIndex(XapianMockModel, backend=self.sb) @@ -100,14 +96,13 @@ class XapianSearchBackendTestCase(TestCase): self.sample_objs.append(mock) self.sample_objs[0].popularity = 834.0 - self.sample_objs[1].popularity = 35.0 + self.sample_objs[1].popularity = 35.5 self.sample_objs[2].popularity = 972.0 def tearDown(self): if os.path.exists(settings.HAYSTACK_XAPIAN_PATH): shutil.rmtree(settings.HAYSTACK_XAPIAN_PATH) - settings.HAYSTACK_XAPIAN_PATH = self.old_xapian_path super(XapianSearchBackendTestCase, self).tearDown() def xapian_search(self, query_string): @@ -120,7 +115,7 @@ class XapianSearchBackendTestCase(TestCase): query = xapian.Query(query_string) # Empty query matches all enquire = 
xapian.Enquire(database) enquire.set_query(query) - matches = enquire.get_mset(0, DEFAULT_MAX_RESULTS) + matches = enquire.get_mset(0, database.get_doccount()) document_list = [] @@ -189,6 +184,12 @@ class XapianSearchBackendTestCase(TestCase): self.assertEqual(self.sb.search('*')['hits'], 3) self.assertEqual([result.pk for result in self.sb.search('*')['results']], [1, 2, 3]) + # Exact match + self.assertEqual([result.pk for result in self.sb.search('name:david2')['results']], [2]) + self.assertEqual([result.pk for result in self.sb.search('value:10')['results']], [2]) + self.assertEqual([result.pk for result in self.sb.search('flag:false')['results']], [2]) + self.assertEqual([result.pk for result in self.sb.search('popularity:35.5')['results']], [2]) + # NOT operator self.assertEqual([result.pk for result in self.sb.search('NOT name:david1')['results']], [2, 3]) self.assertEqual([result.pk for result in self.sb.search('NOT name:david1 AND index')['results']], [2, 3]) @@ -262,8 +263,8 @@ class XapianSearchBackendTestCase(TestCase): self.sb.update(self.msi, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) - self.assertEqual(self.sb.search('', narrow_queries=['name:david1']), {'hits': 0, 'results': []}) - results = self.sb.search('index', narrow_queries=['name:david1']) + self.assertEqual(self.sb.search('', narrow_queries=set(['name:david1'])), {'hits': 0, 'results': []}) + results = self.sb.search('index', narrow_queries=set(['name:david1'])) self.assertEqual(results['hits'], 1) def test_highlight(self): diff --git a/xapian_backend.py b/xapian_backend.py index 24d5432..8c827a3 100755 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -31,7 +31,7 @@ from django.core.exceptions import ImproperlyConfigured from django.utils.encoding import smart_unicode, force_unicode from haystack.backends import BaseSearchBackend, BaseSearchQuery, SearchNode, log_query -from haystack.exceptions import MissingDependency +from haystack.exceptions import 
MissingDependency, HaystackError from haystack.fields import DateField, DateTimeField, IntegerField, FloatField, BooleanField, MultiValueField from haystack.models import SearchResult from haystack.utils import get_identifier @@ -42,13 +42,16 @@ except ImportError: raise MissingDependency("The 'xapian' backend requires the installation of 'xapian'. Please refer to the documentation.") -DEFAULT_MAX_RESULTS = 100000 - DOCUMENT_ID_TERM_PREFIX = 'Q' DOCUMENT_CUSTOM_TERM_PREFIX = 'X' DOCUMENT_CT_TERM_PREFIX = DOCUMENT_CUSTOM_TERM_PREFIX + 'CONTENTTYPE' +class InvalidIndexError(HaystackError): + """Raised when an index cannot be opened.""" + pass + + class XHValueRangeProcessor(xapian.ValueRangeProcessor): def __init__(self, sb): self.sb = sb @@ -258,7 +261,7 @@ class SearchBackend(BaseSearchBackend): if not models: query = xapian.Query('') enquire = self._enquire(database, query) - for match in enquire.get_mset(0, DEFAULT_MAX_RESULTS): + for match in enquire.get_mset(0, self.document_count()): database.delete_document(match.docid) else: for model in models: @@ -268,7 +271,7 @@ class SearchBackend(BaseSearchBackend): ) @log_query def search(self, query, sort_by=None, start_offset=0, - end_offset=DEFAULT_MAX_RESULTS, fields='', highlight=False, + end_offset=0, fields='', highlight=False, facets=None, date_facets=None, query_facets=None, narrow_queries=None, boost=None, spelling_query=None, limit_to_registered_models=True, **kwargs): @@ -281,7 +284,7 @@ class SearchBackend(BaseSearchBackend): Optional arguments: `sort_by` -- Sort results by specified field (default = None) `start_offset` -- Slice results from `start_offset` (default = 0) - `end_offset` -- Slice results at `end_offset` (default = 10,000) + `end_offset` -- Slice results at `end_offset` (default = 0); if 0, then all documents `fields` -- Filter results on `fields` (default = '') `highlight` -- Highlight terms in results (default = False) `facets` -- Facet results on fields (default = None) @@ -327,12 +330,12 
@@ class SearchBackend(BaseSearchBackend): if limit_to_registered_models: if narrow_queries is None: - narrow_queries = [] + narrow_queries = set() registered_models = self.build_registered_models_list() if len(registered_models) > 0: - narrow_queries.append( + narrow_queries.add( ' '.join(['django_ct:%s' % model for model in registered_models]) ) @@ -352,6 +355,8 @@ class SearchBackend(BaseSearchBackend): 'dates': {}, 'queries': {}, } + if not end_offset: + end_offset = self.document_count() matches = enquire.get_mset(start_offset, (end_offset - start_offset)) for match in matches: @@ -393,14 +398,10 @@ class SearchBackend(BaseSearchBackend): """ Retrieves the total document count for the search index. """ - try: - database = self._database() - except xapian.DatabaseOpeningError: - return 0 - return database.get_doccount() + return self._database().get_doccount() def more_like_this(self, model_instance, additional_query_string=None, - start_offset=0, end_offset=DEFAULT_MAX_RESULTS, + start_offset=0, end_offset=0, limit_to_registered_models=True, **kwargs): """ Given a model instance, returns a result set of similar documents. 
@@ -413,7 +414,7 @@ class SearchBackend(BaseSearchBackend): `additional_query_string` -- An additional query string to narrow results `start_offset` -- The starting offset (default=0) - `end_offset` -- The ending offset (default=None) + `end_offset` -- The ending offset (default=0); if 0, then all documents `limit_to_registered_models` -- Limit returned results to models registered in the current `SearchSite` (default = True) Returns: @@ -436,10 +437,12 @@ class SearchBackend(BaseSearchBackend): query = xapian.Query(DOCUMENT_ID_TERM_PREFIX + get_identifier(model_instance)) enquire = self._enquire(database, query) rset = xapian.RSet() - for match in enquire.get_mset(0, DEFAULT_MAX_RESULTS): + if not end_offset: + end_offset = self.document_count() + for match in enquire.get_mset(0, end_offset): rset.add_document(match.docid) query = xapian.Query(xapian.Query.OP_OR, - [expand.term for expand in enquire.get_eset(DEFAULT_MAX_RESULTS, rset, XHExpandDecider())] + [expand.term for expand in enquire.get_eset(match.document.termlist_count(), rset, XHExpandDecider())] ) query = xapian.Query( xapian.Query.OP_AND_NOT, [query, DOCUMENT_ID_TERM_PREFIX + get_identifier(model_instance)] @@ -449,8 +452,8 @@ class SearchBackend(BaseSearchBackend): registered_models = self.build_registered_models_list() if len(registered_models) > 0: - narrow_queries = [] - narrow_queries.append( + narrow_queries = set() + narrow_queries.add( ' '.join(['django_ct:%s' % model for model in registered_models]) ) if additional_query_string: @@ -720,7 +723,10 @@ class SearchBackend(BaseSearchBackend): database.set_metadata('schema', pickle.dumps(self.schema, pickle.HIGHEST_PROTOCOL)) database.set_metadata('content', pickle.dumps(self.content_field_name, pickle.HIGHEST_PROTOCOL)) else: - database = xapian.Database(settings.HAYSTACK_XAPIAN_PATH) + try: + database = xapian.Database(settings.HAYSTACK_XAPIAN_PATH) + except xapian.DatabaseOpeningError: + raise InvalidIndexError(u'Unable to open index at %s' % 
settings.HAYSTACK_XAPIAN_PATH) self.schema = pickle.loads(database.get_metadata('schema')) self.content_field_name = pickle.loads(database.get_metadata('content'))