diff --git a/tests/xapian_backend.py b/tests/xapian_backend.py index 36880ad..1108ecb 100644 --- a/tests/xapian_backend.py +++ b/tests/xapian_backend.py @@ -94,12 +94,12 @@ class XapianSearchBackendTestCase(TestCase): for match in matches: document = match.get_document() - object_data = pickle.loads(document.get_data()) - for key, value in object_data.iteritems(): - object_data[key] = self.sb._from_python(value) - object_data['id'] = force_unicode(document.get_value(0)) - document_list.append(object_data) - + app_label, module_name, pk, model_data = pickle.loads(document.get_data()) + for key, value in model_data.iteritems(): + model_data[key] = self.sb._from_python(value) + model_data['id'] = u'%s.%s.%d' % (app_label, module_name, pk) + document_list.append(model_data) + return document_list def test_update(self): @@ -148,10 +148,10 @@ class XapianSearchBackendTestCase(TestCase): # Wildcard -- All self.assertEqual(self.sb.search('*')['hits'], 3) - self.assertEqual([result.pk for result in self.sb.search('*')['results']], [u'1', u'2', u'3']) + self.assertEqual([result.pk for result in self.sb.search('*')['results']], [1, 2, 3]) # NOT operator - self.assertEqual([result.pk for result in self.sb.search('NOT author:david1')['results']], [u'1', u'2', u'3']) + self.assertEqual([result.pk for result in self.sb.search('NOT author:david1')['results']], [1, 2, 3]) def test_field_facets(self): self.sb.update(self.msi, self.sample_objs) @@ -231,29 +231,29 @@ class XapianSearchBackendTestCase(TestCase): self.sb.update(self.msi, self.sample_objs) results = self.sb.search('*', sort_by=['pub_date']) - self.assertEqual([result.pk for result in results['results']], [u'1', u'2', u'3']) - + self.assertEqual([result.pk for result in results['results']], [1, 2, 3]) + results = self.sb.search('*', sort_by=['-pub_date']) - self.assertEqual([result.pk for result in results['results']], [u'3', u'2', u'1']) - + self.assertEqual([result.pk for result in results['results']], [3, 2, 1]) + 
results = self.sb.search('*', sort_by=['id']) - self.assertEqual([result.pk for result in results['results']], [u'3', u'2', u'1']) - + self.assertEqual([result.pk for result in results['results']], [3, 2, 1]) + results = self.sb.search('*', sort_by=['-id']) - self.assertEqual([result.pk for result in results['results']], [u'1', u'2', u'3']) - + self.assertEqual([result.pk for result in results['results']], [1, 2, 3]) + results = self.sb.search('*', sort_by=['value']) - self.assertEqual([result.pk for result in results['results']], [u'3', u'2', u'1']) - + self.assertEqual([result.pk for result in results['results']], [3, 2, 1]) + results = self.sb.search('*', sort_by=['-value']) - self.assertEqual([result.pk for result in results['results']], [u'1', u'2', u'3']) - + self.assertEqual([result.pk for result in results['results']], [1, 2, 3]) + results = self.sb.search('*', sort_by=['flag', 'id']) - self.assertEqual([result.pk for result in results['results']], [u'3', u'1', u'2']) - + self.assertEqual([result.pk for result in results['results']], [3, 1, 2]) + results = self.sb.search('*', sort_by=['flag', '-id']) - self.assertEqual([result.pk for result in results['results']], [u'1', u'3', u'2']) - + self.assertEqual([result.pk for result in results['results']], [1, 3, 2]) + def test__from_python(self): self.assertEqual(self.sb._from_python('abc'), u'abc') self.assertEqual(self.sb._from_python(1), u'1') diff --git a/xapian_backend.py b/xapian_backend.py index 5d960df..25ebdad 100644 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -40,7 +40,6 @@ DOCUMENT_ID_TERM_PREFIX = 'Q' DOCUMENT_CUSTOM_TERM_PREFIX = 'X' DOCUMENT_CT_TERM_PREFIX = DOCUMENT_CUSTOM_TERM_PREFIX + 'CONTENTTYPE' - field_re = re.compile(r'(?<=(?..` which should be unique to this object. 
""" - database = self._open_database(writable=True) - database.delete_document(DOCUMENT_ID_TERM_PREFIX + self.get_identifier(obj)) + database = self._database(writable=True) + database.delete_document(self.get_identifier(obj)) def clear(self, models=[]): """ @@ -190,15 +194,15 @@ class SearchBackend(BaseSearchBackend): the term `XCONTENTTYPE.`. This will delete all documents with the specified model type. """ - self._prepare(rw=True) + database = self._database(writable=True) if not models: - query = xapian.Query('') # Empty query matches all - enquire = self._get_enquire(query) + query, __unused__ = self._query(database, '*') + enquire = self._enquire(database, query) for match in enquire.get_mset(0, DEFAULT_MAX_RESULTS): - self.database.delete_document(match.get_docid()) + database.delete_document(match.get_docid()) else: for model in models: - self.database.delete_document( + database.delete_document( DOCUMENT_CT_TERM_PREFIX + '%s.%s' % (model._meta.app_label, model._meta.module_name) ) @@ -262,38 +266,45 @@ class SearchBackend(BaseSearchBackend): if query_facets is not None: warnings.warn("Query faceting has not been implemented yet.", Warning, stacklevel=2) - spelling_suggestion = None - - self._prepare() - - if query_string == '*': - query = xapian.Query('') # Make '*' match everything - else: - flags = self._get_flags() - qp = self._get_query_parser() - query = qp.parse_query(query_string, flags) - if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True: - spelling_suggestion = qp.get_corrected_query_string() - - if narrow_queries: - subqueries = [qp.parse_query(narrow_query, flags) for narrow_query in narrow_queries] - query = xapian.Query(xapian.Query.OP_FILTER, query, xapian.Query(xapian.Query.OP_AND, subqueries)) - - enquire = self._get_enquire(query) + database = self._database() + query, spelling_suggestion = self._query(database, query_string, narrow_queries) + enquire = self._enquire(database, query) if sort_by: - sorter = 
self._get_sorter(sort_by) + sorter = self._sorter(sort_by) enquire.set_sort_by_key_then_relevance(sorter, True) + results = [] + facets_dict = { + 'fields': {}, + 'dates': {}, + 'queries': {}, + } matches = enquire.get_mset(start_offset, end_offset) - results = self._process_results( - matches, query_string=query_string, highlight=highlight, facets=facets - ) - if spelling_suggestion: - results['spelling_suggestion'] = spelling_suggestion + for match in matches: + document = match.get_document() + app_label, module_name, pk, model_data = pickle.loads(document.get_data()) + results.append( + SearchResult(app_label, module_name, pk, match.weight, **model_data) + ) + if facets: + facets_dict['fields'] = self._do_field_facets( + document, facets, facets_dict['fields'] + ) + if highlight and (len(query_string) > 0): + model_data['highlighted'] = { + self.content_field_name: self._do_highlight( + model_data.get(self.content_field_name), query_string + ) + } - return results + return { + 'results': results, + 'hits': matches.get_matches_estimated(), + 'facets': facets_dict, + 'spelling_suggestion': spelling_suggestion, + } def delete_index(self): """ @@ -303,10 +314,8 @@ class SearchBackend(BaseSearchBackend): """ if os.path.exists(settings.HAYSTACK_XAPIAN_PATH): index_files = os.listdir(settings.HAYSTACK_XAPIAN_PATH) - for index_file in index_files: os.remove(os.path.join(settings.HAYSTACK_XAPIAN_PATH, index_file)) - os.removedirs(settings.HAYSTACK_XAPIAN_PATH) def document_count(self): @@ -314,11 +323,10 @@ class SearchBackend(BaseSearchBackend): Retrieves the total document count for the search index. """ try: - if not self.loaded: - self._load() + database = self._database() except xapian.DatabaseOpeningError: return 0 - return self.database.get_doccount() + return database.get_doccount() def more_like_this(self, model_instance): """ @@ -344,13 +352,11 @@ class SearchBackend(BaseSearchBackend): Finally, processes the resulting matches and returns. 
""" - if not self.loaded: - self._load() - + database = self._database() query = xapian.Query( DOCUMENT_ID_TERM_PREFIX + self.get_identifier(model_instance) ) - enquire = self._get_enquire(query) + enquire = self._enquire(database, query) rset = xapian.RSet() for match in enquire.get_mset(0, DEFAULT_MAX_RESULTS): rset.add_document(match.get_docid()) @@ -472,7 +478,7 @@ class SearchBackend(BaseSearchBackend): def _from_python(self, value): """ - Converts Python values to a string for Xapian. + Private method that converts Python values to a string for Xapian. """ if isinstance(value, datetime.datetime): if value.microsecond: @@ -496,9 +502,6 @@ class SearchBackend(BaseSearchBackend): value = force_unicode(value) return value - def _dump_document_data(self, document_id, model_data): - return pickle.dumps((document_id, model_data), pickle.HIGHEST_PROTOCOL) - def _database(self, writable=False): """ Private method that returns a xapian.Database for use and sets up @@ -531,9 +534,8 @@ class SearchBackend(BaseSearchBackend): Required Argument: `document` -- The document to be indexed - Returns a Xapian.TermGenerator instance ready for use. If - `HAYSTACK_INCLUDE_SPELLING` is True, then the term generator - will have spell-checking enabled. + Returns a Xapian.TermGenerator instance. If `HAYSTACK_INCLUDE_SPELLING` + is True, then the term generator will have spell-checking enabled. 
""" term_generator = xapian.TermGenerator() term_generator.set_database(database) @@ -543,9 +545,39 @@ class SearchBackend(BaseSearchBackend): term_generator.set_document(document) return term_generator - def _get_sorter(self, sort_by): + def _query(self, database, query_string, narrow_queries=None): """ - Given a list of fields to sort by, returns a xapian.MultiValueSorter + Private method that takes a query string and returns a xapian.Query + + Required arguments: + `query_string` -- The query string to parse + + Optional arguments: + `narrow_queries` -- A list of queries to narrow the query with + + Returns a xapian.Query instance + """ + spelling_suggestion = None + + if query_string == '*': + query = xapian.Query('') # Make '*' match everything + else: + flags = self._flags() + qp = self._query_parser(database) + query = qp.parse_query(query_string, flags) + if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True: + spelling_suggestion = qp.get_corrected_query_string() + + if narrow_queries: + subqueries = [qp.parse_query(narrow_query, flags) for narrow_query in narrow_queries] + query = xapian.Query(xapian.Query.OP_FILTER, query, xapian.Query(xapian.Query.OP_AND, subqueries)) + + return query, spelling_suggestion + + def _sorter(self, sort_by): + """ + Private methos that takes a list of fields to sort by and returns a + xapian.MultiValueSorter Required Arguments: `sort_by` -- A list of fields to sort by @@ -553,22 +585,18 @@ class SearchBackend(BaseSearchBackend): Returns a xapian.MultiValueSorter instance """ sorter = xapian.MultiValueSorter() + for sort_field in sort_by: if sort_field.startswith('-'): reverse = False sort_field = sort_field[1:] # Strip the '-' else: reverse = True # Reverse is inverted in Xapian -- http://trac.xapian.org/ticket/311 - sorter.add(self._get_value_column(sort_field), reverse) + sorter.add(self._value_column(sort_field), reverse) + return sorter - def _get_value_column(self, field): - for field_dict in self.schema: - 
if field_dict['field_name'] == field: - return field_dict['column'] - return 0 - - def _get_flags(self): + def _flags(self): """ Returns the commonly used Xapian.QueryParser flags """ @@ -582,16 +610,18 @@ class SearchBackend(BaseSearchBackend): flags = flags | xapian.QueryParser.FLAG_SPELLING_CORRECTION return flags - def _get_query_parser(self): + def _query_parser(self, database): """ - Returns a Xapian.QueryParser instance. + Private method that returns a Xapian.QueryParser instance. + + Required arguments: + `database` -- The database to be queried The query parser returned will have stemming enabled, a boolean prefix - for `django_ct`, and prefixes for all of the fields in the designated - `schema`. + for `django_ct`, and prefixes for all of the fields in the `self.schema`. """ qp = xapian.QueryParser() - qp.set_database(self.database) + qp.set_database(database) qp.set_stemmer(self.stemmer) qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME) qp.add_boolean_prefix('django_ct', DOCUMENT_CT_TERM_PREFIX) @@ -599,18 +629,20 @@ class SearchBackend(BaseSearchBackend): qp.add_prefix(field_dict['field_name'], DOCUMENT_CUSTOM_TERM_PREFIX + field_dict['field_name'].upper()) return qp - def _get_enquire(self, query): + def _enquire(self, database, query): """ - Given a query, returns an Xapian.Enquire instance. + Private method that returns a Xapian.Enquire instance for use with + the specified `query`. Required Arguments: `query` -- The query to run Returns a xapian.Enquire instance """ - enquire = xapian.Enquire(self.database) + enquire = xapian.Enquire(database) enquire.set_query(query) enquire.set_docid_order(enquire.ASCENDING) + return enquire def _build_schema(self, fields): @@ -630,6 +662,22 @@ class SearchBackend(BaseSearchBackend): del field return fields + def _value_column(self, field): + """ + Private method that returns the column value slot in the database + for a given field.
+ + Required arguments: + `field` -- The field to lookup + + Returns an integer with the column location (0 indexed). + """ + for field_dict in self.schema: + if field_dict['field_name'] == field: + return field_dict['column'] + return 0 + + class SearchQuery(BaseSearchQuery): """