diff --git a/tests/settings.py b/tests/settings.py index 82c7085..7544e48 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -13,3 +13,4 @@ ROOT_URLCONF = 'tests.urls' HAYSTACK_SEARCH_ENGINE = 'xapian' HAYSTACK_XAPIAN_PATH = os.path.join('tmp', 'test_xapian_query') +HAYSTACK_INCLUDE_SPELLING = True diff --git a/tests/xapian_backend.py b/tests/xapian_backend.py index 3e8fcc3..c58d969 100644 --- a/tests/xapian_backend.py +++ b/tests/xapian_backend.py @@ -150,6 +150,12 @@ class XapianSearchBackendTestCase(TestCase): # # results = self.sb.search('Index*', narrow_queries=['name:david1']) # # self.assertEqual(results['hits'], 1) + # Xapian's spell correction is different from Solr's. It doesn't catch + # missing letters, but does catch jumbled letters within a default of 2 positions. + # This can be increased to 3, but should be good enough for most uses. + self.assertEqual(self.sb.search('indxe')['hits'], 0) + self.assertEqual(self.sb.search('indxe')['spelling_suggestion'], 'indexed') + def test_stemming(self): self.sb.update(self.msi, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) diff --git a/xapian_backend.py b/xapian_backend.py index 10cd2c5..c46e13b 100644 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -246,6 +246,11 @@ class SearchBackend(BaseSearchBackend): The resulting match set is passed to :method:`_process_results` for further processing prior to returning a dictionary with the results. + + If `HAYSTACK_INCLUDE_SPELLING` was enabled in `settings.py`, the + extra flag `FLAG_SPELLING_CORRECTION` will be passed to the query parser + and any suggestions for spell correction will be returned as well as + the results. 
""" if not query_string: return { @@ -266,6 +271,10 @@ class SearchBackend(BaseSearchBackend): warnings.warn("Highlight has not been implemented yet.", Warning, stacklevel=2) database = xapian.Database(self.path) + + if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True: + spelling_suggestion = '' + if query_string == '*': query = xapian.Query('') # Make '*' match everything else: @@ -276,18 +285,26 @@ class SearchBackend(BaseSearchBackend): qp.add_boolean_prefix('django_ct', DOCUMENT_CT_TERM_PREFIX) for field in pickle.loads(database.get_metadata('fields')): qp.add_prefix(field, DOCUMENT_CUSTOM_TERM_PREFIX + field.upper()) - query = qp.parse_query( - query_string, - xapian.QueryParser.FLAG_PARTIAL | xapian.QueryParser.FLAG_PHRASE | - xapian.QueryParser.FLAG_BOOLEAN | xapian.QueryParser.FLAG_LOVEHATE | - xapian.QueryParser.FLAG_WILDCARD - ) + flags = xapian.QueryParser.FLAG_PARTIAL \ + | xapian.QueryParser.FLAG_PHRASE \ + | xapian.QueryParser.FLAG_BOOLEAN \ + | xapian.QueryParser.FLAG_LOVEHATE \ + | xapian.QueryParser.FLAG_WILDCARD + if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True: + flags = flags | xapian.QueryParser.FLAG_SPELLING_CORRECTION + query = qp.parse_query(query_string, flags) + if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True: + spelling_suggestion = qp.get_corrected_query_string() enquire = xapian.Enquire(database) enquire.set_query(query) matches = enquire.get_mset(start_offset, end_offset) + results = self._process_results(matches, facets) - return self._process_results(matches, facets) + if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True: + results['spelling_suggestion'] = spelling_suggestion + + return results def delete_index(self): """ @@ -405,7 +422,7 @@ class SearchBackend(BaseSearchBackend): facets_dict['fields'] = self._do_field_facets( document, facets, facets_dict['fields'] ) - + return { 'results': results, 'hits': hits,