diff --git a/tests/xapian_backend.py b/tests/xapian_backend.py index 5ed280f..f874128 100644 --- a/tests/xapian_backend.py +++ b/tests/xapian_backend.py @@ -85,7 +85,7 @@ class XapianSearchBackendTestCase(TestCase): document_list.append(object_data) return document_list - + def test_update(self): self.sb.update(self.msi, self.sample_objs) self.sb.update(self.msi, self.sample_objs) # Duplicates should be updated, not appended -- http://github.com/notanumber/xapian-haystack/issues/#issue/6 @@ -104,37 +104,37 @@ class XapianSearchBackendTestCase(TestCase): def test_clear(self): self.sb.update(self.msi, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) - + self.sb.clear() self.assertEqual(len(self.xapian_search('')), 0) - + self.sb.update(self.msi, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) - + self.sb.clear([AnotherMockModel]) self.assertEqual(len(self.xapian_search('')), 3) - + self.sb.clear([MockModel]) self.assertEqual(len(self.xapian_search('')), 0) - + self.sb.update(self.msi, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) - + self.sb.clear([AnotherMockModel, MockModel]) self.assertEqual(len(self.xapian_search('')), 0) def test_search(self): self.sb.update(self.msi, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) - + self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('*')['hits'], 3) self.assertEqual([result.pk for result in self.sb.search('*')['results']], [u'1', u'2', u'3']) - - # self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []}) - # self.assertEqual(self.sb.search('Index*', highlight=True)['hits'], 3) - # self.assertEqual([result.highlighted['text'][0] for result in self.sb.search('Index*', highlight=True)['results']], ['Indexed!\n3', 'Indexed!\n2', 'Indexed!\n1']) - # + + def test_field_facets(self): + self.sb.update(self.msi, self.sample_objs) + self.assertEqual(len(self.xapian_search('')), 3) + 
self.assertEqual(self.sb.search('', facets=['name']), {'hits': 0, 'results': []}) results = self.sb.search('index', facets=['name']) self.assertEqual(results['hits'], 3) @@ -150,10 +150,22 @@ class XapianSearchBackendTestCase(TestCase): # self.assertEqual(results['hits'], 3) # self.assertEqual(results['facets'], {}) + def test_narrow_queries(self): + self.sb.update(self.msi, self.sample_objs) + self.assertEqual(len(self.xapian_search('')), 3) + self.assertEqual(self.sb.search('', narrow_queries=['name:david1']), {'hits': 0, 'results': []}) - results = self.sb.search('index*', narrow_queries=['name:david1']) + results = self.sb.search('index', narrow_queries=['name:david1']) self.assertEqual(results['hits'], 1) + def test_highlight(self): + self.sb.update(self.msi, self.sample_objs) + self.assertEqual(len(self.xapian_search('')), 3) + + self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []}) + self.assertEqual(self.sb.search('Index', highlight=True)['hits'], 3) + self.assertEqual([result.highlighted['text'] for result in self.sb.search('Index', highlight=True)['results']], ['Indexed!\n1', 'Indexed!\n2', 'Indexed!\n3']) + def test_spelling_suggestion(self): self.sb.update(self.msi, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) @@ -168,10 +180,10 @@ class XapianSearchBackendTestCase(TestCase): self.sb.update(self.msi, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) - results = self.sb.search('index', facets=['name']) + results = self.sb.search('index') self.assertEqual(results['hits'], 3) - results = self.sb.search('indexing', facets=['name']) + results = self.sb.search('indexing') self.assertEqual(results['hits'], 3) def test_more_like_this(self): diff --git a/xapian_backend.py b/xapian_backend.py index f0c374f..4bf057a 100644 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -78,7 +78,7 @@ class SearchBackend(BaseSearchBackend): raise ImproperlyConfigured('You must specify a HAYSTACK_XAPIAN_PATH in 
your settings.') self.path = settings.HAYSTACK_XAPIAN_PATH - self.stemmer = xapian.Stem('english') + self.stemmer = xapian.Stem(stem_lang) if not os.path.exists(self.path): os.makedirs(self.path) @@ -264,9 +264,6 @@ class SearchBackend(BaseSearchBackend): if query_facets is not None: warnings.warn("Query faceting has not been implemented yet.", Warning, stacklevel=2) - if highlight is not False: - warnings.warn("Highlight has not been implemented yet.", Warning, stacklevel=2) - database = self._open_database() schema = pickle.loads(database.get_metadata('schema')) spelling_suggestion = None @@ -291,7 +288,9 @@ class SearchBackend(BaseSearchBackend): enquire.set_sort_by_key_then_relevance(sorter, True) matches = enquire.get_mset(start_offset, end_offset) - results = self._process_results(matches, facets) + results = self._process_results( + matches, query_string=query_string, highlight=highlight, facets=facets + ) if spelling_suggestion: results['spelling_suggestion'] = spelling_suggestion @@ -364,7 +363,7 @@ class SearchBackend(BaseSearchBackend): matches = enquire.get_mset(0, DEFAULT_MAX_RESULTS) return self._process_results(matches) - def _process_results(self, matches, facets=None): + def _process_results(self, matches, query_string='', highlight=False, facets=None): """ Private method for processing an MSet (match set). @@ -372,6 +371,8 @@ class SearchBackend(BaseSearchBackend): `matches` -- An MSet of matches Optional arguments: + `query_string` -- The query string that generated the matches + `highlight` -- Add highlighting to results? 
(default=False) `facets` -- Fields to facet (default = None) Returns: @@ -404,6 +405,12 @@ class SearchBackend(BaseSearchBackend): document = match.get_document() app_label, module_name, pk = document.get_value(0).split('.') additional_fields = pickle.loads(document.get_data()) + if highlight and (len(query_string) > 0): + additional_fields['highlighted'] = { + self.content_field_name: self._do_highlight( + additional_fields.get(self.content_field_name), query_string + ) + } result = SearchResult( app_label, module_name, pk, match.weight, **additional_fields ) @@ -420,6 +427,23 @@ class SearchBackend(BaseSearchBackend): 'facets': facets_dict, } + def _do_highlight(self, content, text, tag='em'): + """ + Highlight `text` in `content` with html `tag`. + + This method assumes that the input text (`content`) does not contain + any special formatting. That is, it does not contain any html tags + or similar markup that could be screwed up by the highlighting. + + Required arguments: + `content` -- Content to search for instances of `text` + `text` -- The text to be highlighted + """ + for term in [term.replace('*', '') for term in text.split()]: + term_re = re.compile(re.escape(term), re.IGNORECASE) + content = term_re.sub('<%s>%s</%s>' % (tag, term, tag), content) + return content + def _do_field_facets(self, document, facets, fields): """ Private method that facets a document by field name. @@ -477,7 +501,7 @@ class SearchBackend(BaseSearchBackend): Returns a dictionary that can be stored in the database ('schema') metdata. """ - content_field_name, fields = self.site.build_unified_schema() + self.content_field_name, fields = self.site.build_unified_schema() schema_fields = {} for i, field in enumerate(fields): if field['indexed'] == 'true': @@ -586,6 +610,7 @@ class SearchBackend(BaseSearchBackend): enquire.set_docid_order(enquire.ASCENDING) return enquire + class SearchQuery(BaseSearchQuery): """ `SearchQuery` is responsible for converting search queries into a format