Mirror of https://github.com/Hopiu/xapian-haystack.git, synced 2026-05-16 02:53:13 +00:00.
Commit: "Implemented highlighting"
Implemented highlighting
This commit is contained in: parent a238bafe80 → commit c087348a48.
2 changed files with 60 additions and 23 deletions.
|
|
@ -85,7 +85,7 @@ class XapianSearchBackendTestCase(TestCase):
|
|||
document_list.append(object_data)
|
||||
|
||||
return document_list
|
||||
|
||||
|
||||
def test_update(self):
|
||||
self.sb.update(self.msi, self.sample_objs)
|
||||
self.sb.update(self.msi, self.sample_objs) # Duplicates should be updated, not appended -- http://github.com/notanumber/xapian-haystack/issues/#issue/6
|
||||
|
|
@ -104,37 +104,37 @@ class XapianSearchBackendTestCase(TestCase):
|
|||
def test_clear(self):
    """Exercise SearchBackend.clear() with and without model filters."""
    # Seed the index with the three sample objects.
    self.sb.update(self.msi, self.sample_objs)
    self.assertEqual(len(self.xapian_search('')), 3)

    # A bare clear() wipes the whole index.
    self.sb.clear()
    self.assertEqual(len(self.xapian_search('')), 0)

    # Re-seed for the model-filtered variants below.
    self.sb.update(self.msi, self.sample_objs)
    self.assertEqual(len(self.xapian_search('')), 3)

    # Clearing a model with no indexed documents leaves the count unchanged.
    self.sb.clear([AnotherMockModel])
    self.assertEqual(len(self.xapian_search('')), 3)

    # Clearing the indexed model removes all of its documents.
    self.sb.clear([MockModel])
    self.assertEqual(len(self.xapian_search('')), 0)

    # Re-seed once more for the mixed-model case.
    self.sb.update(self.msi, self.sample_objs)
    self.assertEqual(len(self.xapian_search('')), 3)

    # A mixed model list clears every listed model's documents.
    self.sb.clear([AnotherMockModel, MockModel])
    self.assertEqual(len(self.xapian_search('')), 0)
|
||||
|
||||
def test_search(self):
    """Check basic search(): empty query, wildcard hits, and result ordering."""
    self.sb.update(self.msi, self.sample_objs)
    self.assertEqual(len(self.xapian_search('')), 3)

    # An empty query string yields zero hits rather than everything.
    self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []})
    # The wildcard query matches all three documents, in pk order.
    self.assertEqual(self.sb.search('*')['hits'], 3)
    self.assertEqual([result.pk for result in self.sb.search('*')['results']], [u'1', u'2', u'3'])

    # NOTE(review): the highlighting assertions below are commented out;
    # test_highlight appears to cover this now — confirm before deleting.
    # self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []})
    # self.assertEqual(self.sb.search('Index*', highlight=True)['hits'], 3)
    # self.assertEqual([result.highlighted['text'][0] for result in self.sb.search('Index*', highlight=True)['results']], ['<em>Indexed</em>!\n3', '<em>Indexed</em>!\n2', '<em>Indexed</em>!\n1'])
    #
|
||||
|
||||
def test_field_facets(self):
|
||||
self.sb.update(self.msi, self.sample_objs)
|
||||
self.assertEqual(len(self.xapian_search('')), 3)
|
||||
|
||||
self.assertEqual(self.sb.search('', facets=['name']), {'hits': 0, 'results': []})
|
||||
results = self.sb.search('index', facets=['name'])
|
||||
self.assertEqual(results['hits'], 3)
|
||||
|
|
@ -150,10 +150,22 @@ class XapianSearchBackendTestCase(TestCase):
|
|||
# self.assertEqual(results['hits'], 3)
|
||||
# self.assertEqual(results['facets'], {})
|
||||
|
||||
def test_narrow_queries(self):
    """Check that narrow_queries restricts results to matching field values."""
    self.sb.update(self.msi, self.sample_objs)
    self.assertEqual(len(self.xapian_search('')), 3)

    # Narrowing an empty query still yields nothing.
    self.assertEqual(self.sb.search('', narrow_queries=['name:david1']), {'hits': 0, 'results': []})
    # NOTE(review): the next two calls look like diff residue (the old
    # wildcard form followed by its replacement); only the second result is
    # asserted — confirm the first line can be dropped.
    results = self.sb.search('index*', narrow_queries=['name:david1'])
    results = self.sb.search('index', narrow_queries=['name:david1'])
    # Narrowing to name:david1 leaves exactly one of the three documents.
    self.assertEqual(results['hits'], 1)
|
||||
|
||||
def test_highlight(self):
    """Check that search(highlight=True) wraps query terms in <em> tags."""
    self.sb.update(self.msi, self.sample_objs)
    self.assertEqual(len(self.xapian_search('')), 3)

    # Empty query with highlighting enabled still returns no hits.
    self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []})
    self.assertEqual(self.sb.search('Index', highlight=True)['hits'], 3)
    # Each result exposes the stored text with the matched term wrapped
    # in <em>, keyed by the content field name under result.highlighted.
    self.assertEqual([result.highlighted['text'] for result in self.sb.search('Index', highlight=True)['results']], ['<em>Index</em>ed!\n1', '<em>Index</em>ed!\n2', '<em>Index</em>ed!\n3'])
|
||||
|
||||
def test_spelling_suggestion(self):
|
||||
self.sb.update(self.msi, self.sample_objs)
|
||||
self.assertEqual(len(self.xapian_search('')), 3)
|
||||
|
|
@ -168,10 +180,10 @@ class XapianSearchBackendTestCase(TestCase):
|
|||
self.sb.update(self.msi, self.sample_objs)
|
||||
self.assertEqual(len(self.xapian_search('')), 3)
|
||||
|
||||
results = self.sb.search('index', facets=['name'])
|
||||
results = self.sb.search('index')
|
||||
self.assertEqual(results['hits'], 3)
|
||||
|
||||
results = self.sb.search('indexing', facets=['name'])
|
||||
results = self.sb.search('indexing')
|
||||
self.assertEqual(results['hits'], 3)
|
||||
|
||||
def test_more_like_this(self):
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
raise ImproperlyConfigured('You must specify a HAYSTACK_XAPIAN_PATH in your settings.')
|
||||
|
||||
self.path = settings.HAYSTACK_XAPIAN_PATH
|
||||
self.stemmer = xapian.Stem('english')
|
||||
self.stemmer = xapian.Stem(stem_lang)
|
||||
|
||||
if not os.path.exists(self.path):
|
||||
os.makedirs(self.path)
|
||||
|
|
@ -264,9 +264,6 @@ class SearchBackend(BaseSearchBackend):
|
|||
if query_facets is not None:
|
||||
warnings.warn("Query faceting has not been implemented yet.", Warning, stacklevel=2)
|
||||
|
||||
if highlight is not False:
|
||||
warnings.warn("Highlight has not been implemented yet.", Warning, stacklevel=2)
|
||||
|
||||
database = self._open_database()
|
||||
schema = pickle.loads(database.get_metadata('schema'))
|
||||
spelling_suggestion = None
|
||||
|
|
@ -291,7 +288,9 @@ class SearchBackend(BaseSearchBackend):
|
|||
enquire.set_sort_by_key_then_relevance(sorter, True)
|
||||
|
||||
matches = enquire.get_mset(start_offset, end_offset)
|
||||
results = self._process_results(matches, facets)
|
||||
results = self._process_results(
|
||||
matches, query_string=query_string, highlight=highlight, facets=facets
|
||||
)
|
||||
|
||||
if spelling_suggestion:
|
||||
results['spelling_suggestion'] = spelling_suggestion
|
||||
|
|
@ -364,7 +363,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
matches = enquire.get_mset(0, DEFAULT_MAX_RESULTS)
|
||||
return self._process_results(matches)
|
||||
|
||||
def _process_results(self, matches, facets=None):
|
||||
def _process_results(self, matches, query_string='', highlight=False, facets=None):
|
||||
"""
|
||||
Private method for processing an MSet (match set).
|
||||
|
||||
|
|
@ -372,6 +371,8 @@ class SearchBackend(BaseSearchBackend):
|
|||
`matches` -- An MSet of matches
|
||||
|
||||
Optional arguments:
|
||||
`query_string` -- The query string that generated the matches
|
||||
`highlight` -- Add highlighting to results? (default=False)
|
||||
`facets` -- Fields to facet (default = None)
|
||||
|
||||
Returns:
|
||||
|
|
@ -404,6 +405,12 @@ class SearchBackend(BaseSearchBackend):
|
|||
document = match.get_document()
|
||||
app_label, module_name, pk = document.get_value(0).split('.')
|
||||
additional_fields = pickle.loads(document.get_data())
|
||||
if highlight and (len(query_string) > 0):
|
||||
additional_fields['highlighted'] = {
|
||||
self.content_field_name: self._do_highlight(
|
||||
additional_fields.get(self.content_field_name), query_string
|
||||
)
|
||||
}
|
||||
result = SearchResult(
|
||||
app_label, module_name, pk, match.weight, **additional_fields
|
||||
)
|
||||
|
|
@ -420,6 +427,23 @@ class SearchBackend(BaseSearchBackend):
|
|||
'facets': facets_dict,
|
||||
}
|
||||
|
||||
def _do_highlight(self, content, text, tag='em'):
|
||||
"""
|
||||
Highlight `text` in `content` with html `tag`.
|
||||
|
||||
This method assumes that the input text (`content`) does not contain
|
||||
any special formatting. That is, it does not contain any html tags
|
||||
or similar markup that could be screwed up by the highlighting.
|
||||
|
||||
Required arguments:
|
||||
`content` -- Content to search for instances of `text`
|
||||
`text` -- The text to be highlighted
|
||||
"""
|
||||
for term in [term.replace('*', '') for term in text.split()]:
|
||||
term_re = re.compile(re.escape(term), re.IGNORECASE)
|
||||
content = term_re.sub('<%s>%s</%s>' % (tag, term, tag), content)
|
||||
return content
|
||||
|
||||
def _do_field_facets(self, document, facets, fields):
|
||||
"""
|
||||
Private method that facets a document by field name.
|
||||
|
|
@ -477,7 +501,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
|
||||
Returns a dictionary that can be stored in the database ('schema') metdata.
|
||||
"""
|
||||
content_field_name, fields = self.site.build_unified_schema()
|
||||
self.content_field_name, fields = self.site.build_unified_schema()
|
||||
schema_fields = {}
|
||||
for i, field in enumerate(fields):
|
||||
if field['indexed'] == 'true':
|
||||
|
|
@ -586,6 +610,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
enquire.set_docid_order(enquire.ASCENDING)
|
||||
return enquire
|
||||
|
||||
|
||||
class SearchQuery(BaseSearchQuery):
|
||||
"""
|
||||
`SearchQuery` is responsible for converting search queries into a format
|
||||
|
|
|
|||
Loading…
Reference in a new issue