mirror of
https://github.com/Hopiu/django-watson.git
synced 2026-05-04 03:44:42 +00:00
Added in splitting up of emails when preprocessing content.
This commit is contained in:
parent
b20ad0c56f
commit
ffda21b33e
3 changed files with 33 additions and 6 deletions
|
|
@ -1,5 +1,7 @@
|
|||
"""Search backends used by django-watson."""
|
||||
|
||||
import re
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import ImproperlyConfigured
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
|
|
@ -14,7 +16,7 @@ def regex_from_search_text(search_text):
|
|||
words = search_text.split()
|
||||
return u"|".join(
|
||||
u"(\s{word}\s)|(^{word}\s)|(\s{word}$)|(^{word}$)".format(
|
||||
word = word,
|
||||
word = re.escape(word),
|
||||
)
|
||||
for word in words
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
"""Adapters for registering models with django-watson."""
|
||||
|
||||
import operator
|
||||
import operator, re
|
||||
from threading import local
|
||||
from contextlib import contextmanager
|
||||
from functools import wraps
|
||||
|
|
@ -24,6 +24,10 @@ class SearchAdapterError(Exception):
|
|||
"""Something went wrong with a search adapter."""
|
||||
|
||||
|
||||
# Used for splitting up email addresses.
# Group 1 captures the text before the "@" and group 2 the text after it.
# The dots inside the character classes are deliberately left unescaped: "."
# is already literal there, whereas "\." in a non-raw string literal is an
# invalid escape sequence that newer Python versions warn about.
RE_EMAIL = re.compile(u"([a-z0-9][a-z0-9.+]*)@([a-z0-9.+]*[a-z])", re.IGNORECASE)
|
||||
|
||||
|
||||
class SearchAdapter(object):
|
||||
|
||||
"""An adapter for performing a full-text search on a model."""
|
||||
|
|
@ -66,6 +70,20 @@ class SearchAdapter(object):
|
|||
# Resolution complete!
|
||||
return value
|
||||
|
||||
def prepare_content(self, content):
    """Cleans up the given content string so the search engine can parse it better."""
    # Remove any HTML markup first.
    text = strip_tags(content)

    # Follow every email address with its two halves, so the replacement text
    # holds the full address, the part before the "@" and the part after it,
    # separated by single spaces.
    def expand_email(match):
        return u" ".join(match.group(0, 1, 2))

    return RE_EMAIL.sub(expand_email, text)
|
||||
|
||||
def get_title(self, obj):
|
||||
"""Returns the search title for the given obj."""
|
||||
return unicode(obj)
|
||||
|
|
@ -84,10 +102,10 @@ class SearchAdapter(object):
|
|||
# Exclude named fields.
|
||||
field_names = (field_name for field_name in field_names if field_name not in self.exclude)
|
||||
# Create the text.
|
||||
return u" ".join(
|
||||
strip_tags(self._resolve_field(obj, field_name))
|
||||
return self.prepare_content(u" ".join(
|
||||
self._resolve_field(obj, field_name)
|
||||
for field_name in field_names
|
||||
)
|
||||
))
|
||||
|
||||
def get_url(self, obj):
|
||||
"""Return the URL of the given obj."""
|
||||
|
|
|
|||
|
|
@ -168,7 +168,14 @@ class InternalsTest(SearchTestBase):
|
|||
call_command("buildwatson")
|
||||
# Make sure that we have four again (including duplicates).
|
||||
self.assertEqual(SearchEntry.objects.count(), 4)
|
||||
|
||||
|
||||
def testSearchEmailParts(self):
|
||||
with watson.context():
|
||||
self.test11.content = "foo@bar.com"
|
||||
self.test11.save()
|
||||
self.assertEqual(watson.search("foo").count(), 1)
|
||||
self.assertEqual(watson.search("bar.com").count(), 1)
|
||||
self.assertEqual(watson.search("foo@bar.com").count(), 1)
|
||||
|
||||
class SearchTest(SearchTestBase):
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue