Refactored for new search paradigm

This commit is contained in:
Dave Hall 2011-08-23 17:12:35 +01:00
parent 103a01e9c1
commit 81e436bdc3
9 changed files with 440 additions and 455 deletions

View file

@ -6,13 +6,21 @@ Developed by Dave Hall.
<http://www.etianen.com/>
"""
from watson.registration import SearchAdapter, register, unregister, is_registered, get_registered_models, search_context_manager, get_backend
from watson.registration import SearchAdapter, default_search_engine
# The main search method.
search = get_backend().search
search = default_search_engine.search
# Easy registration.
register = default_search_engine.register
unregister = default_search_engine.unregister
is_registered = default_search_engine.is_registered
get_registered_models = default_search_engine.get_registered_models
get_adapter = default_search_engine.get_adapter
# Easy context management.
context = search_context_manager.context
update_index = search_context_manager.update_index
context = default_search_engine.context
update_index = default_search_engine.update_index

View file

@ -1,8 +1,5 @@
"""Search backends used by django-watson."""
from abc import ABCMeta, abstractmethod
import operator
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.contrib.contenttypes.models import ContentType
@ -10,67 +7,32 @@ from django.db import models, connection
from django.db.models import Q
from watson.models import SearchEntry, has_int_pk
from watson.registration import get_registered_models, get_adapter
class SearchBackend(object):
"""Base class for all search backends."""
__metaclass__ = ABCMeta
@abstractmethod
def do_install(self):
"""Generates the SQL needed to install django-watson."""
raise NotImplementedError
pass
@abstractmethod
def do_search(self, queryset, search_text):
    """
    Filters the given queryset according to the search logic for this backend.

    Each whitespace-separated word of ``search_text`` becomes a whole-word
    pattern; the patterns are OR-ed into one regular expression, which is
    matched case-insensitively against the ``title``, ``description`` and
    ``content`` lookups of the queryset.
    """
    # NOTE(review): words are interpolated into the pattern unescaped, so any
    # regex metacharacters in the search text reach the database as-is.
    words = search_text.split()
    regex = u"|".join(
        u"(\\s{word}\\s)|(^{word}\\s)|(\\s{word}$)|(^{word}$)".format(
            word = word,
        )
        for word in words
    )
    # The description lookup replaces a duplicated content lookup.
    return queryset.filter(
        Q(title__iregex=regex) | Q(description__iregex=regex) | Q(content__iregex=regex),
    )
@abstractmethod
def save_search_entry(self, obj, search_entry, weighted_search_text):
def save_search_entry(self, search_entry, obj, adapter):
"""Saves the given search entry in the database."""
raise NotImplementedError
def search(self, search_text, models=None, exclude=None):
"""Performs a search using the given text, returning a queryset of SearchEntry."""
queryset = SearchEntry.objects.all()
# Add in a model limiter.
allowed_models = models or get_registered_models()
if exclude:
allowed_models = [model for model in allowed_models if not model in exclude]
# Perform any live filters.
live_subqueries = []
for model in allowed_models:
content_type = ContentType.objects.get_for_model(model)
adapter = get_adapter(model)
if adapter.live_filter:
needs_live_subquery = True
live_pks = model._default_manager.all().values_list("pk", flat=True)
if has_int_pk(model):
# We can do this as an in-database join.
live_subquery = Q(
content_type = content_type,
object_id_int__in = live_pks,
)
else:
# We have to do this as two separate queries. Oh well.
live_subquery = Q(
content_type = content_type,
object_id__in = [unicode(pk) for pk in live_pks],
)
else:
live_subquery = Q(
content_type = content_type,
)
live_subqueries.append(live_subquery)
live_subquery = reduce(operator.or_, live_subqueries)
queryset = queryset.filter(live_subquery)
# Perform the backend-specific full text match.
queryset = self.do_search(queryset, search_text)
return queryset
search_entry.save()
class PostgresSearchBackend(SearchBackend):
@ -85,64 +47,6 @@ class PostgresSearchBackend(SearchBackend):
CREATE INDEX "watson_searchentry_search_tsv" ON "watson_searchentry" USING gin("search_tsv");
""")
def save_search_entry(self, obj, search_entry, weighted_search_text):
"""
Saves the search entry using raw SQL.

An INSERT is issued when ``search_entry.id`` is None, otherwise an UPDATE of
the row with that id. ``obj`` is not used by this implementation.
"""
# The four joined slices feed the four to_tsvector() placeholders, weighted
# 'A' to 'D'; everything from the fourth element onwards shares the 'D' slot.
sql_params = [
search_entry.object_id,
search_entry.object_id_int,
search_entry.content_type_id,
search_entry.meta_encoded,
u" ".join(weighted_search_text[:1]),
u" ".join(weighted_search_text[1:2]),
u" ".join(weighted_search_text[2:3]),
u" ".join(weighted_search_text[3:]),
]
if search_entry.id is None:
# Perform a raw insert.
sql_str = u"""
INSERT INTO
"watson_searchentry"
(
"object_id",
"object_id_int",
"content_type_id",
"meta_encoded",
"search_tsv"
) VALUES (
%s,
%s,
%s,
%s,
(
setweight(to_tsvector(%s), 'A') ||
setweight(to_tsvector(%s), 'B') ||
setweight(to_tsvector(%s), 'C') ||
setweight(to_tsvector(%s), 'D')
)
)"""
else:
# Perform a raw update.
sql_str = u"""
UPDATE
"watson_searchentry"
SET
"object_id" = %s,
"object_id_int" = %s,
"content_type_id" = %s,
"meta_encoded" = %s,
"search_tsv" = (
setweight(to_tsvector(%s), 'A') ||
setweight(to_tsvector(%s), 'B') ||
setweight(to_tsvector(%s), 'C') ||
setweight(to_tsvector(%s), 'D')
)
WHERE
"id" = %s
"""
# The UPDATE has one extra placeholder: the id in its WHERE clause.
sql_params.append(search_entry.id)
# Perform the query.
connection.cursor().execute(sql_str, sql_params)
def do_search(self, queryset, search_text):
"""Performs the full text search."""
return queryset.extra(
@ -156,89 +60,6 @@ class PostgresSearchBackend(SearchBackend):
)
class DumbSearchBackend(SearchBackend):
"""
A search backend that uses a dumb ILIKE search to find results.
This is fine for debugging locally, but rubbish for production.
"""
def do_install(self):
"""Just create a dumb text column."""
# South is imported inside the method rather than at module level.
from south.db import db
db.add_column(SearchEntry._meta.db_table, "search_text", models.TextField(default=""), keep_default=False)
def do_search(self, queryset, search_text):
"""
Performs the dumb search.

Builds one ``LIKE`` clause per word of ``search_text``, OR-ed together, and
applies it to the queryset through ``extra()``.
"""
# Words are lower-cased here; save_search_entry stores the text lower-cased too.
words = search_text.lower().split()
# Only the quoted column name is formatted into the SQL; the words
# themselves are passed separately as query parameters.
sql_str = "({sql})".format(
sql = u" OR ".join(
u"({search_text} LIKE %s)".format(
search_text = connection.ops.quote_name("search_text"),
)
for _ in words
)
)
sql_params = [
"%" + connection.ops.prep_for_like_query(word) + "%"
for word in words
]
return queryset.extra(
where = (sql_str,),
params = sql_params,
)
def save_search_entry(self, obj, search_entry, weighted_search_text):
"""
Saves the search entry using raw SQL.

An INSERT is issued when ``search_entry.pk`` is None, otherwise an UPDATE of
the row with that id. ``obj`` is not used by this implementation.
"""
# Consolidate the search entry data.
search_text = u" ".join(weighted_search_text).lower()
# Hijack the save with raw SQL!
sql_params = [
search_entry.object_id,
search_entry.object_id_int,
search_entry.content_type_id,
search_entry.meta_encoded,
search_text,
]
if search_entry.pk is None:
# Perform a raw insert.
sql_str = u"""
INSERT INTO
{watson_searchentry}
(
{object_id},
{object_id_int},
{content_type_id},
{meta_encoded},
{search_text}
) VALUES (
%s, %s, %s, %s, %s
)"""
else:
# Perform a raw update.
sql_str = u"""
UPDATE
{watson_searchentry}
SET
{object_id} = %s,
{object_id_int} = %s,
{content_type_id} = %s,
{meta_encoded} = %s,
{search_text} = %s
WHERE
{id} = %s
"""
# The UPDATE has one extra placeholder: the id in its WHERE clause.
sql_params.append(search_entry.id)
# Perform the query.
# Each {name} placeholder in the template is filled with that name as quoted
# by the active database backend.
sql_str = sql_str.format(**dict(
(column_name, connection.ops.quote_name(column_name))
for column_name in
("watson_searchentry", "object_id", "object_id_int", "content_type_id", "meta_encoded", "search_text", "id")
))
connection.cursor().execute(sql_str, sql_params)
class AdaptiveSearchBackend(SearchBackend):
"""
@ -248,8 +69,9 @@ class AdaptiveSearchBackend(SearchBackend):
def __new__(cls):
"""Guess the correct search backend and initialize it."""
return SearchBackend()
database_engine = settings.DATABASES["default"]["ENGINE"]
if database_engine.endswith("postgresql_psycopg2") or database_engine.endswith("postgresql"):
return PostgresSearchBackend()
else:
return DumbSearchBackend()
return SearchBackend()

View file

@ -0,0 +1,46 @@
"""Rebuilds the database indices needed by django-watson."""
from django.core.management.base import NoArgsCommand
from django.contrib.contenttypes.models import ContentType
from django.db import transaction
from watson.registration import SearchEngine
from watson.models import SearchEntry
# Management command that, for every created search engine, re-indexes the
# objects of its registered models and deletes search entries whose content
# type is not among those models.
class Command(NoArgsCommand):
help = "Rebuilds the database indices needed by django-watson."
@transaction.commit_on_success
def handle_noargs(self, **options):
"""
Runs the management command.

Reads the ``verbosity`` option (default 1); progress messages are printed
only when it is 2 or higher.
"""
verbosity = int(options.get("verbosity", 1))
for engine_slug, search_engine in SearchEngine.get_created_engines():
registered_models = search_engine.get_registered_models()
# Rebuild the index for all registered models.
# NOTE(review): the counter is named "model count" but is reported below as a
# number of search entries — confirm which one it is meant to count.
refreshed_model_count = 0
for model in registered_models:
for obj in model._default_manager.all().iterator():
search_engine.update_obj_index(obj)
refreshed_model_count += 1
if verbosity >= 2:
print u"Refreshed {refreshed_model_count} search entry(s) in {engine_slug!r} search engine.".format(
refreshed_model_count = refreshed_model_count,
engine_slug = engine_slug,
)
# Clean out any search entries that exist for stale content types.
valid_content_types = [ContentType.objects.get_for_model(model) for model in registered_models]
# Only entries belonging to this engine are candidates for deletion.
stale_entries = SearchEntry.objects.filter(
engine_slug = engine_slug,
).exclude(
content_type__in = valid_content_types
)
stale_entry_count = stale_entries.count()
if stale_entry_count > 0:
stale_entries.delete()
if verbosity >= 2:
print u"Deleted {stale_entry_count} stale search entry(s) in {engine_slug!r} search engine.".format(
stale_entry_count = stale_entry_count,
engine_slug = engine_slug,
)

View file

@ -16,4 +16,4 @@ class Command(NoArgsCommand):
"""Runs the management command."""
backend = get_backend()
install_sql = backend.do_install()
call_command("rebuildwatson")
call_command("buildwatson")

View file

@ -1,38 +0,0 @@
"""Rebuilds the database indices needed by django-watson."""
from django.core.management.base import NoArgsCommand
from django.contrib.contenttypes.models import ContentType
from django.db import transaction
from watson.registration import get_registered_models, search_context_manager
from watson.models import SearchEntry
# Management command that re-indexes the objects of every registered model and
# then deletes search entries whose content type is not among those models.
class Command(NoArgsCommand):
help = "Rebuilds the database indices needed by django-watson."
@transaction.commit_on_success
def handle_noargs(self, **options):
"""
Runs the management command.

Reads the ``verbosity`` option (default 1); progress messages are printed
only when it is 2 or higher.
"""
verbosity = int(options.get("verbosity", 1))
registered_models = get_registered_models()
# Rebuild the index for all registered models.
# NOTE(review): the counter is named "model count" but is reported below as a
# number of search entries — confirm which one it is meant to count.
refreshed_model_count = 0
for model in registered_models:
for obj in model._default_manager.all().iterator():
search_context_manager.update_obj_index(obj)
refreshed_model_count += 1
if verbosity >= 2:
print u"Refreshed {refreshed_model_count} current search entry(s).".format(
refreshed_model_count = refreshed_model_count,
)
# Clean out any search entries that exist for stale content types.
valid_content_types = [ContentType.objects.get_for_model(model) for model in registered_models]
stale_entries = SearchEntry.objects.exclude(content_type__in=valid_content_types)
# The count is taken before the delete so that it can be reported afterwards.
stale_entry_count = stale_entries.count()
stale_entries.delete()
if verbosity >= 2:
print u"Deleted {stale_entry_count} stale search entry(s).".format(
stale_entry_count = stale_entry_count,
)

View file

@ -12,14 +12,19 @@ class Migration(SchemaMigration):
# Adding model 'SearchEntry'
db.create_table('watson_searchentry', (
('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
('engine_slug', self.gf('django.db.models.fields.CharField')(max_length=200, db_index=True)),
('content_type', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['contenttypes.ContentType'])),
('object_id', self.gf('django.db.models.fields.TextField')()),
('object_id_int', self.gf('django.db.models.fields.IntegerField')(db_index=True, null=True, blank=True)),
('title', self.gf('django.db.models.fields.CharField')(max_length=1000)),
('description', self.gf('django.db.models.fields.TextField')(blank=True)),
('content', self.gf('django.db.models.fields.TextField')(blank=True)),
('url', self.gf('django.db.models.fields.CharField')(max_length=1000, blank=True)),
('meta_encoded', self.gf('django.db.models.fields.TextField')()),
))
db.send_create_signal('watson', ['SearchEntry'])
# Install the django-watson search indices.
# Install watson.
call_command("installwatson")
@ -39,12 +44,16 @@ class Migration(SchemaMigration):
},
'watson.searchentry': {
'Meta': {'object_name': 'SearchEntry'},
'content': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
'description': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
'engine_slug': ('django.db.models.fields.CharField', [], {'max_length': '200', 'db_index': 'True'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'meta_encoded': ('django.db.models.fields.TextField', [], {}),
'object_id': ('django.db.models.fields.TextField', [], {}),
'object_id_int': ('django.db.models.fields.IntegerField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}),
'search_text': ('django.db.models.fields.TextField', [], {})
'title': ('django.db.models.fields.CharField', [], {'max_length': '1000'}),
'url': ('django.db.models.fields.CharField', [], {'max_length': '1000', 'blank': 'True'})
}
}

View file

@ -21,7 +21,12 @@ META_CACHE_KEY = "_meta_cache"
class SearchEntry(models.Model):
"""An entry in the search index."""
engine_slug = models.CharField(
max_length = 200,
db_index = True,
)
content_type = models.ForeignKey(
ContentType,
)
@ -36,6 +41,23 @@ class SearchEntry(models.Model):
object = generic.GenericForeignKey()
title = models.CharField(
max_length = 1000,
)
description = models.TextField(
blank = True,
)
content = models.TextField(
blank = True,
)
url = models.CharField(
max_length = 1000,
blank = True,
)
meta_encoded = models.TextField()
@property
@ -56,4 +78,15 @@ class SearchEntry(models.Model):
if hasattr(self, META_CACHE_KEY):
delattr(self, META_CACHE_KEY)
# Set the meta.
self.meta_encoded = cPickle.dumps(meta_value).decode("utf-8")
self.meta_encoded = cPickle.dumps(meta_value).decode("utf-8")
def get_absolute_url(self):
"""Returns the URL of the referenced object."""
return self.url
def __unicode__(self):
"""Returns a unicode representation."""
return self.title
class Meta:
verbose_name_plural = "search entries"

View file

@ -1,13 +1,16 @@
"""Adapters for registering models with django-watson."""
import operator
from threading import local
from contextlib import contextmanager
from functools import wraps
from weakref import WeakValueDictionary
from django.conf import settings
from django.core.signals import request_started, request_finished
from django.contrib.contenttypes.models import ContentType
from django.db import models
from django.db.models import Q
from django.db.models.signals import post_save, pre_delete
from django.utils.html import strip_tags
from django.utils.importlib import import_module
@ -15,6 +18,11 @@ from django.utils.importlib import import_module
from watson.models import SearchEntry, has_int_pk
class SearchAdapterError(Exception):
"""Something went wrong with a search adapter."""
class SearchAdapter(object):
"""An adapter for performing a full-text search on a model."""
@ -29,131 +37,79 @@ class SearchAdapter(object):
fields = None
# Use to exclude fields from the search.
exclude = None
exclude = ()
# Use to specify object properties to be stored in the search index.
store = None
store = ()
def __init__(self, model):
"""Initializes the search adapter."""
self.model = model
def get_meta(self, obj):
"""Returns a dictionary of meta information about the given obj."""
meta = {
"title": unicode(obj),
}
# Add in the URL.
if hasattr(obj, "get_absolute_url"):
meta["url"] = obj.get_absolute_url()
# Add in the stored fields.
if self.store:
for field_name in self.store:
value = getattr(obj, field_name)
if callable(value):
value = value()
meta[field_name] = value
# Return the meta information.
return meta
def _resolve_field(self, obj, name):
"""Resolves the content of the given model field."""
# Get the attribute.
if hasattr(obj, name):
value = getattr(obj, name)
elif hasattr(self, name):
value = getattr(self, name)
else:
raise SearchAdapterError("Could not find a property called {name!r} on either {obj!r} or {search_adapter!r}".format(
name = name,
obj = obj,
search_adapter = self,
))
# Check for callables.
if callable(value):
value = value()
# Resolution complete!
return value
def get_title(self, obj):
"""Returns the search title for the given obj."""
return unicode(obj)
def get_search_text(self, obj):
"""Returns the search text associated with the given obj."""
def get_description(self, obj):
"""Returns the search description for the given obj."""
return u""
def get_content(self, obj):
"""Returns the search content for the given obj."""
# Get the field names to look up.
if self.fields is None:
field_names = [field.name for field in self.model._meta.fields if isinstance(field, (models.CharField, models.TextField))]
field_names = (field.name for field in self.model._meta.fields if isinstance(field, (models.CharField, models.TextField)))
else:
field_names = self.fields
# Exclude named fields.
if self.exclude:
field_names = [field_name for field_name in field_names if field_name not in self.exclude]
field_names = (field_name for field_name in field_names if field_name not in self.exclude)
# Create the text.
text_parts = []
for field_name in field_names:
# Resolve the value.
value = getattr(obj, field_name)
if callable(value):
value = value()
value = unicode(value)
value = strip_tags(value)
# Store the value.
text_parts.append(value)
# Consolidate the text.
return u" ".join(text_parts)
def get_weighted_search_text(self, obj):
"""Returns the weighted search text associated with the given obj."""
return (unicode(obj), self.get_search_text(obj),)
class RegistrationError(Exception):
"""Something went wrong with registering a model with django-watson."""
# The registered models.
_registered_models = {}
def is_registered(model):
"""Checks whether the given model is registered with django-watson."""
global _registered_models
return model in _registered_models
def register(model, adapter_cls=SearchAdapter):
"""
Registers the given model with django-watson.
return u" ".join(
strip_tags(self._resolve_field(obj, field_name))
for field_name in field_names
)
If the given model is already registered with django-watson, a
RegistrationError will be raised.
"""
global _registered_models
# Check for existing registration.
if is_registered(model):
raise RegistrationError("{model!r} is already registered with django-watson".format(
model = model,
))
# Perform the registration.
adapter_obj = adapter_cls(model)
_registered_models[model] = adapter_obj
# Connect to the signalling framework.
post_save.connect(search_context_manager.post_save_receiver, model)
pre_delete.connect(search_context_manager.pre_delete_receiver, model)
def get_url(self, obj):
    """Returns obj.get_absolute_url() when obj has that attribute, otherwise an empty string."""
    if not hasattr(obj, "get_absolute_url"):
        return u""
    return obj.get_absolute_url()
def unregister(model):
    """
    Unregisters the given model with django-watson.

    If the given model is not registered with django-watson, a RegistrationError
    will be raised.
    """
    if not is_registered(model):
        raise RegistrationError("{model!r} not registered with django-watson".format(
            model = model,
        ))
    # Perform the unregistration.
    del _registered_models[model]
    # Disconnect from the signalling framework. Both receivers must be
    # disconnected; connecting pre_delete again here would leave the model
    # hooked up after it has been unregistered.
    post_save.disconnect(search_context_manager.post_save_receiver, model)
    pre_delete.disconnect(search_context_manager.pre_delete_receiver, model)
def get_registered_models():
"""Returns a sequence of models that have been registered with django-watson."""
global _registered_models
return _registered_models.keys()
def get_adapter(model):
"""Returns the adapter associated with the given model."""
global _registered_models
if is_registered(model):
return _registered_models[model]
raise RegistrationError("{model!r} not registered with django-watson".format(
model = model,
))
def get_meta(self, obj):
    """
    Returns a dictionary of meta information about the given obj.

    The dictionary has one entry per name in ``self.store``, each resolved
    through ``self._resolve_field``.
    """
    # obj must be passed through: _resolve_field takes (obj, name).
    return dict(
        (field_name, self._resolve_field(obj, field_name))
        for field_name in self.store
    )
class SearchEngineError(Exception):
"""Something went wrong with a search engine."""
class RegistrationError(SearchEngineError):
"""Something went wrong when registering a model with a search engine."""
class SearchContextError(Exception):
@ -185,60 +141,18 @@ class SearchContextManager(local):
"""Starts a level in the search context."""
self._stack.append(set())
def _get_entries_for_obj(self, obj):
"""Returns a queryset of entries associate with the given obj."""
model = obj.__class__
content_type = ContentType.objects.get_for_model(model)
object_id = unicode(obj.pk)
# Get the basic list of search entries.
search_entries = SearchEntry.objects.filter(
content_type = content_type,
)
if has_int_pk(model):
# Do a fast indexed lookup.
object_id_int = int(obj.pk)
search_entries = search_entries.filter(
object_id_int = object_id_int,
)
else:
# Alas, have to do a slow unindexed lookup.
object_id_int = None
search_entries = search_entries.filter(
object_id = object_id,
)
return object_id_int, search_entries
def update_obj_index(self, obj):
"""Updates the search index for the given obj."""
model = obj.__class__
adapter = get_adapter(model)
content_type = ContentType.objects.get_for_model(model)
object_id = unicode(obj.pk)
# Create the search data.
meta = adapter.get_meta(obj)
weighted_search_text = adapter.get_weighted_search_text(obj)
# Try to get the existing search entry.
object_id_int, search_entries = self._get_entries_for_obj(obj)
try:
search_entry = search_entries.get()
except SearchEntry.DoesNotExist:
search_entry = SearchEntry(
content_type = content_type,
object_id = object_id,
object_id_int = object_id_int,
)
# Store search meta.
search_entry.meta = meta
# Pass on the entry for final processing to the search backend.
get_backend().save_search_entry(obj, search_entry, weighted_search_text)
def add_to_context(self, engine, obj):
    """Adds the (engine, obj) pair to the innermost context level; does nothing when no context is active."""
    if not self.is_active():
        return
    current_level = self._stack[-1]
    current_level.add((engine, obj))
def end(self):
"""Ends a level in the search context."""
self._assert_active()
# Save all the models.
objs = self._stack.pop()
for obj in objs:
self.update_obj_index(obj)
tasks = self._stack.pop()
for engine, obj in tasks:
engine.update_obj_index(obj)
# Context management.
@ -260,16 +174,6 @@ class SearchContextManager(local):
return do_update_index
# Signalling hooks.
def post_save_receiver(self, instance, **kwargs):
"""Signal handler for when a registered model has been saved."""
if self.is_active():
self._stack[-1].add(instance)
def pre_delete_receiver(self, instance, **kwargs):
"""Signal handler for when a registered model has been deleted."""
_, search_entries = self._get_entries_for_obj(instance)
search_entries.delete()
def request_started_receiver(self, **kwargs):
"""Signal handler for when the request starts."""
@ -290,6 +194,209 @@ class SearchContextManager(local):
search_context_manager = SearchContextManager()
class SearchEngine(object):

    """
    A search engine capable of performing multi-table searches.

    Each engine has a slug, keeps its own mapping of registered models to
    adapter instances, and reads and writes only the SearchEntry rows that
    carry its slug.
    """

    # Maps engine slug -> engine instance, for every engine created so far.
    _created_engines = WeakValueDictionary()

    @classmethod
    def get_created_engines(cls):
        """Returns a list of (engine_slug, search_engine) pairs for all created search engines."""
        return list(cls._created_engines.items())

    def __init__(self, engine_slug, search_context_manager=search_context_manager):
        """
        Initializes the search engine.

        Raises a SearchEngineError if an engine with the same slug already exists.
        """
        # Check the slug is unique for this project.
        if engine_slug in SearchEngine._created_engines:
            raise SearchEngineError("A search engine has already been created with the slug {engine_slug!r}".format(
                engine_slug = engine_slug,
            ))
        # Initialize this engine.
        self._registered_models = {}
        self._engine_slug = engine_slug
        # Store the search context.
        self._search_context_manager = search_context_manager
        self.context = search_context_manager.context
        self.update_index = search_context_manager.update_index
        # Store a reference to this engine.
        self.__class__._created_engines[engine_slug] = self

    def is_registered(self, model):
        """Checks whether the given model is registered with this search engine."""
        return model in self._registered_models

    def register(self, model, adapter_cls=SearchAdapter, live_filter=None, fields=None, exclude=None, store=None):
        """
        Registers the given model with this search engine.

        Any of ``live_filter``, ``fields``, ``exclude`` and ``store`` that is not
        None is set as a class attribute on a dynamically created subclass of
        ``adapter_cls``, which is then used in its place.

        If the given model is already registered with this search engine, a
        RegistrationError will be raised.
        """
        # Check for existing registration.
        if self.is_registered(model):
            raise RegistrationError("{model!r} is already registered with this search engine".format(
                model = model,
            ))
        # Perform any customization.
        field_overrides = dict(
            (field_name, field_value)
            for field_name, field_value
            in (
                ("live_filter", live_filter),
                ("fields", fields),
                ("exclude", exclude),
                ("store", store),
            )
            if field_value is not None
        )
        if field_overrides:
            adapter_cls = type(adapter_cls.__name__ + "Custom", (adapter_cls,), field_overrides)
        # Perform the registration.
        adapter_obj = adapter_cls(model)
        self._registered_models[model] = adapter_obj
        # Connect to the signalling framework.
        post_save.connect(self.post_save_receiver, model)
        pre_delete.connect(self.pre_delete_receiver, model)

    def unregister(self, model):
        """
        Unregisters the given model with this search engine.

        If the given model is not registered with this search engine, a RegistrationError
        will be raised.
        """
        if not self.is_registered(model):
            raise RegistrationError("{model!r} is not registered with this search engine".format(
                model = model,
            ))
        # Perform the unregistration.
        del self._registered_models[model]
        # Disconnect from the signalling framework. Both receivers must be
        # disconnected, mirroring the two connect() calls made by register().
        post_save.disconnect(self.post_save_receiver, model)
        pre_delete.disconnect(self.pre_delete_receiver, model)

    def get_registered_models(self):
        """Returns a sequence of models that have been registered with this search engine."""
        return self._registered_models.keys()

    def get_adapter(self, model):
        """
        Returns the adapter associated with the given model.

        Raises a RegistrationError if the model is not registered with this search engine.
        """
        if self.is_registered(model):
            return self._registered_models[model]
        raise RegistrationError("{model!r} is not registered with this search engine".format(
            model = model,
        ))

    def _get_entries_for_obj(self, obj):
        """
        Returns an (object_id_int, queryset) pair for the given obj.

        The queryset contains this engine's search entries associated with obj.
        ``object_id_int`` is the integer primary key when has_int_pk() is true
        for the model, otherwise None.
        """
        model = obj.__class__
        content_type = ContentType.objects.get_for_model(model)
        object_id = unicode(obj.pk)
        # Get the basic list of search entries.
        search_entries = SearchEntry.objects.filter(
            content_type = content_type,
            engine_slug = self._engine_slug,
        )
        if has_int_pk(model):
            # Do a fast indexed lookup.
            object_id_int = int(obj.pk)
            search_entries = search_entries.filter(
                object_id_int = object_id_int,
            )
        else:
            # Alas, have to do a slow unindexed lookup.
            object_id_int = None
            search_entries = search_entries.filter(
                object_id = object_id,
            )
        return object_id_int, search_entries

    def update_obj_index(self, obj):
        """
        Updates the search index for the given obj.

        The existing search entry is reused when there is one, otherwise a new
        one is created. Its fields are filled in from the model's adapter and
        the entry is handed to the search backend to be saved.
        """
        model = obj.__class__
        adapter = self.get_adapter(model)
        content_type = ContentType.objects.get_for_model(model)
        object_id = unicode(obj.pk)
        # Try to get the existing search entry.
        object_id_int, search_entries = self._get_entries_for_obj(obj)
        try:
            search_entry = search_entries.get()
        except SearchEntry.DoesNotExist:
            search_entry = SearchEntry(
                content_type = content_type,
                object_id = object_id,
                object_id_int = object_id_int,
            )
        # Store data.
        search_entry.engine_slug = self._engine_slug
        search_entry.title = adapter.get_title(obj)
        search_entry.description = adapter.get_description(obj)
        search_entry.content = adapter.get_content(obj)
        search_entry.url = adapter.get_url(obj)
        search_entry.meta = adapter.get_meta(obj)
        # Pass on the entry for final processing to the search backend.
        get_backend().save_search_entry(search_entry, obj, adapter)

    # Signalling hooks.

    def post_save_receiver(self, instance, **kwargs):
        """Signal handler for when a registered model has been saved."""
        if self._search_context_manager.is_active():
            self._search_context_manager.add_to_context(self, instance)

    def pre_delete_receiver(self, instance, **kwargs):
        """Signal handler for when a registered model has been deleted."""
        _, search_entries = self._get_entries_for_obj(instance)
        search_entries.delete()

    # Searching.

    def search(self, search_text, models=None, exclude=None):
        """
        Performs a search using the given text, returning a queryset of SearchEntry.

        ``models`` limits the search to the given models; when it is empty or
        None, all registered models are searched. Models listed in ``exclude``
        are left out. If no models remain, an empty queryset is returned.
        """
        queryset = SearchEntry.objects.filter(
            engine_slug = self._engine_slug,
        )
        # Add in a model limiter.
        allowed_models = models or self.get_registered_models()
        if exclude:
            allowed_models = [model for model in allowed_models if model not in exclude]
        # Perform any live filters.
        live_subqueries = []
        for model in allowed_models:
            content_type = ContentType.objects.get_for_model(model)
            adapter = self.get_adapter(model)
            if adapter.live_filter:
                live_pks = model._default_manager.all().values_list("pk", flat=True)
                if has_int_pk(model):
                    # We can do this as an in-database join.
                    live_subquery = Q(
                        content_type = content_type,
                        object_id_int__in = live_pks,
                    )
                else:
                    # We have to do this as two separate queries. Oh well.
                    live_subquery = Q(
                        content_type = content_type,
                        object_id__in = [unicode(pk) for pk in live_pks],
                    )
            else:
                live_subquery = Q(
                    content_type = content_type,
                )
            live_subqueries.append(live_subquery)
        # With no models to search, reduce() would raise on the empty list,
        # so there is nothing that can match.
        if not live_subqueries:
            return queryset.none()
        live_subquery = reduce(operator.or_, live_subqueries)
        queryset = queryset.filter(live_subquery)
        # Perform the backend-specific full text match.
        queryset = get_backend().do_search(queryset, search_text)
        return queryset
# The default search engine.
default_search_engine = SearchEngine("default")
# The cache for the initialized backend.
_backend_cache = None

View file

@ -4,7 +4,8 @@ from django.db import models
from django.test import TestCase
from django.core.management import call_command
from watson.registration import register, unregister, is_registered, get_registered_models, get_adapter, RegistrationError, SearchAdapter, search_context_manager, get_backend
import watson
from watson.registration import RegistrationError
from watson.models import SearchEntry
@ -64,38 +65,38 @@ class TestModel2(TestModelBase):
)
class TestModel2SearchAdapter(SearchAdapter):
class TestModel2SearchAdapter(watson.SearchAdapter):
exclude = ("id",)
class RegistrationText(TestCase):
class RegistrationTest(TestCase):
def testRegistration(self):
# Register the model and test.
register(TestModel1)
self.assertTrue(is_registered(TestModel1))
self.assertRaises(RegistrationError, lambda: register(TestModel1))
self.assertEqual(get_registered_models(), [TestModel1])
self.assertTrue(isinstance(get_adapter(TestModel1), SearchAdapter))
watson.register(TestModel1)
self.assertTrue(watson.is_registered(TestModel1))
self.assertRaises(RegistrationError, lambda: watson.register(TestModel1))
self.assertEqual(watson.get_registered_models(), [TestModel1])
self.assertTrue(isinstance(watson.get_adapter(TestModel1), watson.SearchAdapter))
# Unregister the model and test.
unregister(TestModel1)
self.assertFalse(is_registered(TestModel1))
self.assertRaises(RegistrationError, lambda: unregister(TestModel1))
self.assertEqual(get_registered_models(), [])
self.assertRaises(RegistrationError, lambda: isinstance(get_adapter(TestModel1)))
watson.unregister(TestModel1)
self.assertFalse(watson.is_registered(TestModel1))
self.assertRaises(RegistrationError, lambda: watson.unregister(TestModel1))
self.assertEqual(watson.get_registered_models(), [])
self.assertRaises(RegistrationError, lambda: isinstance(watson.get_adapter(TestModel1)))
class SearchTest(TestCase):
search_adapter_1 = SearchAdapter
search_adapter_1 = watson.SearchAdapter
search_adapter_2 = TestModel2SearchAdapter
@search_context_manager.update_index
@watson.update_index
def setUp(self):
register(TestModel1, self.search_adapter_1)
register(TestModel2, self.search_adapter_2)
watson.register(TestModel1, self.search_adapter_1)
watson.register(TestModel2, self.search_adapter_2)
# Create some test models.
self.test11 = TestModel1.objects.create(
title = "title model1 11",
@ -118,65 +119,63 @@ class SearchTest(TestCase):
description = "description model2 22",
)
def testSearchEntriesCreated(self):
self.assertEqual(SearchEntry.objects.count(), 4)
# Searches without any model restriction and checks the result counts.
# Each backend.search(...) line is followed by the same check through watson.search(...).
def testMultiTableSearch(self):
backend = get_backend()
# Test a search that should get all models.
self.assertEqual(backend.search("tItle Content Description").count(), 4)
self.assertEqual(watson.search("tItle Content Description").count(), 4)
# Test a search that should get two models.
self.assertEqual(backend.search("mOdel1").count(), 2)
self.assertEqual(watson.search("mOdel1").count(), 2)
# Test a search that should get one model.
exact_search = backend.search("11")
exact_search = watson.search("11")
self.assertEqual(len(exact_search), 1)
# The title is read two ways: from the result's meta mapping, then as an attribute.
self.assertEqual(exact_search[0].meta["title"], "title model1 11")
self.assertEqual(exact_search[0].title, "title model1 11")
# Saves a changed title inside a search context and checks that a search for the
# new title returns exactly one result carrying that title.
def testUpdateSearchIndex(self):
backend = get_backend()
# Update a model and make sure that the search results match.
with search_context_manager.context():
with watson.context():
self.test11.title = "foo"
self.test11.save()
# Test a search that should get one model.
exact_search = backend.search("foo")
exact_search = watson.search("foo")
self.assertEqual(len(exact_search), 1)
# The title is read two ways: from the result's meta mapping, then as an attribute.
self.assertEqual(exact_search[0].meta["title"], "foo")
self.assertEqual(exact_search[0].title, "foo")
# Checks result counts when the search is given a `models` tuple.
def testLimitedModelList(self):
backend = get_backend()
# Test a broad search limited to TestModel1; two results are expected.
self.assertEqual(backend.search("tItle Content Description", models=(TestModel1,)).count(), 2)
self.assertEqual(watson.search("tItle Content Description", models=(TestModel1,)).count(), 2)
# Test a search that should get one model.
exact_search = backend.search("11", models=(TestModel1,))
exact_search = watson.search("11", models=(TestModel1,))
self.assertEqual(len(exact_search), 1)
# The title is read two ways: from the result's meta mapping, then as an attribute.
self.assertEqual(exact_search[0].meta["title"], "title model1 11")
self.assertEqual(exact_search[0].title, "title model1 11")
# Test a search that should get no models.
self.assertEqual(backend.search("11", models=(TestModel2,)).count(), 0)
self.assertEqual(watson.search("11", models=(TestModel2,)).count(), 0)
# Checks result counts when the search is given an `exclude` tuple.
def testExcludedModelList(self):
backend = get_backend()
# Test a broad search with TestModel2 excluded; two results are expected.
self.assertEqual(backend.search("tItle Content Description", exclude=(TestModel2,)).count(), 2)
self.assertEqual(watson.search("tItle Content Description", exclude=(TestModel2,)).count(), 2)
# Test a search that should get one model.
exact_search = backend.search("11", exclude=(TestModel2,))
exact_search = watson.search("11", exclude=(TestModel2,))
self.assertEqual(len(exact_search), 1)
# The title is read two ways: from the result's meta mapping, then as an attribute.
self.assertEqual(exact_search[0].meta["title"], "title model1 11")
self.assertEqual(exact_search[0].title, "title model1 11")
# Test a search that should get no models.
self.assertEqual(backend.search("11", exclude=(TestModel1,)).count(), 0)
self.assertEqual(watson.search("11", exclude=(TestModel1,)).count(), 0)
# Saves a change outside any search context, checks that a search does not see it,
# then runs the management command and checks that the search now finds it.
def testRebuildWatsonCommand(self):
backend = get_backend()
# This update won't take effect, because no search context is active.
self.test11.title = "foo"
self.test11.save()
# Test that no update has happened.
self.assertEqual(backend.search("foo").count(), 0)
self.assertEqual(watson.search("foo").count(), 0)
# Run the rebuild command.
# NOTE(review): two command names appear back to back ("rebuildwatson", then
# "buildwatson"); this looks like a diff pair renaming the command.
call_command("rebuildwatson")
call_command("buildwatson")
# Test that the update is now applied.
self.assertEqual(backend.search("foo").count(), 1)
self.assertEqual(watson.search("foo").count(), 1)
# Unregisters both test models and deletes the rows the tests created.
def tearDown(self):
unregister(TestModel1)
unregister(TestModel2)
# Same two unregistrations through the `watson` package namespace.
watson.unregister(TestModel1)
watson.unregister(TestModel2)
# Delete the test models.
TestModel1.objects.all().delete()
TestModel2.objects.all().delete()
# Diff hunk boundary: the lines between this point and the next hunk are not shown.
@ -188,7 +187,7 @ class SearchTest(TestCase):
SearchEntry.objects.all().delete()
# Search adapter subclass that sets `live_filter` to True.
# NOTE(review): two class headers appear back to back; this looks like a diff pair
# switching the base class from SearchAdapter to watson.SearchAdapter.
class LiveFilterSearchAdapter(SearchAdapter):
class LiveFilterSearchAdapter(watson.SearchAdapter):
# presumably enables filtering of search results against the live model table — confirm against SearchAdapter
live_filter = True
@ -205,14 +204,13 @@ class LiveFilterSearchTest(SearchTest):
search_adapter_2 = LiveFilterModel2SearchAdapter
# Sets is_published to False on two fixture objects inside a search context and
# checks that the broad search count falls from four to two.
def testUnpublishedModelsNotFound(self):
backend = get_backend()
# Make sure that there are four to find!
self.assertEqual(backend.search("tItle Content Description").count(), 4)
self.assertEqual(watson.search("tItle Content Description").count(), 4)
# Unpublish two objects.
with search_context_manager.context():
with watson.context():
self.test11.is_published = False
self.test11.save()
self.test21.is_published = False
self.test21.save()
# This should return 4, but two of them are unpublished.
self.assertEqual(backend.search("tItle Content Description").count(), 2)
self.assertEqual(watson.search("tItle Content Description").count(), 2)