Updated Xapian for the multiple indexes support in Haystack 2.X.

This commit is contained in:
Daniel Lindsley 2011-05-08 23:21:14 -05:00
parent 8112cf383d
commit b0a7505253
4 changed files with 122 additions and 140 deletions

View file

@ -7,6 +7,10 @@ INSTALLED_APPS += [
'xapian_tests',
]
HAYSTACK_SEARCH_ENGINE = 'xapian'
HAYSTACK_XAPIAN_PATH = os.path.join('tmp', 'test_xapian_query')
HAYSTACK_INCLUDE_SPELLING = True
HAYSTACK_CONNECTIONS = {
'default': {
'ENGINE': 'haystack.backends.xapian_backend.XapianEngine',
'PATH': os.path.join('tmp', 'test_xapian_query'),
'INCLUDE_SPELLING': True,
}
}

View file

@ -12,12 +12,13 @@ from django.conf import settings
from django.db import models
from django.test import TestCase
from haystack import indexes, sites, backends
from haystack.backends.xapian_backend import SearchBackend, SearchQuery, _marshal_value
from haystack import connections, reset_search_queries
from haystack import indexes
from haystack.backends.xapian_backend import _marshal_value
from haystack.exceptions import HaystackError
from haystack.models import SearchResult
from haystack.query import SearchQuerySet, SQ
from haystack.sites import SearchSite
from haystack.utils.loading import UnifiedIndex
from core.models import MockTag, MockModel, AnotherMockModel, AFourthMockModel
from core.tests.mocks import MockSearchResult
@ -67,6 +68,9 @@ class XapianMockSearchIndex(indexes.SearchIndex):
keys = indexes.MultiValueField()
titles = indexes.MultiValueField()
def get_model(self):
return XapianMockModel
def prepare_sites(self, obj):
return ['%d' % (i * obj.id) for i in xrange(1, 4)]
@ -105,16 +109,21 @@ class XapianBoostMockSearchIndex(indexes.SearchIndex):
author = indexes.CharField(model_attr='author', weight=2.0)
editor = indexes.CharField(model_attr='editor')
pub_date = indexes.DateField(model_attr='pub_date')
def get_model(self):
return AFourthMockModel
class XapianSearchBackendTestCase(TestCase):
def setUp(self):
super(XapianSearchBackendTestCase, self).setUp()
self.site = SearchSite()
self.backend = SearchBackend(site=self.site)
self.index = XapianMockSearchIndex(XapianMockModel, backend=self.backend)
self.site.register(XapianMockModel, XapianMockSearchIndex)
self.old_ui = connections['default'].get_unified_index()
self.ui = UnifiedIndex()
self.index = XapianMockSearchIndex()
self.ui.build(indexes=[self.index])
self.backend = connections['default'].get_backend()
connections['default']._index = self.ui
self.sample_objs = []
@ -134,9 +143,10 @@ class XapianSearchBackendTestCase(TestCase):
self.sample_objs[2].popularity = 972.0
def tearDown(self):
if os.path.exists(settings.HAYSTACK_XAPIAN_PATH):
shutil.rmtree(settings.HAYSTACK_XAPIAN_PATH)
if os.path.exists(settings.HAYSTACK_CONNECTIONS['default']['PATH']):
shutil.rmtree(settings.HAYSTACK_CONNECTIONS['default']['PATH'])
connections['default']._index = self.old_ui
super(XapianSearchBackendTestCase, self).tearDown()
def test_update(self):
@ -310,36 +320,6 @@ class XapianSearchBackendTestCase(TestCase):
# Ensure that swapping the ``result_class`` works.
self.assertTrue(isinstance(self.backend.more_like_this(self.sample_objs[0], result_class=MockSearchResult)['results'][0], MockSearchResult))
def test_use_correct_site(self):
test_site = SearchSite()
test_site.register(XapianMockModel, XapianMockSearchIndex)
self.backend.update(self.index, self.sample_objs)
# Make sure that ``_process_results`` uses the right ``site``.
self.assertEqual(self.backend.search(xapian.Query('indexed'))['hits'], 3)
self.assertEqual([result.pk for result in self.backend.search(xapian.Query('indexed'))['results']], [1, 2, 3])
self.site.unregister(XapianMockModel)
self.assertEqual(len(self.site.get_indexed_models()), 0)
self.backend.site = test_site
self.assertTrue(len(self.backend.site.get_indexed_models()) > 0)
# Should still be there, despite the main ``site`` not having that model
# registered any longer.
self.assertEqual(self.backend.search(xapian.Query('indexed'))['hits'], 3)
self.assertEqual([result.pk for result in self.backend.search(xapian.Query('indexed'))['results']], [1, 2, 3])
# Unregister it on the backend & make sure it takes effect.
self.backend.site.unregister(XapianMockModel)
self.assertEqual(len(self.backend.site.get_indexed_models()), 0)
self.assertEqual(self.backend.search(xapian.Query('indexed'))['hits'], 0)
# Nuke it & fallback on the main ``site``.
self.backend.site = haystack.site
self.assertEqual(self.backend.search(xapian.Query('indexed'))['hits'], 0)
self.site.register(XapianMockModel, XapianMockSearchIndex)
self.assertEqual(self.backend.search(xapian.Query('indexed'))['hits'], 3)
def test_order_by(self):
self.backend.update(self.index, self.sample_objs)
self.assertEqual(self.backend.document_count(), 3)
@ -395,7 +375,7 @@ class XapianSearchBackendTestCase(TestCase):
self.assertEqual(_marshal_value(datetime.datetime(2009, 5, 18, 1, 16, 30, 250)), u'20090518011630000250')
def test_build_schema(self):
(content_field_name, fields) = self.backend.build_schema(self.site.all_searchfields())
(content_field_name, fields) = self.backend.build_schema(connections['default'].get_unified_index().all_searchfields())
self.assertEqual(content_field_name, 'text')
self.assertEqual(len(fields), 13)
self.assertEqual(fields, [
@ -437,6 +417,9 @@ class LiveXapianMockSearchIndex(indexes.SearchIndex):
pub_date = indexes.DateField(model_attr='pub_date')
created = indexes.DateField()
title = indexes.CharField()
def get_model(self):
return MockModel
class LiveXapianSearchQueryTestCase(TestCase):
@ -448,13 +431,19 @@ class LiveXapianSearchQueryTestCase(TestCase):
def setUp(self):
super(LiveXapianSearchQueryTestCase, self).setUp()
site = SearchSite()
backend = SearchBackend(site=site)
index = LiveXapianMockSearchIndex(MockModel, backend=backend)
site.register(MockModel, LiveXapianMockSearchIndex)
self.old_ui = connections['default'].get_unified_index()
ui = UnifiedIndex()
index = LiveXapianMockSearchIndex()
ui.build(indexes=[index])
backend = connections['default'].get_backend()
connections['default']._index = ui
backend.update(index, MockModel.objects.all())
self.sq = SearchQuery(backend=backend)
self.sq = connections['default'].get_query()
def tearDown(self):
connections['default']._index = self.old_ui
super(LiveXapianSearchQueryTestCase, self).tearDown()
def test_get_spelling(self):
self.sq.add_filter(SQ(content='indxd'))
@ -491,32 +480,32 @@ class LiveXapianSearchQueryTestCase(TestCase):
self.assertEqual(str(self.sq.build_query()), u'Xapian::Query(((Zwhi OR why) AND VALUE_RANGE 2 00010101000000 20090210015900 AND (<alldocuments> AND_NOT VALUE_RANGE 3 a david) AND (<alldocuments> AND_NOT VALUE_RANGE 4 20090212121300 99990101000000) AND VALUE_RANGE 1 b zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz AND (Q1 OR Q2 OR Q3)))')
def test_log_query(self):
backends.reset_search_queries()
self.assertEqual(len(backends.queries), 0)
reset_search_queries()
self.assertEqual(len(connections['default'].queries), 0)
# Stow.
old_debug = settings.DEBUG
settings.DEBUG = False
len(self.sq.get_results())
self.assertEqual(len(backends.queries), 0)
self.assertEqual(len(connections['default'].queries), 0)
settings.DEBUG = True
# Redefine it to clear out the cached results.
self.sq = SearchQuery(backend=SearchBackend())
self.sq = connections['default'].get_query()
self.sq.add_filter(SQ(name='bar'))
len(self.sq.get_results())
self.assertEqual(len(backends.queries), 1)
self.assertEqual(str(backends.queries[0]['query_string']), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
self.assertEqual(len(connections['default'].queries), 1)
self.assertEqual(str(connections['default'].queries[0]['query_string']), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
# And again, for good measure.
self.sq = SearchQuery(backend=SearchBackend())
self.sq = connections['default'].get_query()
self.sq.add_filter(SQ(name='bar'))
self.sq.add_filter(SQ(text='moof'))
len(self.sq.get_results())
self.assertEqual(len(backends.queries), 2)
self.assertEqual(str(backends.queries[0]['query_string']), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
self.assertEqual(str(backends.queries[1]['query_string']), u'Xapian::Query(((ZXNAMEbar OR XNAMEbar) AND (ZXTEXTmoof OR XTEXTmoof)))')
self.assertEqual(len(connections['default'].queries), 2)
self.assertEqual(str(connections['default'].queries[0]['query_string']), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
self.assertEqual(str(connections['default'].queries[1]['query_string']), u'Xapian::Query(((ZXNAMEbar OR XNAMEbar) AND (ZXTEXTmoof OR XTEXTmoof)))')
# Restore.
settings.DEBUG = old_debug
@ -531,14 +520,20 @@ class LiveXapianSearchQuerySetTestCase(TestCase):
def setUp(self):
super(LiveXapianSearchQuerySetTestCase, self).setUp()
site = SearchSite()
backend = SearchBackend(site=site)
index = LiveXapianMockSearchIndex(MockModel, backend=backend)
site.register(MockModel, LiveXapianMockSearchIndex)
backend.update(index, MockModel.objects.all())
self.old_ui = connections['default'].get_unified_index()
self.ui = UnifiedIndex()
self.index = LiveXapianMockSearchIndex()
self.ui.build(indexes=[self.index])
self.backend = connections['default'].get_backend()
connections['default']._index = self.ui
self.backend.update(self.index, MockModel.objects.all())
self.sq = SearchQuery(backend=backend)
self.sqs = SearchQuerySet(query=self.sq)
self.sq = connections['default'].get_query()
self.sqs = SearchQuerySet()
def tearDown(self):
connections['default']._index = self.old_ui
super(LiveXapianSearchQuerySetTestCase, self).tearDown()
def test_result_class(self):
# Assert that we're defaulting to ``SearchResult``.
@ -557,16 +552,14 @@ class LiveXapianSearchQuerySetTestCase(TestCase):
class XapianBoostBackendTestCase(TestCase):
def setUp(self):
super(XapianBoostBackendTestCase, self).setUp()
self.site = SearchSite()
self.sb = SearchBackend(site=self.site)
self.smmi = XapianBoostMockSearchIndex(AFourthMockModel, backend=self.sb)
self.site.register(AFourthMockModel, XapianBoostMockSearchIndex)
# Stow.
import haystack
self.old_site = haystack.site
haystack.site = self.site
self.old_ui = connections['default'].get_unified_index()
self.ui = UnifiedIndex()
self.index = XapianBoostMockSearchIndex()
self.ui.build(indexes=[self.index])
self.sb = connections['default'].get_backend()
connections['default']._index = self.ui
self.sample_objs = []
@ -583,12 +576,11 @@ class XapianBoostBackendTestCase(TestCase):
self.sample_objs.append(mock)
def tearDown(self):
import haystack
haystack.site = self.old_site
connections['default']._index = self.old_ui
super(XapianBoostBackendTestCase, self).tearDown()
def test_boost(self):
self.sb.update(self.smmi, self.sample_objs)
self.sb.update(self.index, self.sample_objs)
sqs = SearchQuerySet()

View file

@ -7,7 +7,7 @@ import shutil
from django.conf import settings
from django.test import TestCase
from haystack.backends.xapian_backend import SearchBackend, SearchQuery
from haystack import connections
from haystack.query import SQ
from core.models import MockModel, AnotherMockModel
@ -16,11 +16,11 @@ from core.models import MockModel, AnotherMockModel
class XapianSearchQueryTestCase(TestCase):
def setUp(self):
super(XapianSearchQueryTestCase, self).setUp()
self.sq = SearchQuery(backend=SearchBackend())
self.sq = connections['default'].get_query()
def tearDown(self):
if os.path.exists(settings.HAYSTACK_XAPIAN_PATH):
shutil.rmtree(settings.HAYSTACK_XAPIAN_PATH)
if os.path.exists(settings.HAYSTACK_CONNECTIONS['default']['PATH']):
shutil.rmtree(settings.HAYSTACK_CONNECTIONS['default']['PATH'])
super(XapianSearchQueryTestCase, self).tearDown()

View file

@ -1,7 +1,7 @@
# Copyright (C) 2009-2011 David Sauve, Trapeze. All rights reserved.
__author__ = 'David Sauve'
__version__ = (1, 1, 5, 'beta')
__version__ = (2, 0, 0, 'beta')
import time
import datetime
@ -12,11 +12,11 @@ import shutil
import sys
import warnings
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.utils.encoding import smart_unicode, force_unicode
from django.utils.encoding import force_unicode
from haystack.backends import BaseSearchBackend, BaseSearchQuery, SearchNode, log_query
from haystack import connections
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, SearchNode, log_query
from haystack.exceptions import HaystackError, MissingDependency, MoreLikeThisError
from haystack.fields import DateField, DateTimeField, IntegerField, FloatField, BooleanField, MultiValueField
from haystack.models import SearchResult
@ -32,8 +32,6 @@ DOCUMENT_ID_TERM_PREFIX = 'Q'
DOCUMENT_CUSTOM_TERM_PREFIX = 'X'
DOCUMENT_CT_TERM_PREFIX = DOCUMENT_CUSTOM_TERM_PREFIX + 'CONTENTTYPE'
BACKEND_NAME = 'xapian'
DEFAULT_XAPIAN_FLAGS = (
xapian.QueryParser.FLAG_PHRASE |
xapian.QueryParser.FLAG_BOOLEAN |
@ -50,7 +48,8 @@ class InvalidIndexError(HaystackError):
class XHValueRangeProcessor(xapian.ValueRangeProcessor):
def __init__(self, backend):
self.backend = backend or SearchBackend()
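# FIXME: This needs to get smarter about pulling the right backend. The fallback below cannot work as written: `XapianSearchBackend` requires a `connection_alias` argument.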
self.backend = backend or XapianSearchBackend()
xapian.ValueRangeProcessor.__init__(self)
def __call__(self, begin, end):
@ -107,7 +106,7 @@ class XHExpandDecider(xapian.ExpandDecider):
return True
class SearchBackend(BaseSearchBackend):
class XapianSearchBackend(BaseSearchBackend):
"""
`XapianSearchBackend` defines the Xapian search backend for use with the Haystack
API for Django search.
@ -120,28 +119,33 @@ class SearchBackend(BaseSearchBackend):
`WSGIApplicationGroup to %{GLOBAL}` when using mod_wsgi, or
`PythonInterpreter main_interpreter` when using mod_python.
In order to use this backend, `HAYSTACK_XAPIAN_PATH` must be set in
your settings. This should point to a location where you would your
In order to use this backend, `PATH` must be included in the
`connection_options`. This should point to a location where you would like your
indexes to reside.
"""
def __init__(self, site=None, language='english'):
def __init__(self, connection_alias, language='english', **connection_options):
"""
Creates an instance of `XapianSearchBackend`.
Arguments:
`site` -- The site to associate the backend with (default = None)
`stemming_language` -- The stemming language (default = 'english')
`connection_alias` -- The name of the connection
`language` -- The stemming language (default = 'english')
`**connection_options` -- The various options needed to setup
the backend.
Also sets the stemming language to be used to `stemming_language`.
Also sets the stemming language to be used to `language`.
"""
super(SearchBackend, self).__init__(site)
super(XapianSearchBackend, self).__init__(connection_alias, **connection_options)
if not hasattr(settings, 'HAYSTACK_XAPIAN_PATH'):
raise ImproperlyConfigured('You must specify a HAYSTACK_XAPIAN_PATH in your settings.')
if not 'PATH' in connection_options:
raise ImproperlyConfigured("You must specify a 'PATH' in your settings for connection '%s'." % connection_alias)
if not os.path.exists(settings.HAYSTACK_XAPIAN_PATH):
os.makedirs(settings.HAYSTACK_XAPIAN_PATH)
self.path = connection_options.get('PATH')
if not os.path.exists(self.path):
os.makedirs(self.path)
self.flags = connection_options.get('FLAGS', DEFAULT_XAPIAN_FLAGS)
self.language = language
self._schema = None
self._content_field_name = None
@ -149,13 +153,13 @@ class SearchBackend(BaseSearchBackend):
@property
def schema(self):
if not self._schema:
self._content_field_name, self._schema = self.build_schema(self.site.all_searchfields())
self._content_field_name, self._schema = self.build_schema(connections[self.connection_alias].get_unified_index().all_searchfields())
return self._schema
@property
def content_field_name(self):
if not self._content_field_name:
self._content_field_name, self._schema = self.build_schema(self.site.all_searchfields())
self._content_field_name, self._schema = self.build_schema(connections[self.connection_alias].get_unified_index().all_searchfields())
return self._content_field_name
def update(self, index, iterable):
@ -204,7 +208,7 @@ class SearchBackend(BaseSearchBackend):
term_generator = xapian.TermGenerator()
term_generator.set_database(database)
term_generator.set_stemmer(xapian.Stem(self.language))
if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True:
if self.include_spelling is True:
term_generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)
term_generator.set_document(document)
@ -296,10 +300,10 @@ class SearchBackend(BaseSearchBackend):
database = self._database(writable=True)
if not models:
# Because there does not appear to be a "clear all" method,
# it's much quicker to remove the contents of the `HAYSTACK_XAPIAN_PATH`
# it's much quicker to remove the contents of the `self.path`
# folder than it is to remove each document one at a time.
if os.path.exists(settings.HAYSTACK_XAPIAN_PATH):
shutil.rmtree(settings.HAYSTACK_XAPIAN_PATH)
if os.path.exists(self.path):
shutil.rmtree(self.path)
else:
for model in models:
database.delete_document(
@ -349,16 +353,11 @@ class SearchBackend(BaseSearchBackend):
If `query` is None, returns no results.
If `HAYSTACK_INCLUDE_SPELLING` was enabled in `settings.py`, the
If `INCLUDE_SPELLING` was enabled in the connection options, the
extra flag `FLAG_SPELLING_CORRECTION` will be passed to the query parser
and any suggestions for spell correction will be returned as well as
the results.
"""
if not self.site:
from haystack import site
else:
site = self.site
if xapian.Query.empty(query):
return {
'results': [],
@ -370,7 +369,7 @@ class SearchBackend(BaseSearchBackend):
if result_class is None:
result_class = SearchResult
if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True:
if self.include_spelling is True:
spelling_suggestion = self._do_spelling_suggestion(database, query, spelling_query)
else:
spelling_suggestion = ''
@ -383,7 +382,7 @@ class SearchBackend(BaseSearchBackend):
)
if limit_to_registered_models:
registered_models = self.build_registered_models_list()
registered_models = self.build_models_list()
if len(registered_models) > 0:
query = xapian.Query(
@ -432,7 +431,7 @@ class SearchBackend(BaseSearchBackend):
)
}
results.append(
result_class(app_label, module_name, pk, match.percent, searchsite=site, **model_data)
result_class(app_label, module_name, pk, match.percent, **model_data)
)
if facets:
@ -481,11 +480,6 @@ class SearchBackend(BaseSearchBackend):
Finally, processes the resulting matches and returns.
"""
if not self.site:
from haystack import site
else:
site = self.site
database = self._database()
if result_class is None:
@ -513,7 +507,7 @@ class SearchBackend(BaseSearchBackend):
xapian.Query.OP_AND_NOT, [query, DOCUMENT_ID_TERM_PREFIX + get_identifier(model_instance)]
)
if limit_to_registered_models:
registered_models = self.build_registered_models_list()
registered_models = self.build_models_list()
if len(registered_models) > 0:
query = xapian.Query(
@ -537,7 +531,7 @@ class SearchBackend(BaseSearchBackend):
for match in matches:
app_label, module_name, pk, model_data = pickle.loads(self._get_document_data(database, match.document))
results.append(
result_class(app_label, module_name, pk, match.percent, searchsite=site, **model_data)
result_class(app_label, module_name, pk, match.percent, **model_data)
)
return {
@ -565,7 +559,6 @@ class SearchBackend(BaseSearchBackend):
elif query_string == '':
return xapian.Query() # Match nothing
flags = getattr(settings, 'HAYSTACK_XAPIAN_FLAGS', DEFAULT_XAPIAN_FLAGS)
qp = xapian.QueryParser()
qp.set_database(self._database())
qp.set_stemmer(xapian.Stem(self.language))
@ -581,7 +574,7 @@ class SearchBackend(BaseSearchBackend):
vrp = XHValueRangeProcessor(self)
qp.add_valuerangeprocessor(vrp)
return qp.parse_query(query_string, flags)
return qp.parse_query(query_string, self.flags)
def build_schema(self, fields):
"""
@ -811,12 +804,12 @@ class SearchBackend(BaseSearchBackend):
Returns an instance of a xapian.Database or xapian.WritableDatabase
"""
if writable:
database = xapian.WritableDatabase(settings.HAYSTACK_XAPIAN_PATH, xapian.DB_CREATE_OR_OPEN)
database = xapian.WritableDatabase(self.path, xapian.DB_CREATE_OR_OPEN)
else:
try:
database = xapian.Database(settings.HAYSTACK_XAPIAN_PATH)
database = xapian.Database(self.path)
except xapian.DatabaseOpeningError:
raise InvalidIndexError(u'Unable to open index at %s' % settings.HAYSTACK_XAPIAN_PATH)
raise InvalidIndexError(u'Unable to open index at %s' % self.path)
return database
@ -900,24 +893,12 @@ class SearchBackend(BaseSearchBackend):
return False
class SearchQuery(BaseSearchQuery):
class XapianSearchQuery(BaseSearchQuery):
"""
This class is the Xapian specific version of the SearchQuery class.
It acts as an intermediary between the ``SearchQuerySet`` and the
``SearchBackend`` itself.
"""
def __init__(self, backend=None, site=None):
"""
Create a new instance of the SearchQuery setting the backend as
specified. If no backend is set, will use the Xapian `SearchBackend`.
Optional arguments:
``backend`` -- The ``SearchBackend`` to use (default = None)
``site`` -- The site to use (default = None)
"""
super(SearchQuery, self).__init__(backend=backend)
self.backend = backend or SearchBackend(site=site)
def build_query(self):
if not self.query_filter:
query = xapian.Query('')
@ -1305,3 +1286,8 @@ def run_mlt(self):
results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **kwargs)
self._results = results.get('results', [])
self._hit_count = results.get('hits', 0)
# Engine entry point for this backend: a `BaseEngine` subclass that pairs the
# Xapian backend class with its query class. This is the class referenced by
# the 'ENGINE' connection option
# ('haystack.backends.xapian_backend.XapianEngine').
class XapianEngine(BaseEngine):
# Backend class for this engine.
backend = XapianSearchBackend
# Query class for this engine.
query = XapianSearchQuery