Merged master changes into next

This commit is contained in:
David Sauve 2009-11-28 12:31:03 -05:00
parent bff2a62f93
commit 500664a2e4
2 changed files with 37 additions and 30 deletions

View file

@ -26,7 +26,7 @@ from django.utils.encoding import force_unicode
from django.test import TestCase
from haystack import indexes, sites
from haystack.backends.xapian_backend import SearchBackend, DEFAULT_MAX_RESULTS
from haystack.backends.xapian_backend import SearchBackend
from core.models import MockTag, AnotherMockModel
@ -78,10 +78,6 @@ class XapianSearchBackendTestCase(TestCase):
def setUp(self):
super(XapianSearchBackendTestCase, self).setUp()
temp_path = os.path.join('tmp', 'test_xapian_query')
self.old_xapian_path = getattr(settings, 'HAYSTACK_XAPIAN_PATH', temp_path)
settings.HAYSTACK_XAPIAN_PATH = temp_path
self.site = XapianSearchSite()
self.sb = SearchBackend(site=self.site)
self.msi = XapianMockSearchIndex(XapianMockModel, backend=self.sb)
@ -100,14 +96,13 @@ class XapianSearchBackendTestCase(TestCase):
self.sample_objs.append(mock)
self.sample_objs[0].popularity = 834.0
self.sample_objs[1].popularity = 35.0
self.sample_objs[1].popularity = 35.5
self.sample_objs[2].popularity = 972.0
def tearDown(self):
if os.path.exists(settings.HAYSTACK_XAPIAN_PATH):
shutil.rmtree(settings.HAYSTACK_XAPIAN_PATH)
settings.HAYSTACK_XAPIAN_PATH = self.old_xapian_path
super(XapianSearchBackendTestCase, self).tearDown()
def xapian_search(self, query_string):
@ -120,7 +115,7 @@ class XapianSearchBackendTestCase(TestCase):
query = xapian.Query(query_string) # Empty query matches all
enquire = xapian.Enquire(database)
enquire.set_query(query)
matches = enquire.get_mset(0, DEFAULT_MAX_RESULTS)
matches = enquire.get_mset(0, database.get_doccount())
document_list = []
@ -189,6 +184,12 @@ class XapianSearchBackendTestCase(TestCase):
self.assertEqual(self.sb.search('*')['hits'], 3)
self.assertEqual([result.pk for result in self.sb.search('*')['results']], [1, 2, 3])
# Exact match
self.assertEqual([result.pk for result in self.sb.search('name:david2')['results']], [2])
self.assertEqual([result.pk for result in self.sb.search('value:10')['results']], [2])
self.assertEqual([result.pk for result in self.sb.search('flag:false')['results']], [2])
self.assertEqual([result.pk for result in self.sb.search('popularity:35.5')['results']], [2])
# NOT operator
self.assertEqual([result.pk for result in self.sb.search('NOT name:david1')['results']], [2, 3])
self.assertEqual([result.pk for result in self.sb.search('NOT name:david1 AND index')['results']], [2, 3])
@ -262,8 +263,8 @@ class XapianSearchBackendTestCase(TestCase):
self.sb.update(self.msi, self.sample_objs)
self.assertEqual(len(self.xapian_search('')), 3)
self.assertEqual(self.sb.search('', narrow_queries=['name:david1']), {'hits': 0, 'results': []})
results = self.sb.search('index', narrow_queries=['name:david1'])
self.assertEqual(self.sb.search('', narrow_queries=set(['name:david1'])), {'hits': 0, 'results': []})
results = self.sb.search('index', narrow_queries=set(['name:david1']))
self.assertEqual(results['hits'], 1)
def test_highlight(self):

View file

@ -31,7 +31,7 @@ from django.core.exceptions import ImproperlyConfigured
from django.utils.encoding import smart_unicode, force_unicode
from haystack.backends import BaseSearchBackend, BaseSearchQuery, SearchNode, log_query
from haystack.exceptions import MissingDependency
from haystack.exceptions import MissingDependency, HaystackError
from haystack.fields import DateField, DateTimeField, IntegerField, FloatField, BooleanField, MultiValueField
from haystack.models import SearchResult
from haystack.utils import get_identifier
@ -42,13 +42,16 @@ except ImportError:
raise MissingDependency("The 'xapian' backend requires the installation of 'xapian'. Please refer to the documentation.")
DEFAULT_MAX_RESULTS = 100000
DOCUMENT_ID_TERM_PREFIX = 'Q'
DOCUMENT_CUSTOM_TERM_PREFIX = 'X'
DOCUMENT_CT_TERM_PREFIX = DOCUMENT_CUSTOM_TERM_PREFIX + 'CONTENTTYPE'
class InvalidIndexError(HaystackError):
    """
    Signals that a search index could not be opened.

    A `HaystackError` subclass with no behaviour of its own; it exists so
    callers can catch index-opening failures as a distinct error type.
    """
class XHValueRangeProcessor(xapian.ValueRangeProcessor):
def __init__(self, sb):
self.sb = sb
@ -258,7 +261,7 @@ class SearchBackend(BaseSearchBackend):
if not models:
query = xapian.Query('')
enquire = self._enquire(database, query)
for match in enquire.get_mset(0, DEFAULT_MAX_RESULTS):
for match in enquire.get_mset(0, self.document_count()):
database.delete_document(match.docid)
else:
for model in models:
@ -268,7 +271,7 @@ class SearchBackend(BaseSearchBackend):
)
@log_query
def search(self, query, sort_by=None, start_offset=0,
end_offset=DEFAULT_MAX_RESULTS, fields='', highlight=False,
end_offset=0, fields='', highlight=False,
facets=None, date_facets=None, query_facets=None,
narrow_queries=None, boost=None, spelling_query=None,
limit_to_registered_models=True, **kwargs):
@ -281,7 +284,7 @@ class SearchBackend(BaseSearchBackend):
Optional arguments:
`sort_by` -- Sort results by specified field (default = None)
`start_offset` -- Slice results from `start_offset` (default = 0)
`end_offset` -- Slice results at `end_offset` (default = 10,000)
`end_offset` -- Slice results at `end_offset` (default = 0), if 0, then all documents
`fields` -- Filter results on `fields` (default = '')
`highlight` -- Highlight terms in results (default = False)
`facets` -- Facet results on fields (default = None)
@ -327,12 +330,12 @@ class SearchBackend(BaseSearchBackend):
if limit_to_registered_models:
if narrow_queries is None:
narrow_queries = []
narrow_queries = set()
registered_models = self.build_registered_models_list()
if len(registered_models) > 0:
narrow_queries.append(
narrow_queries.add(
' '.join(['django_ct:%s' % model for model in registered_models])
)
@ -352,6 +355,8 @@ class SearchBackend(BaseSearchBackend):
'dates': {},
'queries': {},
}
if not end_offset:
end_offset = self.document_count()
matches = enquire.get_mset(start_offset, (end_offset - start_offset))
for match in matches:
@ -393,14 +398,10 @@ class SearchBackend(BaseSearchBackend):
"""
Retrieves the total document count for the search index.
"""
try:
database = self._database()
except xapian.DatabaseOpeningError:
return 0
return database.get_doccount()
return self._database().get_doccount()
def more_like_this(self, model_instance, additional_query_string=None,
start_offset=0, end_offset=DEFAULT_MAX_RESULTS,
start_offset=0, end_offset=0,
limit_to_registered_models=True, **kwargs):
"""
Given a model instance, returns a result set of similar documents.
@ -413,7 +414,7 @@ class SearchBackend(BaseSearchBackend):
`additional_query_string` -- An additional query string to narrow
results
`start_offset` -- The starting offset (default=0)
`end_offset` -- The ending offset (default=None)
`end_offset` -- The ending offset (default=0), if 0, then all documents
`limit_to_registered_models` -- Limit returned results to models registered in the current `SearchSite` (default = True)
Returns:
@ -436,10 +437,12 @@ class SearchBackend(BaseSearchBackend):
query = xapian.Query(DOCUMENT_ID_TERM_PREFIX + get_identifier(model_instance))
enquire = self._enquire(database, query)
rset = xapian.RSet()
for match in enquire.get_mset(0, DEFAULT_MAX_RESULTS):
if not end_offset:
end_offset = self.document_count()
for match in enquire.get_mset(0, end_offset):
rset.add_document(match.docid)
query = xapian.Query(xapian.Query.OP_OR,
[expand.term for expand in enquire.get_eset(DEFAULT_MAX_RESULTS, rset, XHExpandDecider())]
[expand.term for expand in enquire.get_eset(match.document.termlist_count(), rset, XHExpandDecider())]
)
query = xapian.Query(
xapian.Query.OP_AND_NOT, [query, DOCUMENT_ID_TERM_PREFIX + get_identifier(model_instance)]
@ -449,8 +452,8 @@ class SearchBackend(BaseSearchBackend):
registered_models = self.build_registered_models_list()
if len(registered_models) > 0:
narrow_queries = []
narrow_queries.append(
narrow_queries = set()
narrow_queries.add(
' '.join(['django_ct:%s' % model for model in registered_models])
)
if additional_query_string:
@ -720,7 +723,10 @@ class SearchBackend(BaseSearchBackend):
database.set_metadata('schema', pickle.dumps(self.schema, pickle.HIGHEST_PROTOCOL))
database.set_metadata('content', pickle.dumps(self.content_field_name, pickle.HIGHEST_PROTOCOL))
else:
database = xapian.Database(settings.HAYSTACK_XAPIAN_PATH)
try:
database = xapian.Database(settings.HAYSTACK_XAPIAN_PATH)
except xapian.DatabaseOpeningError:
raise InvalidIndexError(u'Unable to open index at %s' % settings.HAYSTACK_XAPIAN_PATH)
self.schema = pickle.loads(database.get_metadata('schema'))
self.content_field_name = pickle.loads(database.get_metadata('content'))