mirror of
https://github.com/Hopiu/xapian-haystack.git
synced 2026-03-16 22:20:31 +00:00
Fixed pylint errors
This commit is contained in:
parent
ec68cb0917
commit
f9e17b7a01
2 changed files with 276 additions and 281 deletions
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
# Based on original code by Daniel Lindsley as part of the Haystack test suite.
|
||||
|
||||
import cPickle as pickle
|
||||
import datetime
|
||||
import os
|
||||
import shutil
|
||||
|
|
@ -17,7 +16,6 @@ from django.test import TestCase
|
|||
from haystack import connections, reset_search_queries
|
||||
from haystack import indexes
|
||||
from haystack.backends.xapian_backend import _marshal_value
|
||||
from haystack.exceptions import HaystackError
|
||||
from haystack.models import SearchResult
|
||||
from haystack.query import SearchQuerySet, SQ
|
||||
from haystack.utils.loading import UnifiedIndex
|
||||
|
|
@ -36,16 +34,16 @@ class XapianMockModel(models.Model):
|
|||
pub_date = models.DateTimeField(default=datetime.datetime.now)
|
||||
exp_date = models.DateTimeField(default=datetime.datetime.now)
|
||||
tag = models.ForeignKey(MockTag)
|
||||
|
||||
|
||||
value = models.IntegerField(default=0)
|
||||
flag = models.BooleanField(default=True)
|
||||
slug = models.SlugField()
|
||||
popularity = models.FloatField(default=0.0)
|
||||
url = models.URLField()
|
||||
|
||||
|
||||
def __unicode__(self):
|
||||
return self.author
|
||||
|
||||
|
||||
def hello(self):
|
||||
return 'World!'
|
||||
|
||||
|
|
@ -65,16 +63,16 @@ class XapianMockSearchIndex(indexes.SearchIndex):
|
|||
month = indexes.CharField(indexed=False)
|
||||
url = indexes.CharField(model_attr='url')
|
||||
empty = indexes.CharField()
|
||||
|
||||
|
||||
# Various MultiValueFields
|
||||
sites = indexes.MultiValueField()
|
||||
tags = indexes.MultiValueField()
|
||||
keys = indexes.MultiValueField()
|
||||
titles = indexes.MultiValueField()
|
||||
|
||||
|
||||
def get_model(self):
|
||||
return XapianMockModel
|
||||
|
||||
|
||||
def prepare_sites(self, obj):
|
||||
return ['%d' % (i * obj.id) for i in xrange(1, 4)]
|
||||
|
||||
|
|
@ -96,7 +94,6 @@ class XapianMockSearchIndex(indexes.SearchIndex):
|
|||
return ['object two title one', 'object two title two']
|
||||
else:
|
||||
return ['object three title one', 'object three title two']
|
||||
pub_date = indexes.DateField(model_attr='pub_date')
|
||||
|
||||
def prepare_month(self, obj):
|
||||
return '%02d' % obj.pub_date.month
|
||||
|
|
@ -113,7 +110,7 @@ class XapianBoostMockSearchIndex(indexes.SearchIndex):
|
|||
author = indexes.CharField(model_attr='author', weight=2.0)
|
||||
editor = indexes.CharField(model_attr='editor')
|
||||
pub_date = indexes.DateField(model_attr='pub_date')
|
||||
|
||||
|
||||
def get_model(self):
|
||||
return AFourthMockModel
|
||||
|
||||
|
|
@ -121,16 +118,16 @@ class XapianBoostMockSearchIndex(indexes.SearchIndex):
|
|||
class XapianSearchBackendTestCase(TestCase):
|
||||
def setUp(self):
|
||||
super(XapianSearchBackendTestCase, self).setUp()
|
||||
|
||||
|
||||
self.old_ui = connections['default'].get_unified_index()
|
||||
self.ui = UnifiedIndex()
|
||||
self.index = XapianMockSearchIndex()
|
||||
self.ui.build(indexes=[self.index])
|
||||
self.backend = connections['default'].get_backend()
|
||||
connections['default']._index = self.ui
|
||||
|
||||
|
||||
self.sample_objs = []
|
||||
|
||||
|
||||
for i in xrange(1, 4):
|
||||
mock = XapianMockModel()
|
||||
mock.id = i
|
||||
|
|
@ -142,70 +139,70 @@ class XapianSearchBackendTestCase(TestCase):
|
|||
mock.slug = 'http://example.com/%d/' % i
|
||||
mock.url = 'http://example.com/%d/' % i
|
||||
self.sample_objs.append(mock)
|
||||
|
||||
|
||||
self.sample_objs[0].popularity = 834.0
|
||||
self.sample_objs[1].popularity = 35.5
|
||||
self.sample_objs[2].popularity = 972.0
|
||||
|
||||
|
||||
def tearDown(self):
|
||||
if os.path.exists(settings.HAYSTACK_CONNECTIONS['default']['PATH']):
|
||||
shutil.rmtree(settings.HAYSTACK_CONNECTIONS['default']['PATH'])
|
||||
|
||||
|
||||
connections['default']._index = self.old_ui
|
||||
super(XapianSearchBackendTestCase, self).tearDown()
|
||||
|
||||
|
||||
def test_update(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
|
||||
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
self.assertEqual([result.pk for result in self.backend.search(xapian.Query(''))['results']], [1, 2, 3])
|
||||
|
||||
|
||||
def test_duplicate_update(self):
    """
    Indexing the same objects a second time must update the existing
    documents rather than append new ones, so the document count stays
    at the number of sample objects.
    """
    self.backend.update(self.index, self.sample_objs)
    # Duplicates should be updated, not appended -- http://github.com/notanumber/xapian-haystack/issues/#issue/6
    self.backend.update(self.index, self.sample_objs)

    self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
def test_remove(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
self.backend.remove(self.sample_objs[0])
|
||||
self.assertEqual(self.backend.document_count(), 2)
|
||||
self.assertEqual([result.pk for result in self.backend.search(xapian.Query(''))['results']], [2, 3])
|
||||
|
||||
|
||||
def test_clear(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
self.backend.clear()
|
||||
self.assertEqual(self.backend.document_count(), 0)
|
||||
|
||||
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
self.backend.clear([AnotherMockModel])
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
self.backend.clear([XapianMockModel])
|
||||
self.assertEqual(self.backend.document_count(), 0)
|
||||
|
||||
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
self.backend.clear([AnotherMockModel, XapianMockModel])
|
||||
self.assertEqual(self.backend.document_count(), 0)
|
||||
|
||||
|
||||
def test_search(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
self.assertEqual(self.backend.search(xapian.Query()), {'hits': 0, 'results': []})
|
||||
self.assertEqual(self.backend.search(xapian.Query(''))['hits'], 3)
|
||||
self.assertEqual([result.pk for result in self.backend.search(xapian.Query(''))['results']], [1, 2, 3])
|
||||
self.assertEqual(self.backend.search(xapian.Query('indexed'))['hits'], 3)
|
||||
self.assertEqual([result.pk for result in self.backend.search(xapian.Query(''))['results']], [1, 2, 3])
|
||||
|
||||
|
||||
# Ensure that swapping the ``result_class`` works.
|
||||
self.assertTrue(isinstance(self.backend.search(xapian.Query('indexed'), result_class=MockSearchResult)['results'][0], MockSearchResult))
|
||||
|
||||
|
|
@ -219,33 +216,33 @@ class XapianSearchBackendTestCase(TestCase):
|
|||
def test_search_by_mvf(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
self.assertEqual(self.backend.search(xapian.Query('ab'))['hits'], 1)
|
||||
self.assertEqual(self.backend.search(xapian.Query('b'))['hits'], 1)
|
||||
self.assertEqual(self.backend.search(xapian.Query('to'))['hits'], 1)
|
||||
self.assertEqual(self.backend.search(xapian.Query('one'))['hits'], 3)
|
||||
|
||||
|
||||
def test_field_facets(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
self.assertEqual(self.backend.search(xapian.Query(), facets=['name']), {'hits': 0, 'results': []})
|
||||
results = self.backend.search(xapian.Query('indexed'), facets=['name'])
|
||||
self.assertEqual(results['hits'], 3)
|
||||
self.assertEqual(results['facets']['fields']['name'], [('david1', 1), ('david2', 1), ('david3', 1)])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query('indexed'), facets=['flag'])
|
||||
self.assertEqual(results['hits'], 3)
|
||||
self.assertEqual(results['facets']['fields']['flag'], [(False, 1), (True, 2)])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query('indexed'), facets=['sites'])
|
||||
self.assertEqual(results['hits'], 3)
|
||||
self.assertEqual(results['facets']['fields']['sites'], [('1', 1), ('3', 2), ('2', 2), ('4', 1), ('6', 2), ('9', 1)])
|
||||
|
||||
|
||||
def test_date_facets(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
self.assertEqual(self.backend.search(xapian.Query(), date_facets={'pub_date': {'start_date': datetime.datetime(2008, 10, 26), 'end_date': datetime.datetime(2009, 3, 26), 'gap_by': 'month'}}), {'hits': 0, 'results': []})
|
||||
results = self.backend.search(xapian.Query('indexed'), date_facets={'pub_date': {'start_date': datetime.datetime(2008, 10, 26), 'end_date': datetime.datetime(2009, 3, 26), 'gap_by': 'month'}})
|
||||
self.assertEqual(results['hits'], 3)
|
||||
|
|
@ -256,7 +253,7 @@ class XapianSearchBackendTestCase(TestCase):
|
|||
('2008-11-26T00:00:00', 0),
|
||||
('2008-10-26T00:00:00', 0),
|
||||
])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query('indexed'), date_facets={'pub_date': {'start_date': datetime.datetime(2009, 02, 01), 'end_date': datetime.datetime(2009, 3, 15), 'gap_by': 'day', 'gap_amount': 15}})
|
||||
self.assertEqual(results['hits'], 3)
|
||||
self.assertEqual(results['facets']['dates']['pub_date'], [
|
||||
|
|
@ -264,107 +261,107 @@ class XapianSearchBackendTestCase(TestCase):
|
|||
('2009-02-16T00:00:00', 3),
|
||||
('2009-02-01T00:00:00', 0)
|
||||
])
|
||||
|
||||
|
||||
def test_query_facets(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
self.assertEqual(self.backend.search(xapian.Query(), query_facets={'name': 'da*'}), {'hits': 0, 'results': []})
|
||||
results = self.backend.search(xapian.Query('indexed'), query_facets={'name': 'da*'})
|
||||
self.assertEqual(results['hits'], 3)
|
||||
self.assertEqual(results['facets']['queries']['name'], ('da*', 3))
|
||||
|
||||
|
||||
def test_narrow_queries(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
self.assertEqual(self.backend.search(xapian.Query(), narrow_queries=set(['name:david1'])), {'hits': 0, 'results': []})
|
||||
results = self.backend.search(xapian.Query('indexed'), narrow_queries=set(['name:david1']))
|
||||
self.assertEqual(results['hits'], 1)
|
||||
|
||||
|
||||
def test_highlight(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
self.assertEqual(self.backend.search(xapian.Query(), highlight=True), {'hits': 0, 'results': []})
|
||||
self.assertEqual(self.backend.search(xapian.Query('indexed'), highlight=True)['hits'], 3)
|
||||
self.assertEqual([result.highlighted['text'] for result in self.backend.search(xapian.Query('indexed'), highlight=True)['results']], ['<em>indexed</em>!\n1', '<em>indexed</em>!\n2', '<em>indexed</em>!\n3'])
|
||||
|
||||
|
||||
def test_spelling_suggestion(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
self.assertEqual(self.backend.search(xapian.Query('indxe'))['hits'], 0)
|
||||
self.assertEqual(self.backend.search(xapian.Query('indxe'))['spelling_suggestion'], 'indexed')
|
||||
|
||||
|
||||
self.assertEqual(self.backend.search(xapian.Query('indxed'))['hits'], 0)
|
||||
self.assertEqual(self.backend.search(xapian.Query('indxed'))['spelling_suggestion'], 'indexed')
|
||||
|
||||
|
||||
self.assertEqual(self.backend.search(xapian.Query('foo'))['hits'], 0)
|
||||
self.assertEqual(self.backend.search(xapian.Query('foo'), spelling_query='indexy')['spelling_suggestion'], 'indexed')
|
||||
|
||||
|
||||
self.assertEqual(self.backend.search(xapian.Query('XNAMEdavid'))['hits'], 0)
|
||||
self.assertEqual(self.backend.search(xapian.Query('XNAMEdavid'))['spelling_suggestion'], 'david1')
|
||||
|
||||
|
||||
def test_more_like_this(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
results = self.backend.more_like_this(self.sample_objs[0])
|
||||
self.assertEqual(results['hits'], 2)
|
||||
self.assertEqual([result.pk for result in results['results']], [3, 2])
|
||||
|
||||
|
||||
results = self.backend.more_like_this(self.sample_objs[0], additional_query=xapian.Query('david3'))
|
||||
self.assertEqual(results['hits'], 1)
|
||||
self.assertEqual([result.pk for result in results['results']], [3])
|
||||
|
||||
|
||||
results = self.backend.more_like_this(self.sample_objs[0], limit_to_registered_models=True)
|
||||
self.assertEqual(results['hits'], 2)
|
||||
self.assertEqual([result.pk for result in results['results']], [3, 2])
|
||||
|
||||
|
||||
# Ensure that swapping the ``result_class`` works.
|
||||
self.assertTrue(isinstance(self.backend.more_like_this(self.sample_objs[0], result_class=MockSearchResult)['results'][0], MockSearchResult))
|
||||
|
||||
|
||||
def test_order_by(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query(''), sort_by=['pub_date'])
|
||||
self.assertEqual([result.pk for result in results['results']], [3, 2, 1])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query(''), sort_by=['-pub_date'])
|
||||
self.assertEqual([result.pk for result in results['results']], [1, 2, 3])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query(''), sort_by=['exp_date'])
|
||||
self.assertEqual([result.pk for result in results['results']], [1, 2, 3])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query(''), sort_by=['-exp_date'])
|
||||
self.assertEqual([result.pk for result in results['results']], [3, 2, 1])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query(''), sort_by=['id'])
|
||||
self.assertEqual([result.pk for result in results['results']], [1, 2, 3])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query(''), sort_by=['-id'])
|
||||
self.assertEqual([result.pk for result in results['results']], [3, 2, 1])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query(''), sort_by=['value'])
|
||||
self.assertEqual([result.pk for result in results['results']], [1, 2, 3])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query(''), sort_by=['-value'])
|
||||
self.assertEqual([result.pk for result in results['results']], [3, 2, 1])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query(''), sort_by=['popularity'])
|
||||
self.assertEqual([result.pk for result in results['results']], [2, 1, 3])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query(''), sort_by=['-popularity'])
|
||||
self.assertEqual([result.pk for result in results['results']], [3, 1, 2])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query(''), sort_by=['flag', 'id'])
|
||||
self.assertEqual([result.pk for result in results['results']], [2, 1, 3])
|
||||
|
||||
|
||||
results = self.backend.search(xapian.Query(''), sort_by=['flag', '-id'])
|
||||
self.assertEqual([result.pk for result in results['results']], [2, 3, 1])
|
||||
|
||||
|
||||
def test_verify_type(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(self.backend.document_count(), 3)
|
||||
|
|
@ -384,7 +381,7 @@ class XapianSearchBackendTestCase(TestCase):
|
|||
self.assertEqual(_marshal_value(datetime.datetime(2009, 5, 9, 0, 0)), u'20090509000000')
|
||||
self.assertEqual(_marshal_value(datetime.datetime(1899, 5, 18, 0, 0)), u'18990518000000')
|
||||
self.assertEqual(_marshal_value(datetime.datetime(2009, 5, 18, 1, 16, 30, 250)), u'20090518011630000250')
|
||||
|
||||
|
||||
def test_build_schema(self):
|
||||
(content_field_name, fields) = self.backend.build_schema(connections['default'].get_unified_index().all_searchfields())
|
||||
self.assertEqual(content_field_name, 'text')
|
||||
|
|
@ -406,7 +403,7 @@ class XapianSearchBackendTestCase(TestCase):
|
|||
{'column': 13, 'type': 'text', 'field_name': 'url', 'multi_valued': 'false'},
|
||||
{'column': 14, 'type': 'long', 'field_name': 'value', 'multi_valued': 'false'}
|
||||
])
|
||||
|
||||
|
||||
def test_parse_query(self):
|
||||
self.backend.update(self.index, self.sample_objs)
|
||||
self.assertEqual(str(self.backend.parse_query('indexed')), 'Xapian::Query(Zindex:(pos=1))')
|
||||
|
|
@ -430,7 +427,7 @@ class LiveXapianMockSearchIndex(indexes.SearchIndex):
|
|||
pub_date = indexes.DateField(model_attr='pub_date')
|
||||
created = indexes.DateField()
|
||||
title = indexes.CharField()
|
||||
|
||||
|
||||
def get_model(self):
|
||||
return MockModel
|
||||
|
||||
|
|
@ -440,10 +437,10 @@ class LiveXapianSearchQueryTestCase(TestCase):
|
|||
SearchQuery specific tests
|
||||
"""
|
||||
fixtures = ['initial_data.json']
|
||||
|
||||
|
||||
def setUp(self):
|
||||
super(LiveXapianSearchQueryTestCase, self).setUp()
|
||||
|
||||
|
||||
self.old_ui = connections['default'].get_unified_index()
|
||||
ui = UnifiedIndex()
|
||||
index = LiveXapianMockSearchIndex()
|
||||
|
|
@ -451,38 +448,38 @@ class LiveXapianSearchQueryTestCase(TestCase):
|
|||
backend = connections['default'].get_backend()
|
||||
connections['default']._index = ui
|
||||
backend.update(index, MockModel.objects.all())
|
||||
|
||||
|
||||
self.sq = connections['default'].get_query()
|
||||
|
||||
|
||||
def tearDown(self):
|
||||
connections['default']._index = self.old_ui
|
||||
super(LiveXapianSearchQueryTestCase, self).tearDown()
|
||||
|
||||
|
||||
def test_get_spelling(self):
|
||||
self.sq.add_filter(SQ(content='indxd'))
|
||||
self.assertEqual(self.sq.get_spelling_suggestion(), u'indexed')
|
||||
self.assertEqual(self.sq.get_spelling_suggestion('indxd'), u'indexed')
|
||||
|
||||
|
||||
def test_startswith(self):
|
||||
self.sq.add_filter(SQ(name__startswith='da'))
|
||||
self.assertEqual([result.pk for result in self.sq.get_results()], [1, 2, 3])
|
||||
|
||||
|
||||
def test_build_query_gt(self):
|
||||
self.sq.add_filter(SQ(name__gt='m'))
|
||||
self.assertEqual(str(self.sq.build_query()), u'Xapian::Query((<alldocuments> AND_NOT VALUE_RANGE 2 a m))')
|
||||
|
||||
|
||||
def test_build_query_gte(self):
|
||||
self.sq.add_filter(SQ(name__gte='m'))
|
||||
self.assertEqual(str(self.sq.build_query()), u'Xapian::Query(VALUE_RANGE 2 m zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz)')
|
||||
|
||||
|
||||
def test_build_query_lt(self):
|
||||
self.sq.add_filter(SQ(name__lt='m'))
|
||||
self.assertEqual(str(self.sq.build_query()), u'Xapian::Query((<alldocuments> AND_NOT VALUE_RANGE 2 m zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz))')
|
||||
|
||||
|
||||
def test_build_query_lte(self):
|
||||
self.sq.add_filter(SQ(name__lte='m'))
|
||||
self.assertEqual(str(self.sq.build_query()), u'Xapian::Query(VALUE_RANGE 2 a m)')
|
||||
|
||||
|
||||
def test_build_query_multiple_filter_types(self):
|
||||
self.sq.add_filter(SQ(content='why'))
|
||||
self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59, 0)))
|
||||
|
|
@ -491,18 +488,18 @@ class LiveXapianSearchQueryTestCase(TestCase):
|
|||
self.sq.add_filter(SQ(title__gte='B'))
|
||||
self.sq.add_filter(SQ(id__in=[1, 2, 3]))
|
||||
self.assertEqual(str(self.sq.build_query()), u'Xapian::Query(((Zwhi OR why) AND VALUE_RANGE 3 00010101000000 20090210015900 AND (<alldocuments> AND_NOT VALUE_RANGE 2 a david) AND (<alldocuments> AND_NOT VALUE_RANGE 1 20090212121300 99990101000000) AND VALUE_RANGE 5 b zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz AND (Q1 OR Q2 OR Q3)))')
|
||||
|
||||
|
||||
def test_log_query(self):
|
||||
reset_search_queries()
|
||||
self.assertEqual(len(connections['default'].queries), 0)
|
||||
|
||||
|
||||
# Stow.
|
||||
old_debug = settings.DEBUG
|
||||
settings.DEBUG = False
|
||||
|
||||
|
||||
len(self.sq.get_results())
|
||||
self.assertEqual(len(connections['default'].queries), 0)
|
||||
|
||||
|
||||
settings.DEBUG = True
|
||||
# Redefine it to clear out the cached results.
|
||||
self.sq = connections['default'].get_query()
|
||||
|
|
@ -510,7 +507,7 @@ class LiveXapianSearchQueryTestCase(TestCase):
|
|||
len(self.sq.get_results())
|
||||
self.assertEqual(len(connections['default'].queries), 1)
|
||||
self.assertEqual(str(connections['default'].queries[0]['query_string']), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
|
||||
|
||||
|
||||
# And again, for good measure.
|
||||
self.sq = connections['default'].get_query()
|
||||
self.sq.add_filter(SQ(name='bar'))
|
||||
|
|
@ -519,7 +516,7 @@ class LiveXapianSearchQueryTestCase(TestCase):
|
|||
self.assertEqual(len(connections['default'].queries), 2)
|
||||
self.assertEqual(str(connections['default'].queries[0]['query_string']), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
|
||||
self.assertEqual(str(connections['default'].queries[1]['query_string']), u'Xapian::Query(((ZXNAMEbar OR XNAMEbar) AND (ZXTEXTmoof OR XTEXTmoof)))')
|
||||
|
||||
|
||||
# Restore.
|
||||
settings.DEBUG = old_debug
|
||||
|
||||
|
|
@ -529,10 +526,10 @@ class LiveXapianSearchQuerySetTestCase(TestCase):
|
|||
SearchQuerySet specific tests
|
||||
"""
|
||||
fixtures = ['initial_data.json']
|
||||
|
||||
|
||||
def setUp(self):
|
||||
super(LiveXapianSearchQuerySetTestCase, self).setUp()
|
||||
|
||||
|
||||
self.old_ui = connections['default'].get_unified_index()
|
||||
self.ui = UnifiedIndex()
|
||||
self.index = LiveXapianMockSearchIndex()
|
||||
|
|
@ -540,23 +537,23 @@ class LiveXapianSearchQuerySetTestCase(TestCase):
|
|||
self.backend = connections['default'].get_backend()
|
||||
connections['default']._index = self.ui
|
||||
self.backend.update(self.index, MockModel.objects.all())
|
||||
|
||||
|
||||
self.sq = connections['default'].get_query()
|
||||
self.sqs = SearchQuerySet()
|
||||
|
||||
|
||||
def tearDown(self):
|
||||
connections['default']._index = self.old_ui
|
||||
super(LiveXapianSearchQuerySetTestCase, self).tearDown()
|
||||
|
||||
|
||||
def test_result_class(self):
|
||||
# Assert that we're defaulting to ``SearchResult``.
|
||||
sqs = self.sqs.all()
|
||||
self.assertTrue(isinstance(sqs[0], SearchResult))
|
||||
|
||||
|
||||
# Custom class.
|
||||
sqs = self.sqs.result_class(MockSearchResult).all()
|
||||
self.assertTrue(isinstance(sqs[0], MockSearchResult))
|
||||
|
||||
|
||||
# Reset to default.
|
||||
sqs = self.sqs.result_class(None).all()
|
||||
self.assertTrue(isinstance(sqs[0], SearchResult))
|
||||
|
|
@ -565,7 +562,7 @@ class LiveXapianSearchQuerySetTestCase(TestCase):
|
|||
class XapianBoostBackendTestCase(TestCase):
|
||||
def setUp(self):
|
||||
super(XapianBoostBackendTestCase, self).setUp()
|
||||
|
||||
|
||||
# Stow.
|
||||
self.old_ui = connections['default'].get_unified_index()
|
||||
self.ui = UnifiedIndex()
|
||||
|
|
@ -594,9 +591,9 @@ class XapianBoostBackendTestCase(TestCase):
|
|||
|
||||
def test_boost(self):
|
||||
self.sb.update(self.index, self.sample_objs)
|
||||
|
||||
|
||||
sqs = SearchQuerySet()
|
||||
|
||||
|
||||
self.assertEqual(len(sqs.all()), 4)
|
||||
|
||||
results = sqs.filter(SQ(author='daniel') | SQ(editor='daniel'))
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ import os
|
|||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import ImproperlyConfigured
|
||||
|
|
@ -19,9 +18,8 @@ from django.utils.encoding import force_unicode
|
|||
|
||||
from haystack import connections
|
||||
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, SearchNode, log_query
|
||||
from haystack.constants import ID, DJANGO_CT, DJANGO_ID
|
||||
from haystack.exceptions import HaystackError, MissingDependency, MoreLikeThisError
|
||||
from haystack.fields import DateField, DateTimeField, IntegerField, FloatField, BooleanField, MultiValueField
|
||||
from haystack.constants import ID
|
||||
from haystack.exceptions import HaystackError, MissingDependency
|
||||
from haystack.models import SearchResult
|
||||
from haystack.utils import get_identifier
|
||||
|
||||
|
|
@ -36,7 +34,7 @@ DOCUMENT_CUSTOM_TERM_PREFIX = 'X'
|
|||
DOCUMENT_CT_TERM_PREFIX = DOCUMENT_CUSTOM_TERM_PREFIX + 'CONTENTTYPE'
|
||||
|
||||
DEFAULT_XAPIAN_FLAGS = (
|
||||
xapian.QueryParser.FLAG_PHRASE |
|
||||
xapian.QueryParser.FLAG_PHRASE |
|
||||
xapian.QueryParser.FLAG_BOOLEAN |
|
||||
xapian.QueryParser.FLAG_LOVEHATE |
|
||||
xapian.QueryParser.FLAG_WILDCARD |
|
||||
|
|
@ -54,7 +52,7 @@ class XHValueRangeProcessor(xapian.ValueRangeProcessor):
|
|||
# FIXME: This needs to get smarter about pulling the right backend.
|
||||
self.backend = backend or XapianSearchBackend()
|
||||
xapian.ValueRangeProcessor.__init__(self)
|
||||
|
||||
|
||||
def __call__(self, begin, end):
|
||||
"""
|
||||
Construct a tuple for value range processing.
|
||||
|
|
@ -71,7 +69,7 @@ class XHValueRangeProcessor(xapian.ValueRangeProcessor):
|
|||
if field_dict['field_name'] == field_name:
|
||||
if not begin:
|
||||
if field_dict['type'] == 'text':
|
||||
begin = u'a' # TODO: A better way of getting a min text value?
|
||||
begin = u'a' # TODO: A better way of getting a min text value?
|
||||
elif field_dict['type'] == 'long':
|
||||
begin = -sys.maxint - 1
|
||||
elif field_dict['type'] == 'float':
|
||||
|
|
@ -80,7 +78,7 @@ class XHValueRangeProcessor(xapian.ValueRangeProcessor):
|
|||
begin = u'00010101000000'
|
||||
elif end == '*':
|
||||
if field_dict['type'] == 'text':
|
||||
end = u'z' * 100 # TODO: A better way of getting a max text value?
|
||||
end = u'z' * 100 # TODO: A better way of getting a max text value?
|
||||
elif field_dict['type'] == 'long':
|
||||
end = sys.maxint
|
||||
elif field_dict['type'] == 'float':
|
||||
|
|
@ -101,7 +99,7 @@ class XHExpandDecider(xapian.ExpandDecider):
|
|||
"""
|
||||
Return True if the term should be used for expanding the search
|
||||
query, False otherwise.
|
||||
|
||||
|
||||
Currently, we only want to ignore terms beginning with `DOCUMENT_CT_TERM_PREFIX`
|
||||
"""
|
||||
if term.startswith(DOCUMENT_CT_TERM_PREFIX):
|
||||
|
|
@ -113,15 +111,15 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
"""
|
||||
`SearchBackend` defines the Xapian search backend for use with the Haystack
|
||||
API for Django search.
|
||||
|
||||
|
||||
It uses the Xapian Python bindings to interface with Xapian, and as
|
||||
such is subject to this bug: <http://trac.xapian.org/ticket/364> when
|
||||
Django is running with mod_python or mod_wsgi under Apache.
|
||||
|
||||
|
||||
Until this issue has been fixed by Xapian, it is necessary to set
|
||||
`WSGIApplicationGroup to %{GLOBAL}` when using mod_wsgi, or
|
||||
`PythonInterpreter main_interpreter` when using mod_python.
|
||||
|
||||
|
||||
In order to use this backend, `PATH` must be included in the
|
||||
`connection_options`. This should point to a location where you would like your
|
||||
indexes to reside.
|
||||
|
|
@ -129,74 +127,74 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
def __init__(self, connection_alias, language='english', **connection_options):
    """
    Instantiates an instance of `SearchBackend`.

    Required arguments:
        `connection_alias` -- The name of the connection

    Optional arguments:
        `language` -- The stemming language (default = 'english')
        `**connection_options` -- The various options needed to setup
          the backend. `PATH` is mandatory; `FLAGS` is optional and
          falls back to `DEFAULT_XAPIAN_FLAGS`.

    Raises `ImproperlyConfigured` when `PATH` is missing from
    `connection_options`. The directory named by `PATH` is created if it
    does not exist yet.

    Also sets the stemming language to be used to `language`.
    """
    super(XapianSearchBackend, self).__init__(connection_alias, **connection_options)

    if 'PATH' not in connection_options:
        raise ImproperlyConfigured("You must specify a 'PATH' in your settings for connection '%s'." % connection_alias)

    self.path = connection_options.get('PATH')

    if not os.path.exists(self.path):
        os.makedirs(self.path)

    self.flags = connection_options.get('FLAGS', DEFAULT_XAPIAN_FLAGS)
    self.language = language
    # Start empty; both values come from `build_schema()` once they are
    # first needed.
    self._schema = None
    self._content_field_name = None
|
||||
|
||||
|
||||
@property
def schema(self):
    """
    Return the schema for this connection, building it on first access.

    When `self._schema` is falsy, `build_schema()` is called with all
    search fields of the connection's unified index; its result fills
    both `self._content_field_name` and `self._schema`.
    """
    if not self._schema:
        # A single build is enough: it populates both cached values.
        self._content_field_name, self._schema = self.build_schema(
            connections[self.connection_alias].get_unified_index().all_searchfields()
        )
    return self._schema
|
||||
|
||||
@property
def content_field_name(self):
    """
    Return the name of the content field, building the schema on first
    access.

    When `self._content_field_name` is falsy, `build_schema()` is called
    with all search fields of the connection's unified index; its result
    fills both `self._content_field_name` and `self._schema`.
    """
    if not self._content_field_name:
        # A single build is enough: it populates both cached values.
        self._content_field_name, self._schema = self.build_schema(
            connections[self.connection_alias].get_unified_index().all_searchfields()
        )
    return self._content_field_name
|
||||
|
||||
|
||||
def update(self, index, iterable):
|
||||
"""
|
||||
Updates the `index` with any objects in `iterable` by adding/updating
|
||||
the database as needed.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`index` -- The `SearchIndex` to process
|
||||
`iterable` -- An iterable of model instances to index
|
||||
|
||||
|
||||
For each object in `iterable`, a document is created containing all
|
||||
of the terms extracted from `index.full_prepare(obj)` with field prefixes,
|
||||
and 'as-is' as needed. Also, if the field type is 'text' it will be
|
||||
of the terms extracted from `index.full_prepare(obj)` with field prefixes,
|
||||
and 'as-is' as needed. Also, if the field type is 'text' it will be
|
||||
stemmed and stored with the 'Z' prefix as well.
|
||||
|
||||
|
||||
eg. `content:Testing` ==> `testing, Ztest, ZXCONTENTtest, XCONTENTtest`
|
||||
|
||||
|
||||
Each document also contains an extra term in the format:
|
||||
|
||||
|
||||
`XCONTENTTYPE<app_name>.<model_name>`
|
||||
|
||||
|
||||
As well as a unique identifier in the format:
|
||||
|
||||
|
||||
`Q<app_name>.<model_name>.<pk>`
|
||||
|
||||
|
||||
eg.: foo.bar (pk=1) ==> `Qfoo.bar.1`, `XCONTENTTYPEfoo.bar`
|
||||
|
||||
|
||||
This is useful for querying for a specific document corresponding to
|
||||
a model instance.
|
||||
|
||||
|
||||
The document also contains a pickled version of the object itself and
|
||||
the document ID in the document data field.
|
||||
|
||||
|
||||
Finally, we also store field values to be used for sorting data. We
|
||||
store these in the document value slots (position zero is reserved
|
||||
for the document ID). All values are stored as unicode strings with
|
||||
|
|
@ -207,14 +205,14 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
try:
|
||||
for obj in iterable:
|
||||
document = xapian.Document()
|
||||
|
||||
|
||||
term_generator = xapian.TermGenerator()
|
||||
term_generator.set_database(database)
|
||||
term_generator.set_stemmer(xapian.Stem(self.language))
|
||||
if self.include_spelling is True:
|
||||
term_generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)
|
||||
term_generator.set_document(document)
|
||||
|
||||
|
||||
document_id = DOCUMENT_ID_TERM_PREFIX + get_identifier(obj)
|
||||
data = index.full_prepare(obj)
|
||||
weights = index.get_field_weights()
|
||||
|
|
@ -256,7 +254,7 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
if len(term.split()) == 1:
|
||||
document.add_term(term, weight)
|
||||
document.add_term(prefix + term, weight)
|
||||
|
||||
|
||||
document.set_data(pickle.dumps(
|
||||
(obj._meta.app_label, obj._meta.module_name, obj.pk, data),
|
||||
pickle.HIGHEST_PROTOCOL
|
||||
|
|
@ -267,35 +265,35 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
(obj._meta.app_label, obj._meta.module_name)
|
||||
)
|
||||
database.replace_document(document_id, document)
|
||||
|
||||
|
||||
except UnicodeDecodeError:
|
||||
sys.stderr.write('Chunk failed.\n')
|
||||
pass
|
||||
|
||||
|
||||
finally:
|
||||
database = None
|
||||
|
||||
|
||||
def remove(self, obj):
|
||||
"""
|
||||
Remove indexes for `obj` from the database.
|
||||
|
||||
|
||||
We delete all instances of `Q<app_name>.<model_name>.<pk>` which
|
||||
should be unique to this object.
|
||||
"""
|
||||
database = self._database(writable=True)
|
||||
database.delete_document(DOCUMENT_ID_TERM_PREFIX + get_identifier(obj))
|
||||
|
||||
|
||||
def clear(self, models=[]):
|
||||
"""
|
||||
Clear all instances of `models` from the database or all models, if
|
||||
not specified.
|
||||
|
||||
|
||||
Optional Arguments:
|
||||
`models` -- Models to clear from the database (default = [])
|
||||
|
||||
|
||||
If `models` is empty, an empty query is executed which matches all
|
||||
documents in the database. Afterwards, each match is deleted.
|
||||
|
||||
|
||||
Otherwise, for each model, a `delete_document` call is issued with
|
||||
the term `XCONTENTTYPE<app_name>.<model_name>`. This will delete
|
||||
all documents with the specified model type.
|
||||
|
|
@ -327,10 +325,10 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
limit_to_registered_models=True, result_class=None, **kwargs):
|
||||
"""
|
||||
Executes the Xapian::query as defined in `query`.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`query` -- Search query to execute
|
||||
|
||||
|
||||
Optional arguments:
|
||||
`sort_by` -- Sort results by specified field (default = None)
|
||||
`start_offset` -- Slice results from `start_offset` (default = 0)
|
||||
|
|
@ -343,7 +341,7 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
`narrow_queries` -- Narrow queries (default = None)
|
||||
`spelling_query` -- An optional query to execute spelling suggestion on
|
||||
`limit_to_registered_models` -- Limit returned results to models registered in the current `SearchSite` (default = True)
|
||||
|
||||
|
||||
Returns:
|
||||
A dictionary with the following keys:
|
||||
`results` -- A list of `SearchResult`
|
||||
|
|
@ -353,9 +351,9 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
`dates` -- A list of date facets
|
||||
`queries` -- A list of query facets
|
||||
If faceting was not used, the `facets` key will not be present
|
||||
|
||||
|
||||
If `query` is None, returns no results.
|
||||
|
||||
|
||||
If `INCLUDE_SPELLING` was enabled in the connection options, the
|
||||
extra flag `FLAG_SPELLING_CORRECTION` will be passed to the query parser
|
||||
and any suggestions for spell correction will be returned as well as
|
||||
|
|
@ -366,27 +364,27 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
'results': [],
|
||||
'hits': 0,
|
||||
}
|
||||
|
||||
|
||||
database = self._database()
|
||||
|
||||
|
||||
if result_class is None:
|
||||
result_class = SearchResult
|
||||
|
||||
|
||||
if self.include_spelling is True:
|
||||
spelling_suggestion = self._do_spelling_suggestion(database, query, spelling_query)
|
||||
else:
|
||||
spelling_suggestion = ''
|
||||
|
||||
|
||||
if narrow_queries is not None:
|
||||
query = xapian.Query(
|
||||
xapian.Query.OP_AND, query, xapian.Query(
|
||||
xapian.Query.OP_OR, [self.parse_query(narrow_query) for narrow_query in narrow_queries]
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
if limit_to_registered_models:
|
||||
registered_models = self.build_models_list()
|
||||
|
||||
|
||||
if len(registered_models) > 0:
|
||||
query = xapian.Query(
|
||||
xapian.Query.OP_AND, query,
|
||||
|
|
@ -396,37 +394,37 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
]
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
enquire = xapian.Enquire(database)
|
||||
if hasattr(settings, 'HAYSTACK_XAPIAN_WEIGHTING_SCHEME'):
|
||||
enquire.set_weighting_scheme(xapian.BM25Weight(*settings.HAYSTACK_XAPIAN_WEIGHTING_SCHEME))
|
||||
enquire.set_query(query)
|
||||
|
||||
|
||||
if sort_by:
|
||||
sorter = xapian.MultiValueSorter()
|
||||
|
||||
|
||||
for sort_field in sort_by:
|
||||
if sort_field.startswith('-'):
|
||||
reverse = True
|
||||
sort_field = sort_field[1:] # Strip the '-'
|
||||
sort_field = sort_field[1:] # Strip the '-'
|
||||
else:
|
||||
reverse = False # Reverse is inverted in Xapian -- http://trac.xapian.org/ticket/311
|
||||
reverse = False # Reverse is inverted in Xapian -- http://trac.xapian.org/ticket/311
|
||||
sorter.add(self._value_column(sort_field), reverse)
|
||||
|
||||
|
||||
enquire.set_sort_by_key_then_relevance(sorter, True)
|
||||
|
||||
|
||||
results = []
|
||||
facets_dict = {
|
||||
'fields': {},
|
||||
'dates': {},
|
||||
'queries': {},
|
||||
}
|
||||
|
||||
|
||||
if not end_offset:
|
||||
end_offset = database.get_doccount() - start_offset
|
||||
|
||||
|
||||
matches = self._get_enquire_mset(database, enquire, start_offset, end_offset)
|
||||
|
||||
|
||||
for match in matches:
|
||||
app_label, module_name, pk, model_data = pickle.loads(self._get_document_data(database, match.document))
|
||||
if highlight:
|
||||
|
|
@ -438,71 +436,71 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
results.append(
|
||||
result_class(app_label, module_name, pk, match.percent, **model_data)
|
||||
)
|
||||
|
||||
|
||||
if facets:
|
||||
facets_dict['fields'] = self._do_field_facets(results, facets)
|
||||
if date_facets:
|
||||
facets_dict['dates'] = self._do_date_facets(results, date_facets)
|
||||
if query_facets:
|
||||
facets_dict['queries'] = self._do_query_facets(results, query_facets)
|
||||
|
||||
|
||||
return {
|
||||
'results': results,
|
||||
'hits': self._get_hit_count(database, enquire),
|
||||
'facets': facets_dict,
|
||||
'spelling_suggestion': spelling_suggestion,
|
||||
}
|
||||
|
||||
|
||||
def more_like_this(self, model_instance, additional_query=None,
|
||||
start_offset=0, end_offset=None,
|
||||
limit_to_registered_models=True, result_class=None, **kwargs):
|
||||
"""
|
||||
Given a model instance, returns a result set of similar documents.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`model_instance` -- The model instance to use as a basis for
|
||||
retrieving similar documents.
|
||||
|
||||
|
||||
Optional arguments:
|
||||
`additional_query` -- An additional query to narrow results
|
||||
`start_offset` -- The starting offset (default=0)
|
||||
`end_offset` -- The ending offset (default=None), if None, then all documents
|
||||
`limit_to_registered_models` -- Limit returned results to models registered in the current `SearchSite` (default = True)
|
||||
|
||||
|
||||
Returns:
|
||||
A dictionary with the following keys:
|
||||
`results` -- A list of `SearchResult`
|
||||
`hits` -- The total available results
|
||||
|
||||
|
||||
Opens a database connection, then builds a simple query using the
|
||||
`model_instance` to build the unique identifier.
|
||||
|
||||
|
||||
For each document retrieved (should always be one), adds an entry into
|
||||
an RSet (relevance set) with the document id, then, uses the RSet
|
||||
to query for an ESet (A set of terms that can be used to suggest
|
||||
expansions to the original query), omitting any document that was in
|
||||
the original query.
|
||||
|
||||
|
||||
Finally, processes the resulting matches and returns.
|
||||
"""
|
||||
database = self._database()
|
||||
|
||||
|
||||
if result_class is None:
|
||||
result_class = SearchResult
|
||||
|
||||
|
||||
query = xapian.Query(DOCUMENT_ID_TERM_PREFIX + get_identifier(model_instance))
|
||||
|
||||
|
||||
enquire = xapian.Enquire(database)
|
||||
enquire.set_query(query)
|
||||
|
||||
|
||||
rset = xapian.RSet()
|
||||
|
||||
|
||||
if not end_offset:
|
||||
end_offset = database.get_doccount()
|
||||
|
||||
|
||||
for match in self._get_enquire_mset(database, enquire, 0, end_offset):
|
||||
rset.add_document(match.docid)
|
||||
|
||||
|
||||
query = xapian.Query(
|
||||
xapian.Query.OP_ELITE_SET,
|
||||
[expand.term for expand in enquire.get_eset(match.document.termlist_count(), rset, XHExpandDecider())],
|
||||
|
|
@ -513,7 +511,7 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
)
|
||||
if limit_to_registered_models:
|
||||
registered_models = self.build_models_list()
|
||||
|
||||
|
||||
if len(registered_models) > 0:
|
||||
query = xapian.Query(
|
||||
xapian.Query.OP_AND, query,
|
||||
|
|
@ -527,12 +525,12 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
query = xapian.Query(
|
||||
xapian.Query.OP_AND, query, additional_query
|
||||
)
|
||||
|
||||
|
||||
enquire.set_query(query)
|
||||
|
||||
|
||||
results = []
|
||||
matches = self._get_enquire_mset(database, enquire, start_offset, end_offset)
|
||||
|
||||
|
||||
for match in matches:
|
||||
app_label, module_name, pk, model_data = pickle.loads(self._get_document_data(database, match.document))
|
||||
results.append(
|
||||
|
|
@ -549,21 +547,21 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
},
|
||||
'spelling_suggestion': None,
|
||||
}
|
||||
|
||||
|
||||
def parse_query(self, query_string):
|
||||
"""
|
||||
Given a `query_string`, will attempt to return a xapian.Query
|
||||
|
||||
|
||||
Required arguments:
|
||||
``query_string`` -- A query string to parse
|
||||
|
||||
|
||||
Returns a xapian.Query
|
||||
"""
|
||||
if query_string == '*':
|
||||
return xapian.Query('') # Match everything
|
||||
return xapian.Query('') # Match everything
|
||||
elif query_string == '':
|
||||
return xapian.Query() # Match nothing
|
||||
|
||||
return xapian.Query() # Match nothing
|
||||
|
||||
qp = xapian.QueryParser()
|
||||
qp.set_database(self._database())
|
||||
qp.set_stemmer(xapian.Stem(self.language))
|
||||
|
|
@ -575,19 +573,19 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
field_dict['field_name'],
|
||||
DOCUMENT_CUSTOM_TERM_PREFIX + field_dict['field_name'].upper()
|
||||
)
|
||||
|
||||
|
||||
vrp = XHValueRangeProcessor(self)
|
||||
qp.add_valuerangeprocessor(vrp)
|
||||
|
||||
|
||||
return qp.parse_query(query_string, self.flags)
|
||||
|
||||
|
||||
def build_schema(self, fields):
|
||||
"""
|
||||
Build the schema from fields.
|
||||
|
||||
|
||||
Required arguments:
|
||||
``fields`` -- A list of fields in the index
|
||||
|
||||
|
||||
Returns a list of fields in dictionary format ready for inclusion in
|
||||
an indexed meta-data.
|
||||
"""
|
||||
|
|
@ -596,11 +594,11 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
{'field_name': ID, 'type': 'text', 'multi_valued': 'false', 'column': 0},
|
||||
]
|
||||
column = len(schema_fields)
|
||||
|
||||
|
||||
for field_name, field_class in sorted(fields.items(), key=lambda n: n[0]):
|
||||
if field_class.document is True:
|
||||
content_field_name = field_class.index_fieldname
|
||||
|
||||
|
||||
if field_class.indexed is True:
|
||||
field_data = {
|
||||
'field_name': field_class.index_fieldname,
|
||||
|
|
@ -608,7 +606,7 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
'multi_valued': 'false',
|
||||
'column': column,
|
||||
}
|
||||
|
||||
|
||||
if field_class.field_type in ['date', 'datetime']:
|
||||
field_data['type'] = 'date'
|
||||
elif field_class.field_type == 'integer':
|
||||
|
|
@ -617,81 +615,81 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
field_data['type'] = 'float'
|
||||
elif field_class.field_type == 'boolean':
|
||||
field_data['type'] = 'boolean'
|
||||
|
||||
|
||||
if field_class.is_multivalued:
|
||||
field_data['multi_valued'] = 'true'
|
||||
|
||||
|
||||
schema_fields.append(field_data)
|
||||
column += 1
|
||||
|
||||
return (content_field_name, schema_fields)
|
||||
|
||||
|
||||
def _do_highlight(self, content, query, tag='em'):
|
||||
"""
|
||||
Highlight `query` terms in `content` with html `tag`.
|
||||
|
||||
|
||||
This method assumes that the input text (`content`) does not contain
|
||||
any special formatting. That is, it does not contain any html tags
|
||||
or similar markup that could be screwed up by the highlighting.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`content` -- Content to search for instances of the `query` terms
|
||||
`query` -- The query whose terms are to be highlighted
|
||||
"""
|
||||
for term in query:
|
||||
for match in re.findall('[^A-Z]+', term): # Ignore field identifiers
|
||||
for match in re.findall('[^A-Z]+', term): # Ignore field identifiers
|
||||
match_re = re.compile(match, re.I)
|
||||
content = match_re.sub('<%s>%s</%s>' % (tag, term, tag), content)
|
||||
|
||||
|
||||
return content
|
||||
|
||||
|
||||
def _do_field_facets(self, results, field_facets):
|
||||
"""
|
||||
Private method that facets a document by field name.
|
||||
|
||||
|
||||
Fields of type MultiValueField will be faceted on each item in the
|
||||
(containing) list.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`results` -- A list of SearchResults to facet
|
||||
`field_facets` -- A list of fields to facet on
|
||||
"""
|
||||
facet_dict = {}
|
||||
|
||||
|
||||
# DS_TODO: Improve this algorithm. Currently, runs in O(N^2), ouch.
|
||||
for field in field_facets:
|
||||
facet_list = {}
|
||||
|
||||
|
||||
for result in results:
|
||||
field_value = getattr(result, field)
|
||||
if self._multi_value_field(field):
|
||||
for item in field_value: # Facet each item in a MultiValueField
|
||||
for item in field_value: # Facet each item in a MultiValueField
|
||||
facet_list[item] = facet_list.get(item, 0) + 1
|
||||
else:
|
||||
facet_list[field_value] = facet_list.get(field_value, 0) + 1
|
||||
|
||||
|
||||
facet_dict[field] = facet_list.items()
|
||||
|
||||
|
||||
return facet_dict
|
||||
|
||||
|
||||
def _do_date_facets(self, results, date_facets):
|
||||
"""
|
||||
Private method that facets a document by date ranges
|
||||
|
||||
|
||||
Required arguments:
|
||||
`results` -- A list of SearchResults to facet
|
||||
`date_facets` -- A dictionary containing facet parameters:
|
||||
{'field': {'start_date': ..., 'end_date': ..., 'gap_by': '...', 'gap_amount': n}}
|
||||
nb., gap must be one of the following:
|
||||
year|month|day|hour|minute|second
|
||||
|
||||
|
||||
For each date facet field in `date_facets`, generates a list
|
||||
of date ranges (from `start_date` to `end_date` by `gap_by`) then
|
||||
iterates through `results` and tallies the count for each date_facet.
|
||||
|
||||
|
||||
Returns a dictionary of date facets (fields) containing a list with
|
||||
entries for each range and a count of documents matching the range.
|
||||
|
||||
|
||||
eg. {
|
||||
'pub_date': [
|
||||
('2009-01-01T00:00:00Z', 5),
|
||||
|
|
@ -703,7 +701,7 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
}
|
||||
"""
|
||||
facet_dict = {}
|
||||
|
||||
|
||||
for date_facet, facet_params in date_facets.iteritems():
|
||||
gap_type = facet_params.get('gap_by')
|
||||
gap_value = facet_params.get('gap_amount', 1)
|
||||
|
|
@ -733,9 +731,9 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
date_range += datetime.timedelta(minutes=int(gap_value))
|
||||
elif gap_type == 'second':
|
||||
date_range += datetime.timedelta(seconds=int(gap_value))
|
||||
|
||||
facet_list = sorted(facet_list, key=lambda n:n[0], reverse=True)
|
||||
|
||||
|
||||
facet_list = sorted(facet_list, key=lambda n: n[0], reverse=True)
|
||||
|
||||
for result in results:
|
||||
result_date = getattr(result, date_facet)
|
||||
if result_date:
|
||||
|
|
@ -749,43 +747,43 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
if result_date > datetime.datetime(*(time.strptime(facet_date[0], '%Y-%m-%dT%H:%M:%S')[0:6])):
|
||||
facet_list[n] = (facet_list[n][0], (facet_list[n][1] + 1))
|
||||
break
|
||||
|
||||
|
||||
facet_dict[date_facet] = facet_list
|
||||
|
||||
|
||||
return facet_dict
|
||||
|
||||
|
||||
def _do_query_facets(self, results, query_facets):
|
||||
"""
|
||||
Private method that facets a document by query
|
||||
|
||||
|
||||
Required arguments:
|
||||
`results` -- A list of SearchResults to facet
|
||||
`query_facets` -- A dictionary containing facet parameters:
|
||||
{'field': 'query', [...]}
|
||||
|
||||
|
||||
For each query in `query_facets`, generates a dictionary entry with
|
||||
the field name as the key and a tuple with the query and result count
|
||||
as the value.
|
||||
|
||||
|
||||
eg. {'name': ('a*', 5)}
|
||||
"""
|
||||
facet_dict = {}
|
||||
|
||||
|
||||
for field, query in query_facets.iteritems():
|
||||
facet_dict[field] = (query, self.search(self.parse_query(query))['hits'])
|
||||
|
||||
return facet_dict
|
||||
|
||||
|
||||
def _do_spelling_suggestion(self, database, query, spelling_query):
|
||||
"""
|
||||
Private method that returns a single spelling suggestion based on
|
||||
`spelling_query` or `query`.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`database` -- The database to check spelling against
|
||||
`query` -- The query to check
|
||||
`spelling_query` -- If not None, this will be checked instead of `query`
|
||||
|
||||
|
||||
Returns a string with a suggested spelling
|
||||
"""
|
||||
if spelling_query:
|
||||
|
|
@ -793,14 +791,14 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
return ' '.join([database.get_spelling_suggestion(term) for term in spelling_query.split()])
|
||||
else:
|
||||
return database.get_spelling_suggestion(spelling_query)
|
||||
|
||||
|
||||
term_set = set()
|
||||
for term in query:
|
||||
for match in re.findall('[^A-Z]+', term): # Ignore field identifiers
|
||||
for match in re.findall('[^A-Z]+', term): # Ignore field identifiers
|
||||
term_set.add(database.get_spelling_suggestion(match))
|
||||
|
||||
|
||||
return ' '.join(term_set)
|
||||
|
||||
|
||||
def _database(self, writable=False):
|
||||
"""
|
||||
Private method that returns a xapian.Database for use.
|
||||
|
|
@ -860,7 +858,7 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
"""
|
||||
Given a database and enquire instance, returns the estimated number
|
||||
of matches.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`database` -- The database to be queried
|
||||
`enquire` -- The enquire instance
|
||||
|
|
@ -873,25 +871,25 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
"""
|
||||
Private method that returns the column value slot in the database
|
||||
for a given field.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`field` -- The field to lookup
|
||||
|
||||
|
||||
Returns an integer with the column location (0 indexed).
|
||||
"""
|
||||
for field_dict in self.schema:
|
||||
if field_dict['field_name'] == field:
|
||||
return field_dict['column']
|
||||
return 0
|
||||
|
||||
|
||||
def _multi_value_field(self, field):
|
||||
"""
|
||||
Private method that returns `True` if a field is multi-valued, else
|
||||
`False`.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`field` -- The field to lookup
|
||||
|
||||
|
||||
Returns a boolean value indicating whether the field is multi-valued.
|
||||
"""
|
||||
for field_dict in self.schema:
|
||||
|
|
@ -911,7 +909,7 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
|
||||
if self.end_offset is not None:
|
||||
kwargs['end_offset'] = self.end_offset - self.start_offset
|
||||
|
||||
|
||||
return kwargs
|
||||
|
||||
def build_query(self):
|
||||
|
|
@ -919,7 +917,7 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
query = xapian.Query('')
|
||||
else:
|
||||
query = self._query_from_search_node(self.query_filter)
|
||||
|
||||
|
||||
if self.models:
|
||||
subqueries = [
|
||||
xapian.Query(
|
||||
|
|
@ -927,14 +925,14 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
DOCUMENT_CT_TERM_PREFIX,
|
||||
model._meta.app_label, model._meta.module_name
|
||||
)
|
||||
), 0 # Pure boolean sub-query
|
||||
), 0 # Pure boolean sub-query
|
||||
) for model in self.models
|
||||
]
|
||||
query = xapian.Query(
|
||||
xapian.Query.OP_AND, query,
|
||||
xapian.Query(xapian.Query.OP_OR, subqueries)
|
||||
)
|
||||
|
||||
|
||||
if self.boost:
|
||||
subqueries = [
|
||||
xapian.Query(
|
||||
|
|
@ -945,12 +943,12 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
xapian.Query.OP_AND_MAYBE, query,
|
||||
xapian.Query(xapian.Query.OP_OR, subqueries)
|
||||
)
|
||||
|
||||
|
||||
return query
|
||||
|
||||
|
||||
def _query_from_search_node(self, search_node, is_not=False):
|
||||
query_list = []
|
||||
|
||||
|
||||
for child in search_node.children:
|
||||
if isinstance(child, SearchNode):
|
||||
query_list.append(
|
||||
|
|
@ -959,16 +957,16 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
else:
|
||||
expression, term = child
|
||||
field, filter_type = search_node.split_expression(expression)
|
||||
|
||||
|
||||
# Handle when we've got a ``ValuesListQuerySet``...
|
||||
if hasattr(term, 'values_list'):
|
||||
term = list(term)
|
||||
|
||||
|
||||
if isinstance(term, (list, tuple)):
|
||||
term = [_marshal_term(t) for t in term]
|
||||
else:
|
||||
term = _marshal_term(term)
|
||||
|
||||
|
||||
if field == 'content':
|
||||
query_list.append(self._content_field(term, is_not))
|
||||
else:
|
||||
|
|
@ -986,21 +984,21 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
query_list.append(self._filter_startswith(term, field, is_not))
|
||||
elif filter_type == 'in':
|
||||
query_list.append(self._filter_in(term, field, is_not))
|
||||
|
||||
|
||||
if search_node.connector == 'OR':
|
||||
return xapian.Query(xapian.Query.OP_OR, query_list)
|
||||
else:
|
||||
return xapian.Query(xapian.Query.OP_AND, query_list)
|
||||
|
||||
|
||||
def _content_field(self, term, is_not):
|
||||
"""
|
||||
Private method that returns a xapian.Query that searches for `term`
|
||||
in all fields.
|
||||
|
||||
|
||||
Required arguments:
|
||||
``term`` -- The term to search for
|
||||
``is_not`` -- Invert the search results
|
||||
|
||||
|
||||
Returns:
|
||||
A xapian.Query
|
||||
"""
|
||||
|
|
@ -1018,17 +1016,17 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), self._term_query(term))
|
||||
else:
|
||||
return self._term_query(term)
|
||||
|
||||
|
||||
def _filter_exact(self, term, field, is_not):
|
||||
"""
|
||||
Private method that returns a xapian.Query that searches for `term`
|
||||
in a specified `field`.
|
||||
|
||||
|
||||
Required arguments:
|
||||
``term`` -- The term to search for
|
||||
``field`` -- The field to search
|
||||
``is_not`` -- Invert the search results
|
||||
|
||||
|
||||
Returns:
|
||||
A xapian.Query
|
||||
"""
|
||||
|
|
@ -1044,17 +1042,17 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), self._term_query(term, field))
|
||||
else:
|
||||
return self._term_query(term, field)
|
||||
|
||||
|
||||
def _filter_in(self, term_list, field, is_not):
|
||||
"""
|
||||
Private method that returns a xapian.Query that searches for any term
|
||||
of `term_list` in a specified `field`.
|
||||
|
||||
|
||||
Required arguments:
|
||||
``term_list`` -- The terms to search for
|
||||
``field`` -- The field to search
|
||||
``is_not`` -- Invert the search results
|
||||
|
||||
|
||||
Returns:
|
||||
A xapian.Query
|
||||
"""
|
||||
|
|
@ -1072,17 +1070,17 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), xapian.Query(xapian.Query.OP_OR, query_list))
|
||||
else:
|
||||
return xapian.Query(xapian.Query.OP_OR, query_list)
|
||||
|
||||
|
||||
def _filter_startswith(self, term, field, is_not):
|
||||
"""
|
||||
Private method that returns a xapian.Query that searches for any term
|
||||
that begins with `term` in a specified `field`.
|
||||
|
||||
|
||||
Required arguments:
|
||||
``term`` -- The terms to search for
|
||||
``field`` -- The field to search
|
||||
``is_not`` -- Invert the search results
|
||||
|
||||
|
||||
Returns:
|
||||
A xapian.Query
|
||||
"""
|
||||
|
|
@ -1093,10 +1091,10 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
self.backend.parse_query('%s:%s*' % (field, term)),
|
||||
)
|
||||
return self.backend.parse_query('%s:%s*' % (field, term))
|
||||
|
||||
|
||||
def _filter_gt(self, term, field, is_not):
|
||||
return self._filter_lte(term, field, is_not=(is_not != True))
|
||||
|
||||
|
||||
def _filter_lt(self, term, field, is_not):
|
||||
return self._filter_gte(term, field, is_not=(is_not != True))
|
||||
|
||||
|
|
@ -1113,7 +1111,7 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
xapian.Query(xapian.Query.OP_VALUE_RANGE, pos, begin, end)
|
||||
)
|
||||
return xapian.Query(xapian.Query.OP_VALUE_RANGE, pos, begin, end)
|
||||
|
||||
|
||||
def _filter_lte(self, term, field, is_not):
|
||||
"""
|
||||
Private method that returns a xapian.Query that searches for any term
|
||||
|
|
@ -1131,26 +1129,26 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
def _all_query(self):
|
||||
"""
|
||||
Private method that returns a xapian.Query that returns all documents.
|
||||
|
||||
|
||||
Returns:
|
||||
A xapian.Query
|
||||
"""
|
||||
return xapian.Query('')
|
||||
|
||||
|
||||
def _term_query(self, term, field=None):
|
||||
"""
|
||||
Private method that returns a term based xapian.Query that searches
|
||||
for `term`.
|
||||
|
||||
|
||||
Required arguments:
|
||||
``term`` -- The term to search for
|
||||
``field`` -- The field to search (If `None`, all fields)
|
||||
|
||||
|
||||
Returns:
|
||||
A xapian.Query
|
||||
"""
|
||||
stem = xapian.Stem(self.backend.language)
|
||||
|
||||
|
||||
if field == 'id':
|
||||
return xapian.Query('%s%s' % (DOCUMENT_ID_TERM_PREFIX, term))
|
||||
elif field == 'django_ct':
|
||||
|
|
@ -1165,22 +1163,22 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
else:
|
||||
stemmed = 'Z%s' % stem(term)
|
||||
unstemmed = term
|
||||
|
||||
|
||||
return xapian.Query(
|
||||
xapian.Query.OP_OR,
|
||||
xapian.Query(stemmed),
|
||||
xapian.Query(unstemmed)
|
||||
)
|
||||
|
||||
|
||||
def _phrase_query(self, term_list, field=None):
|
||||
"""
|
||||
Private method that returns a phrase based xapian.Query that searches
|
||||
for terms in `term_list`.
|
||||
|
||||
|
||||
Required arguments:
|
||||
``term_list`` -- The terms to search for
|
||||
``field`` -- The field to search (If `None`, all fields)
|
||||
|
||||
|
||||
Returns:
|
||||
A xapian.Query
|
||||
"""
|
||||
|
|
|
|||
Loading…
Reference in a new issue