diff --git a/tests/xapian_tests/models.py b/tests/xapian_tests/models.py
index 51c415e..36a5931 100644
--- a/tests/xapian_tests/models.py
+++ b/tests/xapian_tests/models.py
@@ -1,2 +1,12 @@
-# Copyright (C) 2009, 2010, 2011, 2012 David Sauve
-# Copyright (C) 2009, 2010 Trapeze
+from django.db import models
+
+
+class Document(models.Model):
+ type_name = models.CharField(max_length=50)
+ number = models.IntegerField()
+ name = models.CharField(max_length=200)
+
+ date = models.DateField()
+
+ summary = models.TextField()
+ text = models.TextField()
diff --git a/tests/xapian_tests/search_indexes.py b/tests/xapian_tests/search_indexes.py
new file mode 100644
index 0000000..76c0df4
--- /dev/null
+++ b/tests/xapian_tests/search_indexes.py
@@ -0,0 +1,26 @@
+from haystack import indexes
+
+from . import models
+
+
+class DocumentIndex(indexes.SearchIndex):
+    text = indexes.CharField(document=True)
+    summary = indexes.CharField(model_attr='summary')
+
+    type_name = indexes.CharField(model_attr='type_name')
+
+    number = indexes.IntegerField(model_attr='number')
+
+    name = indexes.CharField(model_attr='name')
+    date = indexes.DateField(model_attr='date')
+
+    tags = indexes.MultiValueField()
+
+    def get_model(self):
+        return models.Document  # the model class itself, not an instance
+
+    def prepare_tags(self, obj):
+        tag_sets = [['tag', 'tag-test', 'tag-test-test'],
+                    ['tag', 'tag-test'],
+                    ['tag']]
+        return tag_sets[obj.id % 3]  # rotate through the three tag sets by id
diff --git a/tests/xapian_tests/tests/test_xapian_backend.py b/tests/xapian_tests/tests/test_backend.py
similarity index 53%
rename from tests/xapian_tests/tests/test_xapian_backend.py
rename to tests/xapian_tests/tests/test_backend.py
index 21f5ad7..2e91f58 100644
--- a/tests/xapian_tests/tests/test_xapian_backend.py
+++ b/tests/xapian_tests/tests/test_backend.py
@@ -6,27 +6,29 @@ import xapian
import subprocess
import os
-from django.conf import settings
from django.db import models
from django.test import TestCase
-from haystack import connections, reset_search_queries
+from haystack import connections
from haystack import indexes
-from haystack.backends.xapian_backend import InvalidIndexError, _marshal_value
-from haystack.models import SearchResult
-from haystack.query import SearchQuerySet, SQ
+from haystack.backends.xapian_backend import InvalidIndexError, _term_to_xapian_value
from haystack.utils.loading import UnifiedIndex
-from core.models import MockTag, MockModel, AnotherMockModel, AFourthMockModel
+from core.models import MockTag, MockModel, AnotherMockModel
from core.tests.mocks import MockSearchResult
def get_terms(backend, *args):
- result = subprocess.check_output(['delve'] + list(args) + [backend.path], env=os.environ.copy())
+ result = subprocess.check_output(['delve'] + list(args) + [backend.path],
+ env=os.environ.copy()).decode('utf-8')
result = result.split(": ")[1].strip()
return result.split(" ")
+def pks(results):
+ return [result.pk for result in results]
+
+
class XapianMockModel(models.Model):
"""
Same as tests.core.MockModel with a few extra fields for testing various
@@ -74,7 +76,7 @@ class XapianMockSearchIndex(indexes.SearchIndex):
return XapianMockModel
def prepare_sites(self, obj):
- return ['%d' % (i * obj.id) for i in xrange(1, 4)]
+ return ['%d' % (i * obj.id) for i in range(1, 4)]
def prepare_tags(self, obj):
if obj.id == 1:
@@ -85,7 +87,7 @@ class XapianMockSearchIndex(indexes.SearchIndex):
return ['an', 'to', 'or']
def prepare_keys(self, obj):
- return [i * obj.id for i in xrange(1, 4)]
+ return [i * obj.id for i in range(1, 4)]
def prepare_titles(self, obj):
if obj.id == 1:
@@ -105,7 +107,17 @@ class XapianMockSearchIndex(indexes.SearchIndex):
class XapianSimpleMockIndex(indexes.SearchIndex):
text = indexes.CharField(document=True)
author = indexes.CharField(model_attr='author')
- pub_date = indexes.DateTimeField(model_attr='pub_date')
+ url = indexes.CharField()
+ non_anscii = indexes.CharField()
+
+ datetime = indexes.DateTimeField(model_attr='pub_date')
+ date = indexes.DateField()
+
+ number = indexes.IntegerField()
+ float_number = indexes.FloatField()
+ decimal_number = indexes.DecimalField()
+
+ multi_value = indexes.MultiValueField()
def get_model(self):
return MockModel
@@ -113,11 +125,38 @@ class XapianSimpleMockIndex(indexes.SearchIndex):
def prepare_text(self, obj):
return 'this_is_a_word'
+ def prepare_author(self, obj):
+ return 'david'
+
+ def prepare_url(self, obj):
+ return 'http://example.com/1/'
+
+ def prepare_non_anscii(self, obj):
+ return 'thsi sdas das corrup\xe7\xe3o das'
+
+ def prepare_datetime(self, obj):
+ return datetime.datetime(2009, 2, 25, 1, 1, 1)
+
+ def prepare_date(self, obj):
+ return datetime.date(2008, 8, 8)
+
+ def prepare_number(self, obj):
+ return 123456789
+
+ def prepare_float_number(self, obj):
+ return 123.123456789
+
+ def prepare_decimal_number(self, obj):
+ return '22.34'
+
+ def prepare_multi_value(self, obj):
+ return ['tag', 'tag-test', 'tag-test-test']
+
class HaystackBackendTestCase(object):
"""
- An abstract TestCase that implements a hack to ensure connections
- has the mock index
+    Abstract TestCase that implements a hack to ensure `connections`
+ has the right index
It has a method get_index() that returns a SearchIndex
that must be overwritten.
@@ -141,40 +180,23 @@ class HaystackBackendTestCase(object):
connections['default']._index = self.old_ui
-class XapianBackendTestCase(HaystackBackendTestCase, TestCase):
+class BackendIndexationTestCase(HaystackBackendTestCase, TestCase):
+ """
+ Tests indexation behavior.
+
+ Tests related to how the backend indexes terms,
+ values, and others go here.
+ """
def get_index(self):
return XapianSimpleMockIndex()
def setUp(self):
- super(XapianBackendTestCase, self).setUp()
-
+ super(BackendIndexationTestCase, self).setUp()
mock = XapianMockModel()
mock.id = 1
- mock.author = 'david'
- mock.pub_date = datetime.date(2009, 2, 25)
-
self.backend.update(self.index, [mock])
- def test_fields(self):
- """
- Tests that all fields are in the database
- """
- terms = get_terms(self.backend, '-a')
- for field in ['author', 'pub_date', 'text']:
- is_inside = False
- for term in terms:
- if "X%s" % field.upper() in term:
- is_inside = True
- break
- self.assertTrue(is_inside, field)
-
- def test_text(self):
- terms = get_terms(self.backend, '-a')
-
- self.assertTrue('this_is_a_word' in terms)
- self.assertTrue('Zthis_is_a_word' in terms)
-
def test_app_is_not_split(self):
"""
Tests that the app path is not split
@@ -195,18 +217,117 @@ class XapianBackendTestCase(HaystackBackendTestCase, TestCase):
self.assertFalse('xapianmockmodel' in terms)
self.assertFalse('tests' in terms)
+ def test_fields_exist(self):
+ """
+ Tests that all fields are in the database
+ """
+ terms = get_terms(self.backend, '-a')
+ for field in ['author', 'datetime', 'text', 'url']:
+ is_inside = False
+ for term in terms:
+ if term.startswith("X%s" % field.upper()):
+ is_inside = True
+ break
+ self.assertTrue(is_inside, field)
-class XapianSearchBackendTestCase(HaystackBackendTestCase, TestCase):
+ def test_text_field(self):
+ terms = get_terms(self.backend, '-a')
+ self.assertTrue('this_is_a_word' in terms)
+ self.assertTrue('Zthis_is_a_word' in terms)
+ self.assertTrue('ZXTEXTthis_is_a_word' in terms)
+ self.assertTrue('XTEXTthis_is_a_word' in terms)
+
+ def test_author_field(self):
+ terms = get_terms(self.backend, '-a')
+
+ self.assertTrue('XAUTHORdavid' in terms)
+ self.assertTrue('ZXAUTHORdavid' in terms)
+ self.assertTrue('Zdavid' in terms)
+ self.assertTrue('david' in terms)
+
+    def test_datetime_field(self):
+        terms = get_terms(self.backend, '-a')
+        # the compact YYYYMMDDHHMMSS form of 2009-02-25 01:01:01 must not be a term
+        self.assertFalse('XDATETIME20090225010101' in terms)
+        self.assertFalse('ZXDATETIME20090225010101' in terms)
+        self.assertFalse('20090225010101' in terms)
+        # the date and time parts are expected as separate terms instead
+        self.assertTrue('XDATETIME2009-02-25' in terms)
+        self.assertTrue('2009-02-25' in terms)
+        self.assertTrue('01:01:01' in terms)
+        self.assertTrue('XDATETIME01:01:01' in terms)
+
+ def test_date_field(self):
+ terms = get_terms(self.backend, '-a')
+
+ self.assertTrue('XDATE2008-08-08' in terms)
+ self.assertTrue('2008-08-08' in terms)
+ self.assertFalse('XDATE00:00:00' in terms)
+ self.assertFalse('00:00:00' in terms)
+
+ def test_url_field(self):
+ terms = get_terms(self.backend, '-a')
+ self.assertTrue('http://example.com/1/' in terms)
+
+ def test_integer_field(self):
+ terms = get_terms(self.backend, '-a')
+ self.assertTrue('123456789' in terms)
+ self.assertTrue('XNUMBER123456789' in terms)
+ self.assertFalse('ZXNUMBER123456789' in terms)
+
+ def test_float_field(self):
+ terms = get_terms(self.backend, '-a')
+ self.assertTrue('123.123456789' in terms)
+ self.assertTrue('XFLOAT_NUMBER123.123456789' in terms)
+ self.assertFalse('ZXFLOAT_NUMBER123.123456789' in terms)
+
+ def test_decimal_field(self):
+ terms = get_terms(self.backend, '-a')
+ self.assertTrue('22.34' in terms)
+ self.assertTrue('XDECIMAL_NUMBER22.34' in terms)
+ self.assertFalse('ZXDECIMAL_NUMBER22.34' in terms)
+
+ def test_multivalue_field(self):
+ """
+ Regression test for #103
+ """
+ terms = get_terms(self.backend, '-a')
+ self.assertTrue('tag' in terms)
+ self.assertTrue('tag-test' in terms)
+ self.assertTrue('tag-test-test' in terms)
+
+ self.assertTrue('XMULTI_VALUEtag' in terms)
+ self.assertTrue('XMULTI_VALUEtag-test' in terms)
+ self.assertTrue('XMULTI_VALUEtag-test-test' in terms)
+
+        # these and only these terms:
+        # the 3 prefixed terms asserted above + 3 for the exact term (^{term}$)
+ self.assertEqual(len([term for term in terms if term.startswith('XMULTI_VALUE')]), 6)
+ # no stem for exact multivalues.
+ self.assertEqual(len([term for term in terms if term.startswith('ZXMULTI_VALUE')]), 0)
+
+ def test_non_ascii_chars(self):
+ terms = get_terms(self.backend, '-a')
+ self.assertIn('corrup\xe7\xe3o', terms)
+
+
+class BackendFeaturesTestCase(HaystackBackendTestCase, TestCase):
+ """
+ Tests supported features on the backend side.
+
+    Tests for features implemented on the backend
+ go here.
+ """
def get_index(self):
return XapianMockSearchIndex()
def setUp(self):
- super(XapianSearchBackendTestCase, self).setUp()
+ super(BackendFeaturesTestCase, self).setUp()
self.sample_objs = []
- for i in xrange(1, 4):
+ for i in range(1, 4):
mock = XapianMockModel()
mock.id = i
mock.author = 'david%s' % i
@@ -225,19 +346,20 @@ class XapianSearchBackendTestCase(HaystackBackendTestCase, TestCase):
self.backend.update(self.index, self.sample_objs)
def test_update(self):
- self.assertEqual(self.backend.document_count(), 3)
- self.assertEqual([result.pk for result in self.backend.search(xapian.Query(''))['results']], [1, 2, 3])
+ self.assertEqual(pks(self.backend.search(xapian.Query(''))['results']),
+ [1, 2, 3])
def test_duplicate_update(self):
- # Duplicates should be updated, not appended -- http://github.com/notanumber/xapian-haystack/issues/#issue/6
+ """
+ Regression test for #6.
+ """
self.backend.update(self.index, self.sample_objs)
-
self.assertEqual(self.backend.document_count(), 3)
def test_remove(self):
self.backend.remove(self.sample_objs[0])
- self.assertEqual(self.backend.document_count(), 2)
- self.assertEqual([result.pk for result in self.backend.search(xapian.Query(''))['results']], [2, 3])
+ self.assertEqual(pks(self.backend.search(xapian.Query(''))['results']),
+ [2, 3])
def test_clear(self):
self.backend.clear()
@@ -259,18 +381,20 @@ class XapianSearchBackendTestCase(HaystackBackendTestCase, TestCase):
self.assertEqual(self.backend.document_count(), 0)
def test_search(self):
+ # no match query
self.assertEqual(self.backend.search(xapian.Query()), {'hits': 0, 'results': []})
- self.assertEqual(self.backend.search(xapian.Query(''))['hits'], 3)
- self.assertEqual([result.pk for result in self.backend.search(xapian.Query(''))['results']], [1, 2, 3])
- self.assertEqual(self.backend.search(xapian.Query('indexed'))['hits'], 3)
- self.assertEqual([result.pk for result in self.backend.search(xapian.Query(''))['results']], [1, 2, 3])
+ # all match query
+ self.assertEqual(pks(self.backend.search(xapian.Query(''))['results']),
+ [1, 2, 3])
- # Ensure that swapping the ``result_class`` works.
- self.assertTrue(isinstance(self.backend.search(xapian.Query('indexed'), result_class=MockSearchResult)['results'][0], MockSearchResult))
+ # Other `result_class`
+ self.assertTrue(isinstance(self.backend.search(xapian.Query('indexed'),
+ result_class=MockSearchResult)['results'][0],
+ MockSearchResult))
def test_search_field_with_punctuation(self):
- # self.assertEqual(self.backend.search(xapian.Query('http://example.com/'))['hits'], 3)
- self.assertEqual([result.pk for result in self.backend.search(xapian.Query('http://example.com/1/'))['results']], [1])
+ self.assertEqual(pks(self.backend.search(xapian.Query('http://example.com/1/'))['results']),
+ [1])
def test_search_by_mvf(self):
self.assertEqual(self.backend.search(xapian.Query('ab'))['hits'], 1)
@@ -279,22 +403,39 @@ class XapianSearchBackendTestCase(HaystackBackendTestCase, TestCase):
self.assertEqual(self.backend.search(xapian.Query('one'))['hits'], 3)
def test_field_facets(self):
- self.assertEqual(self.backend.search(xapian.Query(), facets=['name']), {'hits': 0, 'results': []})
+ self.assertEqual(self.backend.search(xapian.Query(), facets=['name']),
+ {'hits': 0, 'results': []})
+
results = self.backend.search(xapian.Query('indexed'), facets=['name'])
self.assertEqual(results['hits'], 3)
- self.assertEqual(results['facets']['fields']['name'], [('david1', 1), ('david2', 1), ('david3', 1)])
+ self.assertEqual(results['facets']['fields']['name'],
+ [('david1', 1), ('david2', 1), ('david3', 1)])
results = self.backend.search(xapian.Query('indexed'), facets=['flag'])
self.assertEqual(results['hits'], 3)
- self.assertEqual(results['facets']['fields']['flag'], [(False, 1), (True, 2)])
+ self.assertEqual(results['facets']['fields']['flag'],
+ [(False, 1), (True, 2)])
results = self.backend.search(xapian.Query('indexed'), facets=['sites'])
self.assertEqual(results['hits'], 3)
- self.assertEqual(results['facets']['fields']['sites'], [('1', 1), ('3', 2), ('2', 2), ('4', 1), ('6', 2), ('9', 1)])
+ self.assertEqual(results['facets']['fields']['sites'],
+ [('1', 1), ('3', 2), ('2', 2), ('4', 1), ('6', 2), ('9', 1)])
+
+ def test_raise_index_error_on_wrong_field(self):
+ """
+ Regression test for #109.
+ """
+ self.assertRaises(InvalidIndexError, self.backend.search, xapian.Query(''), facets=['dsdas'])
def test_date_facets(self):
- self.assertEqual(self.backend.search(xapian.Query(), date_facets={'pub_date': {'start_date': datetime.datetime(2008, 10, 26), 'end_date': datetime.datetime(2009, 3, 26), 'gap_by': 'month'}}), {'hits': 0, 'results': []})
- results = self.backend.search(xapian.Query('indexed'), date_facets={'pub_date': {'start_date': datetime.datetime(2008, 10, 26), 'end_date': datetime.datetime(2009, 3, 26), 'gap_by': 'month'}})
+ facets = {'pub_date': {'start_date': datetime.datetime(2008, 10, 26),
+ 'end_date': datetime.datetime(2009, 3, 26),
+ 'gap_by': 'month'}}
+
+ self.assertEqual(self.backend.search(xapian.Query(), date_facets=facets),
+ {'hits': 0, 'results': []})
+
+ results = self.backend.search(xapian.Query('indexed'), date_facets=facets)
self.assertEqual(results['hits'], 3)
self.assertEqual(results['facets']['dates']['pub_date'], [
('2009-02-26T00:00:00', 0),
@@ -304,7 +445,11 @@ class XapianSearchBackendTestCase(HaystackBackendTestCase, TestCase):
('2008-10-26T00:00:00', 0),
])
- results = self.backend.search(xapian.Query('indexed'), date_facets={'pub_date': {'start_date': datetime.datetime(2009, 02, 01), 'end_date': datetime.datetime(2009, 3, 15), 'gap_by': 'day', 'gap_amount': 15}})
+ facets = {'pub_date': {'start_date': datetime.datetime(2009, 2, 1),
+ 'end_date': datetime.datetime(2009, 3, 15),
+ 'gap_by': 'day',
+ 'gap_amount': 15}}
+ results = self.backend.search(xapian.Query('indexed'), date_facets=facets)
self.assertEqual(results['hits'], 3)
self.assertEqual(results['facets']['dates']['pub_date'], [
('2009-03-03T00:00:00', 0),
@@ -313,111 +458,131 @@ class XapianSearchBackendTestCase(HaystackBackendTestCase, TestCase):
])
def test_query_facets(self):
- self.assertEqual(self.backend.search(xapian.Query(), query_facets={'name': 'da*'}), {'hits': 0, 'results': []})
+ self.assertEqual(self.backend.search(xapian.Query(), query_facets={'name': 'da*'}),
+ {'hits': 0, 'results': []})
+
results = self.backend.search(xapian.Query('indexed'), query_facets={'name': 'da*'})
self.assertEqual(results['hits'], 3)
self.assertEqual(results['facets']['queries']['name'], ('da*', 3))
def test_narrow_queries(self):
- self.assertEqual(self.backend.search(xapian.Query(), narrow_queries={'name:david1'}), {'hits': 0, 'results': []})
+ self.assertEqual(self.backend.search(xapian.Query(), narrow_queries={'name:david1'}),
+ {'hits': 0, 'results': []})
results = self.backend.search(xapian.Query('indexed'), narrow_queries={'name:david1'})
self.assertEqual(results['hits'], 1)
def test_highlight(self):
- self.assertEqual(self.backend.search(xapian.Query(), highlight=True), {'hits': 0, 'results': []})
+ self.assertEqual(self.backend.search(xapian.Query(), highlight=True),
+ {'hits': 0, 'results': []})
self.assertEqual(self.backend.search(xapian.Query('indexed'), highlight=True)['hits'], 3)
- self.assertEqual([result.highlighted['text'] for result in self.backend.search(xapian.Query('indexed'), highlight=True)['results']], ['indexed!\n1', 'indexed!\n2', 'indexed!\n3'])
+
+ results = self.backend.search(xapian.Query('indexed'), highlight=True)['results']
+ self.assertEqual([result.highlighted['text'] for result in results],
+ ['indexed!\n1', 'indexed!\n2', 'indexed!\n3'])
def test_spelling_suggestion(self):
self.assertEqual(self.backend.search(xapian.Query('indxe'))['hits'], 0)
- self.assertEqual(self.backend.search(xapian.Query('indxe'))['spelling_suggestion'], 'indexed')
+ self.assertEqual(self.backend.search(xapian.Query('indxe'))['spelling_suggestion'],
+ 'indexed')
self.assertEqual(self.backend.search(xapian.Query('indxed'))['hits'], 0)
- self.assertEqual(self.backend.search(xapian.Query('indxed'))['spelling_suggestion'], 'indexed')
+ self.assertEqual(self.backend.search(xapian.Query('indxed'))['spelling_suggestion'],
+ 'indexed')
self.assertEqual(self.backend.search(xapian.Query('foo'))['hits'], 0)
- self.assertEqual(self.backend.search(xapian.Query('foo'), spelling_query='indexy')['spelling_suggestion'], 'indexed')
+ self.assertEqual(self.backend.search(xapian.Query('foo'), spelling_query='indexy')['spelling_suggestion'],
+ 'indexed')
self.assertEqual(self.backend.search(xapian.Query('XNAMEdavid'))['hits'], 0)
- self.assertEqual(self.backend.search(xapian.Query('XNAMEdavid'))['spelling_suggestion'], 'david1')
+ self.assertEqual(self.backend.search(xapian.Query('XNAMEdavid'))['spelling_suggestion'],
+ 'david1')
def test_more_like_this(self):
results = self.backend.more_like_this(self.sample_objs[0])
- self.assertEqual(results['hits'], 2)
- self.assertEqual([result.pk for result in results['results']], [3, 2])
- results = self.backend.more_like_this(self.sample_objs[0], additional_query=xapian.Query('david3'))
- self.assertEqual(results['hits'], 1)
- self.assertEqual([result.pk for result in results['results']], [3])
+ self.assertEqual(pks(results['results']), [3, 2])
- results = self.backend.more_like_this(self.sample_objs[0], limit_to_registered_models=True)
- self.assertEqual(results['hits'], 2)
- self.assertEqual([result.pk for result in results['results']], [3, 2])
+ results = self.backend.more_like_this(self.sample_objs[0],
+ additional_query=xapian.Query('david3'))
- # Ensure that swapping the ``result_class`` works.
- self.assertTrue(isinstance(self.backend.more_like_this(self.sample_objs[0], result_class=MockSearchResult)['results'][0], MockSearchResult))
+ self.assertEqual(pks(results['results']), [3])
+
+ results = self.backend.more_like_this(self.sample_objs[0],
+ limit_to_registered_models=True)
+
+ self.assertEqual(pks(results['results']), [3, 2])
+
+ # Other `result_class`
+ self.assertTrue(isinstance(self.backend.more_like_this(self.sample_objs[0],
+ result_class=MockSearchResult)['results'][0],
+ MockSearchResult))
def test_order_by(self):
results = self.backend.search(xapian.Query(''), sort_by=['pub_date'])
- self.assertEqual([result.pk for result in results['results']], [3, 2, 1])
+ self.assertEqual(pks(results['results']), [3, 2, 1])
results = self.backend.search(xapian.Query(''), sort_by=['-pub_date'])
- self.assertEqual([result.pk for result in results['results']], [1, 2, 3])
+ self.assertEqual(pks(results['results']), [1, 2, 3])
results = self.backend.search(xapian.Query(''), sort_by=['exp_date'])
- self.assertEqual([result.pk for result in results['results']], [1, 2, 3])
+ self.assertEqual(pks(results['results']), [1, 2, 3])
results = self.backend.search(xapian.Query(''), sort_by=['-exp_date'])
- self.assertEqual([result.pk for result in results['results']], [3, 2, 1])
+ self.assertEqual(pks(results['results']), [3, 2, 1])
results = self.backend.search(xapian.Query(''), sort_by=['id'])
- self.assertEqual([result.pk for result in results['results']], [1, 2, 3])
+ self.assertEqual(pks(results['results']), [1, 2, 3])
results = self.backend.search(xapian.Query(''), sort_by=['-id'])
- self.assertEqual([result.pk for result in results['results']], [3, 2, 1])
+ self.assertEqual(pks(results['results']), [3, 2, 1])
results = self.backend.search(xapian.Query(''), sort_by=['value'])
- self.assertEqual([result.pk for result in results['results']], [1, 2, 3])
+ self.assertEqual(pks(results['results']), [1, 2, 3])
results = self.backend.search(xapian.Query(''), sort_by=['-value'])
- self.assertEqual([result.pk for result in results['results']], [3, 2, 1])
+ self.assertEqual(pks(results['results']), [3, 2, 1])
results = self.backend.search(xapian.Query(''), sort_by=['popularity'])
- self.assertEqual([result.pk for result in results['results']], [2, 1, 3])
+ self.assertEqual(pks(results['results']), [2, 1, 3])
results = self.backend.search(xapian.Query(''), sort_by=['-popularity'])
- self.assertEqual([result.pk for result in results['results']], [3, 1, 2])
+ self.assertEqual(pks(results['results']), [3, 1, 2])
results = self.backend.search(xapian.Query(''), sort_by=['flag', 'id'])
- self.assertEqual([result.pk for result in results['results']], [2, 1, 3])
+ self.assertEqual(pks(results['results']), [2, 1, 3])
results = self.backend.search(xapian.Query(''), sort_by=['flag', '-id'])
- self.assertEqual([result.pk for result in results['results']], [2, 3, 1])
+ self.assertEqual(pks(results['results']), [2, 3, 1])
def test_verify_type(self):
self.assertEqual([result.month for result in self.backend.search(xapian.Query(''))['results']],
['02', '02', '02'])
- def test__marshal_value(self):
- self.assertEqual(_marshal_value('abc'), 'abc')
- self.assertEqual(_marshal_value(1), '000000000001')
- self.assertEqual(_marshal_value(2653), '000000002653')
- self.assertEqual(_marshal_value(25.5), b'\xb2`')
- self.assertEqual(_marshal_value([1, 2, 3]), '[1, 2, 3]')
- self.assertEqual(_marshal_value((1, 2, 3)), '(1, 2, 3)')
- self.assertEqual(_marshal_value({'a': 1, 'c': 3, 'b': 2}), "{u'a': 1, u'c': 3, u'b': 2}")
- self.assertEqual(_marshal_value(datetime.datetime(2009, 5, 9, 16, 14)), '20090509161400')
- self.assertEqual(_marshal_value(datetime.datetime(2009, 5, 9, 0, 0)), '20090509000000')
- self.assertEqual(_marshal_value(datetime.datetime(1899, 5, 18, 0, 0)), '18990518000000')
- self.assertEqual(_marshal_value(datetime.datetime(2009, 5, 18, 1, 16, 30, 250)), '20090518011630000250')
+ def test_term_to_xapian_value(self):
+ self.assertEqual(_term_to_xapian_value('abc', 'text'), 'abc')
+ self.assertEqual(_term_to_xapian_value(1, 'integer'), '000000000001')
+ self.assertEqual(_term_to_xapian_value(2653, 'integer'), '000000002653')
+ self.assertEqual(_term_to_xapian_value(25.5, 'float'), b'\xb2`')
+ self.assertEqual(_term_to_xapian_value([1, 2, 3], 'text'), '[1, 2, 3]')
+ self.assertEqual(_term_to_xapian_value((1, 2, 3), 'text'), '(1, 2, 3)')
+ self.assertEqual(_term_to_xapian_value({'a': 1, 'c': 3, 'b': 2}, 'text'),
+ "{u'a': 1, u'c': 3, u'b': 2}")
+ self.assertEqual(_term_to_xapian_value(datetime.datetime(2009, 5, 9, 16, 14), 'datetime'),
+ '20090509161400')
+ self.assertEqual(_term_to_xapian_value(datetime.datetime(2009, 5, 9, 0, 0), 'date'),
+ '20090509000000')
+ self.assertEqual(_term_to_xapian_value(datetime.datetime(1899, 5, 18, 0, 0), 'date'),
+ '18990518000000')
def test_build_schema(self):
- (content_field_name, fields) = self.backend.build_schema(connections['default'].get_unified_index().all_searchfields())
+ search_fields = connections['default'].get_unified_index().all_searchfields()
+ (content_field_name, fields) = self.backend.build_schema(search_fields)
+
self.assertEqual(content_field_name, 'text')
self.assertEqual(len(fields), 14 + 3)
self.assertEqual(fields, [
{'column': 0, 'type': 'text', 'field_name': 'id', 'multi_valued': 'false'},
- {'column': 1, 'type': 'long', 'field_name': 'django_id', 'multi_valued': 'false'},
+ {'column': 1, 'type': 'integer', 'field_name': 'django_id', 'multi_valued': 'false'},
{'column': 2, 'type': 'text', 'field_name': 'django_ct', 'multi_valued': 'false'},
{'column': 3, 'type': 'text', 'field_name': 'empty', 'multi_valued': 'false'},
{'column': 4, 'type': 'date', 'field_name': 'exp_date', 'multi_valued': 'false'},
@@ -432,33 +597,46 @@ class XapianSearchBackendTestCase(HaystackBackendTestCase, TestCase):
{'column': 13, 'type': 'text', 'field_name': 'text', 'multi_valued': 'false'},
{'column': 14, 'type': 'text', 'field_name': 'titles', 'multi_valued': 'true'},
{'column': 15, 'type': 'text', 'field_name': 'url', 'multi_valued': 'false'},
- {'column': 16, 'type': 'long', 'field_name': 'value', 'multi_valued': 'false'}
+ {'column': 16, 'type': 'integer', 'field_name': 'value', 'multi_valued': 'false'}
])
def test_parse_query(self):
- self.assertEqual(str(self.backend.parse_query('indexed')), 'Xapian::Query(Zindex:(pos=1))')
- self.assertEqual(str(self.backend.parse_query('name:david')), 'Xapian::Query(ZXNAMEdavid:(pos=1))')
+ self.assertEqual(str(self.backend.parse_query('indexed')),
+ 'Xapian::Query(Zindex:(pos=1))')
+ self.assertEqual(str(self.backend.parse_query('name:david')),
+ 'Xapian::Query(ZXNAMEdavid:(pos=1))')
if xapian.minor_version() >= 2:
- self.assertEqual(str(self.backend.parse_query('name:da*')), 'Xapian::Query((XNAMEdavid1:(pos=1) SYNONYM XNAMEdavid2:(pos=1) SYNONYM XNAMEdavid3:(pos=1)))')
+ self.assertEqual(str(self.backend.parse_query('name:da*')),
+ 'Xapian::Query(('
+ 'XNAMEdavid1:(pos=1) SYNONYM '
+ 'XNAMEdavid2:(pos=1) SYNONYM '
+ 'XNAMEdavid3:(pos=1)))')
else:
- self.assertEqual(str(self.backend.parse_query('name:da*')), 'Xapian::Query((XNAMEdavid1:(pos=1) OR XNAMEdavid2:(pos=1) OR XNAMEdavid3:(pos=1)))')
+ self.assertEqual(str(self.backend.parse_query('name:da*')),
+ 'Xapian::Query(('
+ 'XNAMEdavid1:(pos=1) OR '
+ 'XNAMEdavid2:(pos=1) OR '
+ 'XNAMEdavid3:(pos=1)))')
self.assertEqual(str(self.backend.parse_query('name:david1..david2')),
'Xapian::Query(VALUE_RANGE 7 david1 david2)')
self.assertEqual(str(self.backend.parse_query('value:0..10')),
'Xapian::Query(VALUE_RANGE 16 000000000000 000000000010)')
self.assertEqual(str(self.backend.parse_query('value:..10')),
- 'Xapian::Query(VALUE_RANGE 16 %012d 000000000010)' % (-sys.maxint - 1))
+ 'Xapian::Query(VALUE_RANGE 16 %012d 000000000010)' % (-sys.maxsize - 1))
self.assertEqual(str(self.backend.parse_query('value:10..*')),
- 'Xapian::Query(VALUE_RANGE 16 000000000010 %012d)' % sys.maxint)
+ 'Xapian::Query(VALUE_RANGE 16 000000000010 %012d)' % sys.maxsize)
self.assertEqual(str(self.backend.parse_query('popularity:25.5..100.0')),
b'Xapian::Query(VALUE_RANGE 9 \xb2` \xba@)')
def test_order_by_django_id(self):
- self.backend.clear()
+ """
+ We need this test because ordering on more than
+ 10 entries was not correct at some point.
+ """
self.sample_objs = []
- number_list = range(1, 101)
+ number_list = list(range(1, 101))
for i in number_list:
mock = XapianMockModel()
mock.id = i
@@ -476,8 +654,7 @@ class XapianSearchBackendTestCase(HaystackBackendTestCase, TestCase):
self.backend.update(self.index, self.sample_objs)
results = self.backend.search(xapian.Query(''), sort_by=['-django_id'])
- self.assertEqual(results['hits'], len(number_list))
- self.assertEqual([result.pk for result in results['results']], list(reversed(number_list)))
+ self.assertEqual(pks(results['results']), list(reversed(number_list)))
def test_more_like_this_with_unindexed_model(self):
"""
@@ -496,187 +673,3 @@ class XapianSearchBackendTestCase(HaystackBackendTestCase, TestCase):
self.backend.silently_fail = False
self.assertRaises(InvalidIndexError, self.backend.more_like_this, mock)
-
-
-class LiveXapianMockSearchIndex(indexes.SearchIndex):
- text = indexes.CharField(document=True, use_template=True)
- name = indexes.CharField(model_attr='author', faceted=True)
- pub_date = indexes.DateField(model_attr='pub_date')
- created = indexes.DateField()
- title = indexes.CharField()
-
- def get_model(self):
- return MockModel
-
-
-class LiveXapianSearchQueryTestCase(HaystackBackendTestCase, TestCase):
- """
- SearchQuery specific tests
- """
- fixtures = ['initial_data.json']
-
- def get_index(self):
- return LiveXapianMockSearchIndex()
-
- def setUp(self):
- super(LiveXapianSearchQueryTestCase, self).setUp()
-
- self.backend.update(self.index, MockModel.objects.all())
-
- self.sq = connections['default'].get_query()
-
- def test_get_spelling(self):
- self.sq.add_filter(SQ(content='indxd'))
- self.assertEqual(self.sq.get_spelling_suggestion(), 'indexed')
- self.assertEqual(self.sq.get_spelling_suggestion('indxd'), 'indexed')
-
- def test_startswith(self):
- self.sq.add_filter(SQ(name__startswith='da'))
- self.assertEqual([result.pk for result in self.sq.get_results()], [1, 2, 3])
-
- def test_build_query_gt(self):
- self.sq.add_filter(SQ(name__gt='m'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(( AND_NOT VALUE_RANGE 4 a m))')
-
- def test_build_query_gte(self):
- self.sq.add_filter(SQ(name__gte='m'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(VALUE_RANGE 4 m zzzzzzzzzzzzzzzzzzzzzzzzzzzz'
- 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
- 'zzzzzzzzzzzzzz)')
-
- def test_build_query_lt(self):
- self.sq.add_filter(SQ(name__lt='m'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(( AND_NOT VALUE_RANGE 4 m zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz))')
-
- def test_build_query_lte(self):
- self.sq.add_filter(SQ(name__lte='m'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(VALUE_RANGE 4 a m)')
-
- def test_build_query_multiple_filter_types(self):
- self.sq.add_filter(SQ(content='why'))
- self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59, 0)))
- self.sq.add_filter(SQ(name__gt='david'))
- self.sq.add_filter(SQ(created__lt=datetime.datetime(2009, 2, 12, 12, 13, 0)))
- self.sq.add_filter(SQ(title__gte='B'))
- self.sq.add_filter(SQ(id__in=[1, 2, 3]))
- self.assertEqual(str(self.sq.build_query()),
- 'Xapian::Query(((Zwhi OR why) AND VALUE_RANGE 6 00010101000000 20090210015900 AND '
- '( AND_NOT VALUE_RANGE 4 a david) AND '
- '( AND_NOT VALUE_RANGE 3 20090212121300 99990101000000) AND '
- 'VALUE_RANGE 8 b zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz AND '
- '(Q1 OR Q2 OR Q3)))')
-
- def test_log_query(self):
- reset_search_queries()
- self.assertEqual(len(connections['default'].queries), 0)
-
- # Stow.
- old_debug = settings.DEBUG
- settings.DEBUG = False
-
- len(self.sq.get_results())
- self.assertEqual(len(connections['default'].queries), 0)
-
- settings.DEBUG = True
- # Redefine it to clear out the cached results.
- self.sq = connections['default'].get_query()
- self.sq.add_filter(SQ(name='bar'))
- len(self.sq.get_results())
- self.assertEqual(len(connections['default'].queries), 1)
- self.assertEqual(str(connections['default'].queries[0]['query_string']), 'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
-
- # And again, for good measure.
- self.sq = connections['default'].get_query()
- self.sq.add_filter(SQ(name='bar'))
- self.sq.add_filter(SQ(text='moof'))
- len(self.sq.get_results())
- self.assertEqual(len(connections['default'].queries), 2)
- self.assertEqual(str(connections['default'].queries[0]['query_string']), 'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
- self.assertEqual(str(connections['default'].queries[1]['query_string']), 'Xapian::Query(((ZXNAMEbar OR XNAMEbar) AND (ZXTEXTmoof OR XTEXTmoof)))')
-
- # Restore.
- settings.DEBUG = old_debug
-
-
-class LiveXapianSearchQuerySetTestCase(HaystackBackendTestCase, TestCase):
- """
- SearchQuerySet specific tests
- """
- fixtures = ['initial_data.json']
-
- def get_index(self):
- return LiveXapianMockSearchIndex()
-
- def setUp(self):
- super(LiveXapianSearchQuerySetTestCase, self).setUp()
-
- self.backend.update(self.index, MockModel.objects.all())
- self.sq = connections['default'].get_query()
- self.sqs = SearchQuerySet()
-
- def test_result_class(self):
- # Assert that we're defaulting to ``SearchResult``.
- sqs = self.sqs.all()
- self.assertTrue(isinstance(sqs[0], SearchResult))
-
- # Custom class.
- sqs = self.sqs.result_class(MockSearchResult).all()
- self.assertTrue(isinstance(sqs[0], MockSearchResult))
-
- # Reset to default.
- sqs = self.sqs.result_class(None).all()
- self.assertTrue(isinstance(sqs[0], SearchResult))
-
- def test_facet(self):
- self.assertEqual(len(self.sqs.facet('name').facet_counts()['fields']['name']), 3)
-
-
-class XapianBoostMockSearchIndex(indexes.SearchIndex):
- text = indexes.CharField(
- document=True, use_template=True,
- template_name='search/indexes/core/mockmodel_template.txt'
- )
- author = indexes.CharField(model_attr='author', weight=2.0)
- editor = indexes.CharField(model_attr='editor')
- pub_date = indexes.DateField(model_attr='pub_date')
-
- def get_model(self):
- return AFourthMockModel
-
-
-class XapianBoostBackendTestCase(HaystackBackendTestCase, TestCase):
-
- def get_index(self):
- return XapianBoostMockSearchIndex()
-
- def setUp(self):
- super(XapianBoostBackendTestCase, self).setUp()
-
- self.sample_objs = []
- for i in xrange(1, 5):
- mock = AFourthMockModel()
- mock.id = i
- if i % 2:
- mock.author = 'daniel'
- mock.editor = 'david'
- else:
- mock.author = 'david'
- mock.editor = 'daniel'
- mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i)
- self.sample_objs.append(mock)
-
- self.backend.update(self.index, self.sample_objs)
-
- def test_boost(self):
- sqs = SearchQuerySet()
-
- self.assertEqual(len(sqs.all()), 4)
-
- results = sqs.filter(SQ(author='daniel') | SQ(editor='daniel'))
-
- self.assertEqual([result.id for result in results], [
- 'core.afourthmockmodel.1',
- 'core.afourthmockmodel.3',
- 'core.afourthmockmodel.2',
- 'core.afourthmockmodel.4'
- ])
diff --git a/tests/xapian_tests/tests/test_interface.py b/tests/xapian_tests/tests/test_interface.py
new file mode 100644
index 0000000..3b16a22
--- /dev/null
+++ b/tests/xapian_tests/tests/test_interface.py
@@ -0,0 +1,203 @@
+from __future__ import unicode_literals
+
+import datetime
+from django.db.models import Q
+from django.test import TestCase
+
+from haystack import connections
+from haystack.inputs import AutoQuery
+from haystack.query import SearchQuerySet
+
+from xapian_tests.models import Document
+from xapian_tests.search_indexes import DocumentIndex
+from xapian_tests.tests.test_backend import pks
+
+
+class InterfaceTestCase(TestCase):
+ """
+ Tests the interface of Xapian-Haystack.
+
+ Tests related to usability and expected behavior
+ go here.
+ """
+
+ def setUp(self):
+ super(InterfaceTestCase, self).setUp()
+
+ types_names = ['book', 'magazine', 'article']
+ texts = ['This is a huge text',
+ 'This is a medium text',
+ 'This is a small text']
+ dates = [datetime.date(year=2010, month=1, day=1),
+ datetime.date(year=2010, month=2, day=1),
+ datetime.date(year=2010, month=3, day=1)]
+
+ summaries = ['This is a huge corrup\xe7\xe3o summary',
+ 'This is a medium summary',
+ 'This is a small summary']
+
+ for i in range(1, 13):
+ doc = Document()
+ doc.type_name = types_names[i % 3]
+ doc.number = i * 2
+ doc.name = "%s %d" % (doc.type_name, doc.number)
+ doc.date = dates[i % 3]
+
+ doc.summary = summaries[i % 3]
+ doc.text = texts[i % 3]
+ doc.save()
+
+ self.index = DocumentIndex()
+ self.ui = connections['default'].get_unified_index()
+ self.ui.build(indexes=[self.index])
+
+ self.backend = connections['default'].get_backend()
+ self.backend.update(self.index, Document.objects.all())
+
+ self.queryset = SearchQuerySet()
+
+ def tearDown(self):
+ Document.objects.all().delete()
+ self.backend.clear()
+ super(InterfaceTestCase, self).tearDown()
+
+ def test_count(self):
+ self.assertEqual(self.queryset.count(), Document.objects.count())
+
+    def test_content_search(self):
+        """A multi-word ``content`` filter matches every document, best matches first."""
+        result = self.queryset.filter(content='medium this')
+        # Compare as sorted lists: an unordered queryset has no guaranteed order.
+        self.assertEqual(sorted(pks(result)), sorted(pks(Document.objects.all())))
+        # documents with "medium" AND "this" have higher score
+        self.assertEqual(pks(result)[:4], [1, 4, 7, 10])
+
+ def test_field_search(self):
+ self.assertEqual(pks(self.queryset.filter(name='8')), [4])
+ self.assertEqual(pks(self.queryset.filter(type_name='book')),
+ pks(Document.objects.filter(type_name='book')))
+
+ self.assertEqual(pks(self.queryset.filter(text='text huge')),
+ pks(Document.objects.filter(text__contains='text huge')))
+
+    def test_field_contains(self):
+        """Field filters match on contained words; a two-word value matches either word."""
+        self.assertEqual(pks(self.queryset.filter(summary='huge')),
+                         pks(Document.objects.filter(summary__contains='huge')))
+
+        result = self.queryset.filter(summary='huge summary')
+        # Compare as sorted lists: an unordered queryset has no guaranteed order.
+        self.assertEqual(sorted(pks(result)), sorted(pks(Document.objects.all())))
+        # documents with "huge" AND "summary" have higher score
+        self.assertEqual(pks(result)[:4], [3, 6, 9, 12])
+
+ def test_field_exact(self):
+ self.assertEqual(pks(self.queryset.filter(name__exact='8')), [])
+ self.assertEqual(pks(self.queryset.filter(name__exact='magazine 2')), [1])
+
+ def test_content_exact(self):
+ self.assertEqual(pks(self.queryset.filter(content__exact='huge')), [])
+
+ def test_content_and(self):
+ self.assertEqual(pks(self.queryset.filter(content='huge').filter(summary='medium')), [])
+
+ self.assertEqual(len(self.queryset.filter(content='huge this')), 12)
+ self.assertEqual(len(self.queryset.filter(content='huge this').filter(summary='huge')), 4)
+
+ def test_content_or(self):
+ self.assertEqual(len(self.queryset.filter(content='huge medium')), 8)
+ self.assertEqual(len(self.queryset.filter(content='huge medium small')), 12)
+
+ def test_field_and(self):
+ self.assertEqual(pks(self.queryset.filter(name='8').filter(name='4')), [])
+
+ def test_field_or(self):
+ self.assertEqual(pks(self.queryset.filter(name='8 4')), [2, 4])
+
+ def test_field_in(self):
+ self.assertEqual(set(pks(self.queryset.filter(name__in=['magazine 2', 'article 4']))),
+ set(pks(Document.objects.filter(name__in=['magazine 2', 'article 4']))))
+
+ self.assertEqual(pks(self.queryset.filter(number__in=[4])),
+ pks(Document.objects.filter(number__in=[4])))
+
+ self.assertEqual(pks(self.queryset.filter(number__in=[4, 8])),
+ pks(Document.objects.filter(number__in=[4, 8])))
+
+ def test_private_fields(self):
+ self.assertEqual(pks(self.queryset.filter(django_id=4)),
+ pks(Document.objects.filter(id__in=[4])))
+ self.assertEqual(pks(self.queryset.filter(django_id__in=[2, 4])),
+ pks(Document.objects.filter(id__in=[2, 4])))
+
+ self.assertEqual(set(pks(self.queryset.models(Document))),
+ set(pks(Document.objects.all())))
+
+ def test_field_startswith(self):
+ self.assertEqual(len(self.queryset.filter(name__startswith='magaz')), 4)
+ self.assertEqual(set(pks(self.queryset.filter(text__startswith='This is'))),
+ set(pks(Document.objects.filter(text__startswith='This is'))))
+
+ def test_auto_query(self):
+ self.assertEqual(set(pks(self.queryset.auto_query("huge OR medium"))),
+ set(pks(Document.objects.filter(Q(text__contains="huge") |
+ Q(text__contains="medium")))))
+
+ self.assertEqual(set(pks(self.queryset.auto_query("huge AND medium"))),
+ set(pks(Document.objects.filter(Q(text__contains="huge") &
+ Q(text__contains="medium")))))
+
+ self.assertEqual(set(pks(self.queryset.auto_query("text:huge text:-this"))),
+ set(pks(Document.objects.filter(Q(text__contains="huge") &
+ ~Q(text__contains="this")))))
+
+ self.assertEqual(len(self.queryset.filter(name=AutoQuery("8 OR 4"))), 2)
+ self.assertEqual(len(self.queryset.filter(name=AutoQuery("8 AND 4"))), 0)
+
+ def test_value_range(self):
+ self.assertEqual(set(pks(self.queryset.filter(number__lt=3))),
+ set(pks(Document.objects.filter(number__lt=3))))
+
+ self.assertEqual(set(pks(self.queryset.filter(django_id__gte=6))),
+ set(pks(Document.objects.filter(id__gte=6))))
+
+ def test_date_range(self):
+ date = datetime.date(year=2010, month=2, day=1)
+ self.assertEqual(set(pks(self.queryset.filter(date__gte=date))),
+ set(pks(Document.objects.filter(date__gte=date))))
+
+ date = datetime.date(year=2010, month=3, day=1)
+ self.assertEqual(set(pks(self.queryset.filter(date__lte=date))),
+ set(pks(Document.objects.filter(date__lte=date))))
+
+ def test_order_by(self):
+ # private order
+ self.assertEqual(pks(self.queryset.order_by("-django_id")),
+ pks(Document.objects.order_by("-id")))
+
+ # value order
+ self.assertEqual(pks(self.queryset.order_by("number")),
+ pks(Document.objects.order_by("number")))
+
+ # text order
+ self.assertEqual(pks(self.queryset.order_by("summary")),
+ pks(Document.objects.order_by("summary")))
+
+ # date order
+ self.assertEqual(pks(self.queryset.order_by("-date")),
+ pks(Document.objects.order_by("-date")))
+
+ def test_non_ascii_search(self):
+ """
+ Regression test for #119.
+ """
+ self.assertEqual(pks(self.queryset.filter(content='corrup\xe7\xe3o')),
+ pks(Document.objects.filter(summary__contains='corrup\xe7\xe3o')))
+
+ def test_multi_values_exact_search(self):
+ """
+        Regression test for #103.
+ """
+ self.assertEqual(len(self.queryset.filter(tags__exact='tag')), 12)
+ self.assertEqual(len(self.queryset.filter(tags__exact='tag-test')), 8)
+ self.assertEqual(len(self.queryset.filter(tags__exact='tag-test-test')), 4)
diff --git a/tests/xapian_tests/tests/test_query.py b/tests/xapian_tests/tests/test_query.py
new file mode 100644
index 0000000..244cd41
--- /dev/null
+++ b/tests/xapian_tests/tests/test_query.py
@@ -0,0 +1,436 @@
+from __future__ import unicode_literals
+
+import datetime
+
+from django.conf import settings
+from django.test import TestCase
+
+from haystack import indexes
+from haystack import connections, reset_search_queries
+from haystack.models import SearchResult
+from haystack.query import SearchQuerySet, SQ
+
+from core.models import MockModel, AnotherMockModel, AFourthMockModel
+from core.tests.mocks import MockSearchResult
+from xapian_tests.tests.test_backend import HaystackBackendTestCase
+
+
+class MockQueryIndex(indexes.SearchIndex):
+ text = indexes.CharField(document=True)
+ pub_date = indexes.DateTimeField()
+ title = indexes.CharField()
+ foo = indexes.CharField()
+
+ def get_model(self):
+ return MockModel
+
+
+class XapianSearchQueryTestCase(HaystackBackendTestCase, TestCase):
+ def get_index(self):
+ return MockQueryIndex()
+
+ def setUp(self):
+ super(XapianSearchQueryTestCase, self).setUp()
+ self.sq = connections['default'].get_query()
+
+ def test_build_query_all(self):
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query()')
+
+ def test_build_query_single_word(self):
+ self.sq.add_filter(SQ(content='hello'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query((Zhello OR hello))')
+
+ def test_build_query_single_word_not(self):
+ self.sq.add_filter(~SQ(content='hello'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(( AND_NOT (Zhello OR hello)))')
+
+ def test_build_query_single_word_field_exact(self):
+ self.sq.add_filter(SQ(foo='hello'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query((ZXFOOhello OR XFOOhello))')
+
+ def test_build_query_single_word_field_exact_not(self):
+ self.sq.add_filter(~SQ(foo='hello'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(( AND_NOT (ZXFOOhello OR XFOOhello)))')
+
+ def test_build_query_boolean(self):
+ self.sq.add_filter(SQ(content=True))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query((Ztrue OR true))')
+
+ def test_build_query_date(self):
+ self.sq.add_filter(SQ(content=datetime.date(2009, 5, 8)))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query((Z2009-05-08 OR 2009-05-08))')
+
+ def test_build_query_date_not(self):
+ self.sq.add_filter(~SQ(content=datetime.date(2009, 5, 8)))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(( AND_NOT (Z2009-05-08 OR 2009-05-08)))')
+
+ def test_build_query_datetime(self):
+ self.sq.add_filter(SQ(content=datetime.datetime(2009, 5, 8, 11, 28)))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query((Z2009-05-08 OR 2009-05-08 OR Z11:28:00 OR 11:28:00))')
+
+ def test_build_query_datetime_not(self):
+ self.sq.add_filter(~SQ(content=datetime.datetime(2009, 5, 8, 11, 28)))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(( AND_NOT '
+ '(Z2009-05-08 OR 2009-05-08 OR Z11:28:00 OR 11:28:00)))')
+
+ def test_build_query_float(self):
+ self.sq.add_filter(SQ(content=25.52))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query((Z25.52 OR 25.52))')
+
+ def test_build_query_multiple_words_and(self):
+ self.sq.add_filter(SQ(content='hello'))
+ self.sq.add_filter(SQ(content='world'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(((Zhello OR hello) AND (Zworld OR world)))')
+
+ def test_build_query_multiple_words_not(self):
+ self.sq.add_filter(~SQ(content='hello'))
+ self.sq.add_filter(~SQ(content='world'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(('
+ '( AND_NOT (Zhello OR hello)) AND '
+ '( AND_NOT (Zworld OR world))))')
+
+ def test_build_query_multiple_words_or(self):
+ self.sq.add_filter(SQ(content='hello') | SQ(content='world'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query((Zhello OR hello OR Zworld OR world))')
+
+ def test_build_query_multiple_words_or_not(self):
+ self.sq.add_filter(~SQ(content='hello') | ~SQ(content='world'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(('
+ '( AND_NOT (Zhello OR hello)) OR '
+ '( AND_NOT (Zworld OR world))))')
+
+ def test_build_query_multiple_words_mixed(self):
+ self.sq.add_filter(SQ(content='why') | SQ(content='hello'))
+ self.sq.add_filter(~SQ(content='world'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(('
+ '(Zwhi OR why OR Zhello OR hello) AND '
+ '( AND_NOT (Zworld OR world))))')
+
+ def test_build_query_multiple_word_field_exact(self):
+ self.sq.add_filter(SQ(foo='hello'))
+ self.sq.add_filter(SQ(title='world'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(('
+ '(ZXFOOhello OR XFOOhello) AND '
+ '(ZXTITLEworld OR XTITLEworld)))')
+
+ def test_build_query_multiple_word_field_exact_not(self):
+ self.sq.add_filter(~SQ(foo='hello'))
+ self.sq.add_filter(~SQ(title='world'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(('
+ '( AND_NOT (ZXFOOhello OR XFOOhello)) AND '
+ '( AND_NOT (ZXTITLEworld OR XTITLEworld))))')
+
+ def test_build_query_or(self):
+ self.sq.add_filter(SQ(content='hello world'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query((Zhello OR hello OR Zworld OR world))')
+
+ def test_build_query_not_or(self):
+ self.sq.add_filter(~SQ(content='hello world'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query('
+ '( AND_NOT (Zhello OR hello OR Zworld OR world)))')
+
+ def test_build_query_boost(self):
+ self.sq.add_filter(SQ(content='hello'))
+ self.sq.add_boost('world', 5)
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(('
+ '(Zhello OR hello) AND_MAYBE '
+ '5 * (Zworld OR world)))')
+
+ def test_build_query_not_in_filter_single_words(self):
+ self.sq.add_filter(SQ(content='why'))
+ self.sq.add_filter(~SQ(title__in=["Dune", "Jaws"]))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(((Zwhi OR why) AND '
+ '( AND_NOT (XTITLE^dune$ OR '
+ 'XTITLE^jaws$))))')
+
+ def test_build_query_in_filter_multiple_words(self):
+ self.sq.add_filter(SQ(content='why'))
+ self.sq.add_filter(SQ(title__in=["A Famous Paper", "An Infamous Article"]))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(((Zwhi OR why) AND '
+ '((XTITLEa PHRASE 3 XTITLEfamous PHRASE 3 XTITLEpaper) OR '
+ '(XTITLEan PHRASE 3 XTITLEinfamous PHRASE 3 XTITLEarticle))))')
+
+ def test_build_query_in_filter_multiple_words_with_punctuation(self):
+ self.sq.add_filter(SQ(title__in=["A Famous Paper", "An Infamous Article", "My Store Inc."]))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(('
+ '(XTITLEa PHRASE 3 XTITLEfamous PHRASE 3 XTITLEpaper) OR '
+ '(XTITLEan PHRASE 3 XTITLEinfamous PHRASE 3 XTITLEarticle) OR '
+ '(XTITLEmy PHRASE 3 XTITLEstore PHRASE 3 XTITLEinc.)))')
+
+ def test_build_query_not_in_filter_multiple_words(self):
+ self.sq.add_filter(SQ(content='why'))
+ self.sq.add_filter(~SQ(title__in=["A Famous Paper", "An Infamous Article"]))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(((Zwhi OR why) AND '
+ '( AND_NOT ((XTITLEa PHRASE 3 '
+ 'XTITLEfamous PHRASE 3 '
+ 'XTITLEpaper) OR (XTITLEan PHRASE 3 '
+ 'XTITLEinfamous PHRASE 3 XTITLEarticle)))))')
+
+ def test_build_query_in_filter_datetime(self):
+ self.sq.add_filter(SQ(content='why'))
+ self.sq.add_filter(SQ(pub_date__in=[datetime.datetime(2009, 7, 6, 1, 56, 21)]))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(((Zwhi OR why) AND '
+ '(XPUB_DATE2009-07-06 AND_MAYBE XPUB_DATE01:56:21)))')
+
+    def test_clean(self):
+        """``clean`` must return operators and reserved characters untouched."""
+        for text in ('hello world',
+                     'hello AND world',
+                     'hello AND OR NOT TO + - && || ! ( ) { } [ ] ^ " ~ * ? : \\ world',
+                     'so please NOTe i am in a bAND and bORed'):
+            self.assertEqual(self.sq.clean(text), text)
+
+    def test_build_query_with_models(self):
+        """Model restrictions are ANDed on as ``0 * CONTENTTYPE<label>`` terms."""
+        self.sq.add_filter(SQ(content='hello'))
+        self.sq.add_model(MockModel)
+        self.assertEqual(str(self.sq.build_query()),
+                         'Xapian::Query(((Zhello OR hello) AND '
+                         '0 * CONTENTTYPEcore.mockmodel))')
+
+        self.sq.add_model(AnotherMockModel)
+        self.assertIn(str(self.sq.build_query()), (  # either term order is accepted
+            'Xapian::Query(((Zhello OR hello) AND '
+            '(0 * CONTENTTYPEcore.anothermockmodel OR '
+            '0 * CONTENTTYPEcore.mockmodel)))',
+            'Xapian::Query(((Zhello OR hello) AND '
+            '(0 * CONTENTTYPEcore.mockmodel OR '
+            '0 * CONTENTTYPEcore.anothermockmodel)))'))
+
+ def test_build_query_with_punctuation(self):
+ self.sq.add_filter(SQ(content='http://www.example.com'))
+ self.assertEqual(str(self.sq.build_query()), 'Xapian::Query((Zhttp://www.example.com OR '
+ 'http://www.example.com))')
+
+ def test_in_filter_values_list(self):
+ self.sq.add_filter(SQ(content='why'))
+ self.sq.add_filter(SQ(title__in=MockModel.objects.values_list('id', flat=True)))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(((Zwhi OR why) AND '
+ '(XTITLE^1$ OR XTITLE^2$ OR XTITLE^3$)))')
+
+
+class MockSearchIndex(indexes.SearchIndex):
+ text = indexes.CharField(document=True, use_template=True)
+ name = indexes.CharField(model_attr='author', faceted=True)
+ pub_date = indexes.DateTimeField(model_attr='pub_date')
+ title = indexes.CharField()
+
+ def get_model(self):
+ return MockModel
+
+
+class SearchQueryTestCase(HaystackBackendTestCase, TestCase):
+ """
+ Tests expected behavior of
+ SearchQuery.
+ """
+ fixtures = ['initial_data.json']
+
+ def get_index(self):
+ return MockSearchIndex()
+
+ def setUp(self):
+ super(SearchQueryTestCase, self).setUp()
+
+ self.backend.update(self.index, MockModel.objects.all())
+
+ self.sq = connections['default'].get_query()
+
+ def test_get_spelling(self):
+ self.sq.add_filter(SQ(content='indxd'))
+ self.assertEqual(self.sq.get_spelling_suggestion(), 'indexed')
+ self.assertEqual(self.sq.get_spelling_suggestion('indxd'), 'indexed')
+
+ def test_startswith(self):
+ self.sq.add_filter(SQ(name__startswith='da'))
+ self.assertEqual([result.pk for result in self.sq.get_results()], [1, 2, 3])
+
+ def test_build_query_gt(self):
+ self.sq.add_filter(SQ(name__gt='m'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(( AND_NOT VALUE_RANGE 3 a m))')
+
+ def test_build_query_gte(self):
+ self.sq.add_filter(SQ(name__gte='m'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(VALUE_RANGE 3 m zzzzzzzzzzzzzzzzzzzzzzzzzzzz'
+ 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
+ 'zzzzzzzzzzzzzz)')
+
+ def test_build_query_lt(self):
+ self.sq.add_filter(SQ(name__lt='m'))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(( AND_NOT '
+ 'VALUE_RANGE 3 m zzzzzzzzzzzzzzzzzzzzzz'
+ 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
+ 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz))')
+
+ def test_build_query_lte(self):
+ self.sq.add_filter(SQ(name__lte='m'))
+ self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(VALUE_RANGE 3 a m)')
+
+ def test_build_query_multiple_filter_types(self):
+ self.sq.add_filter(SQ(content='why'))
+ self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59, 0)))
+ self.sq.add_filter(SQ(name__gt='david'))
+ self.sq.add_filter(SQ(title__gte='B'))
+ self.sq.add_filter(SQ(django_id__in=[1, 2, 3]))
+ self.assertEqual(str(self.sq.build_query()),
+ 'Xapian::Query(((Zwhi OR why) AND '
+ 'VALUE_RANGE 5 00010101000000 20090210015900 AND '
+ '( AND_NOT VALUE_RANGE 3 a david) AND '
+ 'VALUE_RANGE 7 b zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
+ 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz AND '
+ '(QQ000000000001 OR QQ000000000002 OR QQ000000000003)))')
+
+    def test_log_query(self):
+        """Queries are expected on the connection log only while ``DEBUG`` is on."""
+        reset_search_queries()
+        self.assertEqual(len(connections['default'].queries), 0)
+
+        # Restore ``DEBUG`` even when an assertion below fails, so the
+        # override cannot leak into tests that run afterwards.
+        self.addCleanup(setattr, settings, 'DEBUG', settings.DEBUG)
+        settings.DEBUG = False
+
+        # Nothing may be logged while DEBUG is off.
+        len(self.sq.get_results())
+        self.assertEqual(len(connections['default'].queries), 0)
+
+        settings.DEBUG = True
+        # Redefine it to clear out the cached results.
+        self.sq = connections['default'].get_query()
+        self.sq.add_filter(SQ(name='bar'))
+        len(self.sq.get_results())
+        self.assertEqual(len(connections['default'].queries), 1)
+        self.assertEqual(str(connections['default'].queries[0]['query_string']),
+                         'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
+
+        # And again, for good measure.
+        self.sq = connections['default'].get_query()
+        self.sq.add_filter(SQ(name='bar'))
+        self.sq.add_filter(SQ(text='moof'))
+        len(self.sq.get_results())
+        self.assertEqual(len(connections['default'].queries), 2)
+        self.assertEqual(str(connections['default'].queries[0]['query_string']),
+                         'Xapian::Query(('
+                         'ZXNAMEbar OR '
+                         'XNAMEbar))')
+        self.assertEqual(str(connections['default'].queries[1]['query_string']),
+                         'Xapian::Query(('
+                         '(ZXNAMEbar OR XNAMEbar) AND '
+                         '(ZXTEXTmoof OR XTEXTmoof)))')
+
+
+class LiveSearchQuerySetTestCase(HaystackBackendTestCase, TestCase):
+ """
+ SearchQuerySet specific tests
+ """
+ fixtures = ['initial_data.json']
+
+ def get_index(self):
+ return MockSearchIndex()
+
+ def setUp(self):
+ super(LiveSearchQuerySetTestCase, self).setUp()
+
+ self.backend.update(self.index, MockModel.objects.all())
+ self.sq = connections['default'].get_query()
+ self.sqs = SearchQuerySet()
+
+    def test_result_class(self):
+        """Results are instances of the class selected through ``result_class``."""
+        # Without an explicit choice, results are ``SearchResult`` instances.
+        sqs = self.sqs.all()
+        self.assertIsInstance(sqs[0], SearchResult)
+
+        # Custom class.
+        sqs = self.sqs.result_class(MockSearchResult).all()
+        self.assertIsInstance(sqs[0], MockSearchResult)
+        # Passing ``None`` resets to the default class.
+        sqs = self.sqs.result_class(None).all()
+        self.assertIsInstance(sqs[0], SearchResult)
+
+ def test_facet(self):
+ self.assertEqual(len(self.sqs.facet('name').facet_counts()['fields']['name']), 3)
+
+
+class BoostMockSearchIndex(indexes.SearchIndex):
+ text = indexes.CharField(
+ document=True, use_template=True,
+ template_name='search/indexes/core/mockmodel_template.txt'
+ )
+ author = indexes.CharField(model_attr='author', weight=2.0)
+ editor = indexes.CharField(model_attr='editor')
+ pub_date = indexes.DateField(model_attr='pub_date')
+
+ def get_model(self):
+ return AFourthMockModel
+
+
+class BoostFieldTestCase(HaystackBackendTestCase, TestCase):
+ """
+ Tests boosted fields.
+ """
+
+ def get_index(self):
+ return BoostMockSearchIndex()
+
+ def setUp(self):
+ super(BoostFieldTestCase, self).setUp()
+
+ self.sample_objs = []
+ for i in range(1, 5):
+ mock = AFourthMockModel()
+ mock.id = i
+ if i % 2:
+ mock.author = 'daniel'
+ mock.editor = 'david'
+ else:
+ mock.author = 'david'
+ mock.editor = 'daniel'
+ mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i)
+ self.sample_objs.append(mock)
+
+ self.backend.update(self.index, self.sample_objs)
+
+ def test_boost(self):
+ sqs = SearchQuerySet()
+
+ self.assertEqual(len(sqs.all()), 4)
+
+ results = sqs.filter(SQ(author='daniel') | SQ(editor='daniel'))
+
+ self.assertEqual([result.id for result in results], [
+ 'core.afourthmockmodel.1',
+ 'core.afourthmockmodel.3',
+ 'core.afourthmockmodel.2',
+ 'core.afourthmockmodel.4'
+ ])
diff --git a/tests/xapian_tests/tests/test_xapian_query.py b/tests/xapian_tests/tests/test_xapian_query.py
deleted file mode 100644
index adb3678..0000000
--- a/tests/xapian_tests/tests/test_xapian_query.py
+++ /dev/null
@@ -1,169 +0,0 @@
-from __future__ import unicode_literals
-
-import datetime
-import os
-import shutil
-
-from django.conf import settings
-from django.test import TestCase
-
-from haystack import connections
-from haystack.query import SQ
-
-from core.models import MockModel, AnotherMockModel
-
-
-class XapianSearchQueryTestCase(TestCase):
- def setUp(self):
- super(XapianSearchQueryTestCase, self).setUp()
- self.sq = connections['default'].get_query()
-
- def tearDown(self):
- if os.path.exists(settings.HAYSTACK_CONNECTIONS['default']['PATH']):
- shutil.rmtree(settings.HAYSTACK_CONNECTIONS['default']['PATH'])
-
- super(XapianSearchQueryTestCase, self).tearDown()
-
- def test_build_query_all(self):
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query()')
-
- def test_build_query_single_word(self):
- self.sq.add_filter(SQ(content='hello'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query((Zhello OR hello))')
-
- def test_build_query_single_word_not(self):
- self.sq.add_filter(~SQ(content='hello'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(( AND_NOT (Zhello OR hello)))')
-
- def test_build_query_single_word_field_exact(self):
- self.sq.add_filter(SQ(foo='hello'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query((ZXFOOhello OR XFOOhello))')
-
- def test_build_query_single_word_field_exact_not(self):
- self.sq.add_filter(~SQ(foo='hello'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(( AND_NOT (ZXFOOhello OR XFOOhello)))')
-
- def test_build_query_boolean(self):
- self.sq.add_filter(SQ(content=True))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query((Ztrue OR true))')
-
- def test_build_query_date(self):
- self.sq.add_filter(SQ(content=datetime.date(2009, 5, 8)))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query((Z20090508000000 OR 20090508000000))')
-
- def test_build_query_date_not(self):
- self.sq.add_filter(~SQ(content=datetime.date(2009, 5, 8)))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(( AND_NOT (Z20090508000000 OR 20090508000000)))')
-
- def test_build_query_datetime(self):
- self.sq.add_filter(SQ(content=datetime.datetime(2009, 5, 8, 11, 28)))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query((Z20090508112800 OR 20090508112800))')
-
- def test_build_query_datetime_not(self):
- self.sq.add_filter(~SQ(content=datetime.datetime(2009, 5, 8, 11, 28)))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(( AND_NOT (Z20090508112800 OR 20090508112800)))')
-
- def test_build_query_float(self):
- self.sq.add_filter(SQ(content=25.52))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query((Z25.52 OR 25.52))')
-
- def test_build_query_multiple_words_and(self):
- self.sq.add_filter(SQ(content='hello'))
- self.sq.add_filter(SQ(content='world'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(((Zhello OR hello) AND (Zworld OR world)))')
-
- def test_build_query_multiple_words_not(self):
- self.sq.add_filter(~SQ(content='hello'))
- self.sq.add_filter(~SQ(content='world'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query((( AND_NOT (Zhello OR hello)) AND ( AND_NOT (Zworld OR world))))')
-
- def test_build_query_multiple_words_or(self):
- self.sq.add_filter(SQ(content='hello') | SQ(content='world'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query((Zhello OR hello OR Zworld OR world))')
-
- def test_build_query_multiple_words_or_not(self):
- self.sq.add_filter(~SQ(content='hello') | ~SQ(content='world'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query((( AND_NOT (Zhello OR hello)) OR ( AND_NOT (Zworld OR world))))')
-
- def test_build_query_multiple_words_mixed(self):
- self.sq.add_filter(SQ(content='why') | SQ(content='hello'))
- self.sq.add_filter(~SQ(content='world'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(((Zwhi OR why OR Zhello OR hello) AND ( AND_NOT (Zworld OR world))))')
-
- def test_build_query_multiple_word_field_exact(self):
- self.sq.add_filter(SQ(foo='hello'))
- self.sq.add_filter(SQ(bar='world'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(((ZXFOOhello OR XFOOhello) AND (ZXBARworld OR XBARworld)))')
-
- def test_build_query_multiple_word_field_exact_not(self):
- self.sq.add_filter(~SQ(foo='hello'))
- self.sq.add_filter(~SQ(bar='world'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query((( AND_NOT (ZXFOOhello OR XFOOhello)) AND ( AND_NOT (ZXBARworld OR XBARworld))))')
-
- def test_build_query_phrase(self):
- self.sq.add_filter(SQ(content='hello world'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query((hello PHRASE 2 world))')
-
- def test_build_query_phrase_not(self):
- self.sq.add_filter(~SQ(content='hello world'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(( AND_NOT (hello PHRASE 2 world)))')
-
- def test_build_query_boost(self):
- self.sq.add_filter(SQ(content='hello'))
- self.sq.add_boost('world', 5)
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(((Zhello OR hello) AND_MAYBE 5 * (Zworld OR world)))')
-
- def test_build_query_in_filter_single_words(self):
- self.sq.add_filter(SQ(content='why'))
- self.sq.add_filter(SQ(title__in=["Dune", "Jaws"]))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(((Zwhi OR why) AND (ZXTITLEdune OR XTITLEdune OR ZXTITLEjaw OR XTITLEjaws)))')
-
- def test_build_query_not_in_filter_single_words(self):
- self.sq.add_filter(SQ(content='why'))
- self.sq.add_filter(~SQ(title__in=["Dune", "Jaws"]))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(((Zwhi OR why) AND ( AND_NOT (ZXTITLEdune OR XTITLEdune OR ZXTITLEjaw OR XTITLEjaws))))')
-
- def test_build_query_in_filter_multiple_words(self):
- self.sq.add_filter(SQ(content='why'))
- self.sq.add_filter(SQ(title__in=["A Famous Paper", "An Infamous Article"]))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(((Zwhi OR why) AND ((XTITLEa PHRASE 3 XTITLEfamous PHRASE 3 XTITLEpaper) OR (XTITLEan PHRASE 3 XTITLEinfamous PHRASE 3 XTITLEarticle))))')
-
- def test_build_query_in_filter_multiple_words_with_punctuation(self):
- self.sq.add_filter(SQ(title__in=["A Famous Paper", "An Infamous Article", "My Store Inc."]))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(((XTITLEa PHRASE 3 XTITLEfamous PHRASE 3 XTITLEpaper) OR (XTITLEan PHRASE 3 XTITLEinfamous PHRASE 3 XTITLEarticle) OR (XTITLEmy PHRASE 3 XTITLEstore PHRASE 3 XTITLEinc.)))')
-
- def test_build_query_not_in_filter_multiple_words(self):
- self.sq.add_filter(SQ(content='why'))
- self.sq.add_filter(~SQ(title__in=["A Famous Paper", "An Infamous Article"]))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(((Zwhi OR why) AND ( AND_NOT ((XTITLEa PHRASE 3 XTITLEfamous PHRASE 3 XTITLEpaper) OR (XTITLEan PHRASE 3 XTITLEinfamous PHRASE 3 XTITLEarticle)))))')
-
- def test_build_query_in_filter_datetime(self):
- self.sq.add_filter(SQ(content='why'))
- self.sq.add_filter(SQ(pub_date__in=[datetime.datetime(2009, 7, 6, 1, 56, 21)]))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(((Zwhi OR why) AND (ZXPUB_DATE20090706015621 OR XPUB_DATE20090706015621)))')
-
- def test_clean(self):
- self.assertEqual(self.sq.clean('hello world'), 'hello world')
- self.assertEqual(self.sq.clean('hello AND world'), 'hello AND world')
- self.assertEqual(self.sq.clean('hello AND OR NOT TO + - && || ! ( ) { } [ ] ^ " ~ * ? : \ world'), 'hello AND OR NOT TO + - && || ! ( ) { } [ ] ^ " ~ * ? : \ world')
- self.assertEqual(self.sq.clean('so please NOTe i am in a bAND and bORed'), 'so please NOTe i am in a bAND and bORed')
-
- def test_build_query_with_models(self):
- self.sq.add_filter(SQ(content='hello'))
- self.sq.add_model(MockModel)
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(((Zhello OR hello) AND 0 * CONTENTTYPEcore.mockmodel))')
-
- self.sq.add_model(AnotherMockModel)
-
- self.assertTrue(str(self.sq.build_query()) in (
- 'Xapian::Query(((Zhello OR hello) AND (0 * CONTENTTYPEcore.anothermockmodel OR 0 * CONTENTTYPEcore.mockmodel)))',
- 'Xapian::Query(((Zhello OR hello) AND (0 * CONTENTTYPEcore.mockmodel OR 0 * CONTENTTYPEcore.anothermockmodel)))'))
-
- def test_build_query_with_punctuation(self):
- self.sq.add_filter(SQ(content='http://www.example.com'))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query((Zhttp://www.example.com OR http://www.example.com))')
-
- def test_in_filter_values_list(self):
- self.sq.add_filter(SQ(content='why'))
- self.sq.add_filter(SQ(title__in=MockModel.objects.values_list('id', flat=True)))
- self.assertEqual(str(self.sq.build_query()), 'Xapian::Query(((Zwhi OR why) AND (ZXTITLE1 OR XTITLE1 OR ZXTITLE2 OR XTITLE2 OR ZXTITLE3 OR XTITLE3)))')
diff --git a/xapian_backend.py b/xapian_backend.py
index 1196395..7eb9799 100755
--- a/xapian_backend.py
+++ b/xapian_backend.py
@@ -16,6 +16,7 @@ from haystack import connections
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, SearchNode, log_query
from haystack.constants import ID, DJANGO_ID, DJANGO_CT
from haystack.exceptions import HaystackError, MissingDependency
+from haystack.inputs import AutoQuery
from haystack.models import SearchResult
from haystack.utils import get_identifier, get_model_ct
@@ -48,6 +49,18 @@ DEFAULT_XAPIAN_FLAGS = (
xapian.QueryParser.FLAG_PURE_NOT
)
+# number of documents checked by default when building facets
+# this must be improved to be relative to the total number of docs.
+DEFAULT_CHECK_AT_LEAST = 1000
+
+# field types accepted to be serialized as values in Xapian
+FIELD_TYPES = {'text', 'integer', 'date', 'datetime', 'float', 'boolean'}
+
+# defines the format used to store types in Xapian
+# this format ensures datetimes are sorted correctly
+DATETIME_FORMAT = '%Y%m%d%H%M%S'
+INTEGER_FORMAT = '%012d'
+
class InvalidIndexError(HaystackError):
"""Raised when an index can not be opened."""
@@ -76,30 +89,33 @@ class XHValueRangeProcessor(xapian.ValueRangeProcessor):
begin = begin[colon + 1:len(begin)]
for field_dict in self.backend.schema:
if field_dict['field_name'] == field_name:
+ field_type = field_dict['type']
+
if not begin:
- if field_dict['type'] == 'text':
+ if field_type == 'text':
begin = 'a' # TODO: A better way of getting a min text value?
- elif field_dict['type'] == 'long':
- begin = -sys.maxint - 1
- elif field_dict['type'] == 'float':
+ elif field_type == 'integer':
+ begin = -sys.maxsize - 1
+ elif field_type == 'float':
begin = float('-inf')
- elif field_dict['type'] == 'date' or field_dict['type'] == 'datetime':
+ elif field_type == 'date' or field_type == 'datetime':
begin = '00010101000000'
elif end == '*':
- if field_dict['type'] == 'text':
+ if field_type == 'text':
end = 'z' * 100 # TODO: A better way of getting a max text value?
- elif field_dict['type'] == 'long':
- end = sys.maxint
- elif field_dict['type'] == 'float':
+ elif field_type == 'integer':
+ end = sys.maxsize
+ elif field_type == 'float':
end = float('inf')
- elif field_dict['type'] == 'date' or field_dict['type'] == 'datetime':
+ elif field_type == 'date' or field_type == 'datetime':
end = '99990101000000'
- if field_dict['type'] == 'float':
- begin = _marshal_value(float(begin))
- end = _marshal_value(float(end))
- elif field_dict['type'] == 'long':
- begin = _marshal_value(long(begin))
- end = _marshal_value(long(end))
+
+ if field_type == 'float':
+ begin = _term_to_xapian_value(float(begin), field_type)
+ end = _term_to_xapian_value(float(end), field_type)
+ elif field_type == 'integer':
+ begin = _term_to_xapian_value(int(begin), field_type)
+ end = _term_to_xapian_value(int(end), field_type)
return field_dict['column'], str(begin), str(end)
@@ -163,12 +179,18 @@ class XapianSearchBackend(BaseSearchBackend):
# these 4 attributes are caches populated in `build_schema`
# they are checked in `_update_cache`
- self._fields = None
- self._schema = None
+ # use property to retrieve them
+ self._fields = {}
+ self._schema = []
self._content_field_name = None
self._columns = {}
def _update_cache(self):
+ """
+ To avoid build_schema every time, we cache
+ some values: they only change when a SearchIndex
+ changes, which typically restarts the Python process.
+ """
fields = connections[self.connection_alias].get_unified_index().all_searchfields()
if self._fields != fields:
self._fields = fields
@@ -184,12 +206,13 @@ class XapianSearchBackend(BaseSearchBackend):
self._update_cache()
return self._content_field_name
- def column(self, field_name):
+ @property
+ def column(self):
"""
Returns the column in the database of a given field name.
"""
self._update_cache()
- return self._columns[field_name]
+ return self._columns
def update(self, index, iterable):
"""
@@ -238,15 +261,42 @@ class XapianSearchBackend(BaseSearchBackend):
if self.include_spelling is True:
term_generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)
+ def add_text(termpos, prefix, term, weight):
+ term_generator.set_termpos(termpos + 1)
+ term_generator.index_text(term, weight)
+ term_generator.index_text(term, weight, prefix)
+ term_generator.increase_termpos()
+ return term_generator.get_termpos()
+
for obj in iterable:
document = xapian.Document()
term_generator.set_document(document)
+ def add_to_document(prefix, term, weight):
+ document.add_term('%s' % term, weight)
+ document.add_term(prefix + term, weight)
+ document.add_term(prefix + '^%s$' % term, weight)
+
+ def add_datetime_to_document(termpos, prefix, term, weight):
+ date, time = term.split()
+ document.add_posting(date, termpos, weight)
+ termpos += 1
+ document.add_posting(time, termpos, weight)
+ termpos += 1
+ document.add_posting(prefix + date, termpos, weight)
+ termpos += 1
+ document.add_posting(prefix + time, termpos, weight)
+ termpos += 1
+ return termpos
+
data = index.full_prepare(obj)
weights = index.get_field_weights()
+
+ termpos = 0
for field in self.schema:
+ termpos += 1
# not supported fields are ignored.
- if field['field_name'] not in data.keys():
+ if field['field_name'] not in list(data.keys()):
continue
if field['field_name'] in weights:
@@ -254,34 +304,47 @@ class XapianSearchBackend(BaseSearchBackend):
else:
weight = 1
+ value = data[field['field_name']]
+ # Private fields are indexed in a different way:
+ # `django_id` is an int and `django_ct` is text;
+ # besides, they are indexed by their (unstemmed) value.
if field['field_name'] in ('id', 'django_id', 'django_ct'):
- term = data[field['field_name']]
-
- # django_id is always an integer, thus we send
- # it to _marshal_value as int to guarantee it
- # is stored as a sortable number.
if field['field_name'] == 'django_id':
- term = int(term)
- term = _marshal_value(term)
+ value = int(value)
+ value = _term_to_xapian_value(value, field['type'])
- document.add_term(TERM_PREFIXES[field['field_name']] + term, weight)
- document.add_value(field['column'], term)
+ document.add_term(TERM_PREFIXES[field['field_name']] + value, weight)
+ document.add_value(field['column'], value)
+ continue
else:
- value = data[field['field_name']]
prefix = TERM_PREFIXES['field'] + field['field_name'].upper()
+ # if not multi_valued, we add as a document value
+ # for sorting and facets
if field['multi_valued'] == 'false':
- document.add_value(field['column'], _marshal_value(value))
- value = [value]
+ document.add_value(field['column'], _term_to_xapian_value(value, field['type']))
+ else:
+ for t in value:
+ # add the exact match of each value
+ term = _to_xapian_term(t)
+ add_to_document(prefix, term, weight)
+ # index each value with positional information
+ if ' ' in term:
+ termpos = add_text(termpos, prefix, term, weight)
+ continue
- for term in value:
- term = _marshal_term(term)
- if field['type'] == 'text':
- term_generator.index_text(term, weight)
- term_generator.index_text(term, weight, prefix)
- if len(term.split()) == 1:
- document.add_term(term, weight)
- document.add_term(prefix + term, weight)
+ term = _to_xapian_term(value)
+ # from here on the term is a string;
+ # we now decide how it is indexed
+
+ if field['type'] == 'text':
+ # text is indexed with positional information
+ termpos = add_text(termpos, prefix, term, weight)
+ elif field['type'] == 'datetime':
+ termpos = add_datetime_to_document(termpos, prefix, term, weight)
+ if term != "":
+ # all other terms are added without positional information
+ add_to_document(prefix, term, weight)
# store data without indexing it
document.set_data(pickle.dumps(
@@ -361,6 +424,18 @@ class XapianSearchBackend(BaseSearchBackend):
return query
+ def _check_field_names(self, field_names):
+ """
+ Raises InvalidIndexError if any field_name in field_names is
+ not indexed.
+ """
+ if field_names:
+ for field_name in field_names:
+ try:
+ self.column[field_name]
+ except KeyError:
+ raise InvalidIndexError('Trying to use non indexed field "%s"' % field_name)
+
@log_query
def search(self, query, sort_by=None, start_offset=0, end_offset=None,
fields='', highlight=False, facets=None, date_facets=None,
@@ -409,6 +484,10 @@ class XapianSearchBackend(BaseSearchBackend):
'hits': 0,
}
+ self._check_field_names(facets)
+ self._check_field_names(date_facets)
+ self._check_field_names(query_facets)
+
database = self._database()
if result_class is None:
@@ -443,7 +522,7 @@ class XapianSearchBackend(BaseSearchBackend):
sort_field = sort_field[1:] # Strip the '-'
else:
reverse = False # Reverse is inverted in Xapian -- http://trac.xapian.org/ticket/311
- sorter.add(self.column(sort_field), reverse)
+ sorter.add(self.column[sort_field], reverse)
enquire.set_sort_by_key_then_relevance(sorter, True)
@@ -457,6 +536,12 @@ class XapianSearchBackend(BaseSearchBackend):
if not end_offset:
end_offset = database.get_doccount() - start_offset
+ ## prepare spies in case of facets
+ if facets:
+ facets_spies = self._prepare_facet_field_spies(facets)
+ for spy in facets_spies:
+ enquire.add_matchspy(spy)
+
matches = self._get_enquire_mset(database, enquire, start_offset, end_offset)
for match in matches:
@@ -472,9 +557,18 @@ class XapianSearchBackend(BaseSearchBackend):
)
if facets:
- facets_dict['fields'] = self._do_field_facets(results, facets)
+ # pick single valued facets from spies
+ single_facets_dict = self._process_facet_field_spies(facets_spies)
+
+ # pick multivalued facets from results
+ multi_facets_dict = self._do_multivalued_field_facets(results, facets)
+
+ # merge both results (http://stackoverflow.com/a/38990/931303)
+ facets_dict['fields'] = dict(list(single_facets_dict.items()) + list(multi_facets_dict.items()))
+
if date_facets:
facets_dict['dates'] = self._do_date_facets(results, date_facets)
+
if query_facets:
facets_dict['queries'] = self._do_query_facets(results, query_facets)
@@ -641,7 +735,7 @@ class XapianSearchBackend(BaseSearchBackend):
'multi_valued': 'false',
'column': 0},
{'field_name': DJANGO_ID,
- 'type': 'long',
+ 'type': 'integer',
'multi_valued': 'false',
'column': 1},
{'field_name': DJANGO_CT,
@@ -655,7 +749,7 @@ class XapianSearchBackend(BaseSearchBackend):
column = len(schema_fields)
- for field_name, field_class in sorted(fields.items(), key=lambda n: n[0]):
+ for field_name, field_class in sorted(list(fields.items()), key=lambda n: n[0]):
if field_class.document is True:
content_field_name = field_class.index_fieldname
@@ -667,10 +761,12 @@ class XapianSearchBackend(BaseSearchBackend):
'column': column,
}
- if field_class.field_type in ['date', 'datetime']:
+ if field_class.field_type == 'date':
field_data['type'] = 'date'
+ elif field_class.field_type == 'datetime':
+ field_data['type'] = 'datetime'
elif field_class.field_type == 'integer':
- field_data['type'] = 'long'
+ field_data['type'] = 'integer'
elif field_class.field_type == 'float':
field_data['type'] = 'float'
elif field_class.field_type == 'boolean':
@@ -705,33 +801,58 @@ class XapianSearchBackend(BaseSearchBackend):
return content
- def _do_field_facets(self, results, field_facets):
+ def _prepare_facet_field_spies(self, facets):
"""
- Private method that facets a document by field name.
+ Returns a list of spies based on the facets
+ used to count frequencies.
+ """
+ spies = []
+ for facet in facets:
+ slot = self.column[facet]
+ spy = xapian.ValueCountMatchSpy(slot)
+ # add attribute "slot" to know which column this spy is targeting.
+ spy.slot = slot
+ spies.append(spy)
+ return spies
- Fields of type MultiValueField will be faceted on each item in the
- (containing) list.
+ def _process_facet_field_spies(self, spies):
+ """
+ Returns a dict of facet names with lists of
+ tuples of the form (term, term_frequency)
+ from a list of spies that observed the enquire.
+ """
+ facet_dict = {}
+ for spy in spies:
+ field = self.schema[spy.slot]
+ field_name, field_type = field['field_name'], field['type']
- Required arguments:
- `results` -- A list SearchResults to facet
- `field_facets` -- A list of fields to facet on
+ facet_dict[field_name] = []
+ for facet in list(spy.values()):
+ facet_dict[field_name].append((_from_xapian_value(facet.term, field_type),
+ facet.termfreq))
+ return facet_dict
+
+ def _do_multivalued_field_facets(self, results, field_facets):
+ """
+ Implements a multivalued field facet on the results.
+
+ This is implemented using brute force - O(N^2) -
+ because Xapian does not have it implemented yet
+ (see http://trac.xapian.org/ticket/199)
"""
facet_dict = {}
- # DS_TODO: Improve this algorithm. Currently, runs in O(N^2), ouch.
for field in field_facets:
facet_list = {}
+ if not self._multi_value_field(field):
+ continue
for result in results:
field_value = getattr(result, field)
- if self._multi_value_field(field):
- for item in field_value: # Facet each item in a MultiValueField
- facet_list[item] = facet_list.get(item, 0) + 1
- else:
- facet_list[field_value] = facet_list.get(field_value, 0) + 1
-
- facet_dict[field] = facet_list.items()
+ for item in field_value: # Facet each item in a MultiValueField
+ facet_list[item] = facet_list.get(item, 0) + 1
+ facet_dict[field] = list(facet_list.items())
return facet_dict
@staticmethod
@@ -765,7 +886,7 @@ class XapianSearchBackend(BaseSearchBackend):
"""
facet_dict = {}
- for date_facet, facet_params in date_facets.iteritems():
+ for date_facet, facet_params in list(date_facets.items()):
gap_type = facet_params.get('gap_by')
gap_value = facet_params.get('gap_amount', 1)
date_range = facet_params['start_date']
@@ -831,8 +952,7 @@ class XapianSearchBackend(BaseSearchBackend):
eg. {'name': ('a*', 5)}
"""
facet_dict = {}
-
- for field, query in dict(query_facets).items():
+ for field, query in list(dict(query_facets).items()):
facet_dict[field] = (query, self.search(self.parse_query(query))['hits'])
return facet_dict
@@ -887,7 +1007,7 @@ class XapianSearchBackend(BaseSearchBackend):
return database
@staticmethod
- def _get_enquire_mset(database, enquire, start_offset, end_offset):
+ def _get_enquire_mset(database, enquire, start_offset, end_offset, checkatleast=DEFAULT_CHECK_AT_LEAST):
"""
A safer version of Xapian.enquire.get_mset
@@ -901,10 +1021,10 @@ class XapianSearchBackend(BaseSearchBackend):
`end_offset` -- The end offset to pass to `enquire.get_mset`
"""
try:
- return enquire.get_mset(start_offset, end_offset)
+ return enquire.get_mset(start_offset, end_offset, checkatleast)
except xapian.DatabaseModifiedError:
database.reopen()
- return enquire.get_mset(start_offset, end_offset)
+ return enquire.get_mset(start_offset, end_offset, checkatleast)
@staticmethod
def _get_document_data(database, document):
@@ -989,8 +1109,9 @@ class XapianSearchQuery(BaseSearchQuery):
if self.boost:
subqueries = [
xapian.Query(
- xapian.Query.OP_SCALE_WEIGHT, self._content_field(term, False), value
- ) for term, value in self.boost.iteritems()
+ xapian.Query.OP_SCALE_WEIGHT,
+ self._term_query(term, None, None), value
+ ) for term, value in list(self.boost.items())
]
query = xapian.Query(
xapian.Query.OP_AND_MAYBE, query,
@@ -1009,169 +1130,264 @@ class XapianSearchQuery(BaseSearchQuery):
)
else:
expression, term = child
- field, filter_type = search_node.split_expression(expression)
+ field_name, filter_type = search_node.split_expression(expression)
- # Handle when we've got a ``ValuesListQuerySet``...
- if hasattr(term, 'values_list'):
- term = list(term)
-
- if isinstance(term, (list, tuple)):
- term = [_marshal_term(t) for t in term]
- else:
- term = _marshal_term(term)
-
- if field == 'content':
- query_list.append(self._content_field(term, is_not))
- else:
- if filter_type == 'contains':
- query_list.append(self._filter_contains(term, field, is_not))
- elif filter_type == 'exact':
- query_list.append(self._filter_exact(term, field, is_not))
- elif filter_type == 'gt':
- query_list.append(self._filter_gt(term, field, is_not))
- elif filter_type == 'gte':
- query_list.append(self._filter_gte(term, field, is_not))
- elif filter_type == 'lt':
- query_list.append(self._filter_lt(term, field, is_not))
- elif filter_type == 'lte':
- query_list.append(self._filter_lte(term, field, is_not))
- elif filter_type == 'startswith':
- query_list.append(self._filter_startswith(term, field, is_not))
- elif filter_type == 'in':
- query_list.append(self._filter_in(term, field, is_not))
+ constructed_query_list = self._query_from_term(term, field_name, filter_type, is_not)
+ query_list.extend(constructed_query_list)
if search_node.connector == 'OR':
return xapian.Query(xapian.Query.OP_OR, query_list)
else:
return xapian.Query(xapian.Query.OP_AND, query_list)
- def _content_field(self, term, is_not):
+ def _query_from_term(self, term, field_name, filter_type, is_not):
"""
- Private method that returns a xapian.Query that searches for `value`
- in all fields.
-
- Required arguments:
- ``term`` -- The term to search for
- ``is_not`` -- Invert the search results
-
- Returns:
- A xapian.Query
+ Uses arguments to construct a list of xapian.Query's.
"""
- # it is more than one term, we build a PHRASE
- if ' ' in term:
- query = self._phrase_query(term.split(), self.backend.content_field_name, is_content=True)
- else:
- query = self._term_query(term)
+ if field_name != 'content' and field_name not in self.backend.column:
+ raise InvalidIndexError('field "%s" not indexed' % field_name)
- if is_not:
- return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), query)
- else:
- return query
-
- def _filter_contains(self, term, field, is_not):
- """
- Private method that returns a xapian.Query that searches for `term`
- in a specified `field`.
-
- Required arguments:
- ``term`` -- The term to search for
- ``field`` -- The field to search
- ``is_not`` -- Invert the search results
-
- Returns:
- A xapian.Query
- """
- if ' ' in term:
- return self._filter_exact(term, field, is_not)
- else:
- query = self._term_query(term, field)
- if is_not:
- return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), query)
+ # If it is an AutoQuery, it has no filters
+ # or others, thus we short-circuit the procedure.
+ if isinstance(term, AutoQuery):
+ if field_name != 'content':
+ query = '%s:%s' % (field_name, term.prepare(self))
else:
- return query
-
- def _filter_exact(self, term, field, is_not):
- """
- Private method that returns a xapian.Query that searches for an exact
- match for `term` in a specified `field`.
-
- Required arguments:
- ``term`` -- The term to search for
- ``field`` -- The field to search
- ``is_not`` -- Invert the search results
-
- Returns:
- A xapian.Query
- """
- query = self._phrase_query(term.split(), field)
- if is_not:
- return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), query)
- else:
- return query
-
- def _filter_in(self, term_list, field, is_not):
- """
- Private method that returns a xapian.Query that searches for any term
- of `value_list` in a specified `field`.
-
- Required arguments:
- ``term_list`` -- The terms to search for
- ``field`` -- The field to search
- ``is_not`` -- Invert the search results
-
- Returns:
- A xapian.Query
- """
+ query = term.prepare(self)
+ return [self.backend.parse_query(query)]
query_list = []
- for term in term_list:
- if ' ' in term:
- query_list.append(
- self._phrase_query(term.split(), field)
- )
+
+ # Handle `ValuesListQuerySet`.
+ if hasattr(term, 'values_list'):
+ term = list(term)
+
+ if field_name == 'content':
+ # content is the generic search:
+ # force no field_name search
+ # and the field_type to be 'text'.
+ field_name = None
+ field_type = 'text'
+
+ # we don't know the type of term, so we parse it.
+ # Ideally this would not be required, but
+ # some filters currently depend on the term to make decisions.
+ term = _to_xapian_term(term)
+
+ query_list.append(self._filter_contains(term, field_name, field_type, is_not))
+ # when filter has no filter_type, haystack uses
+ # filter_type = 'contains'. Here we remove it
+ # since the above query is already doing this
+ if filter_type == 'contains':
+ filter_type = None
+ else:
+ # get the field_type from the backend
+ field_type = self.backend.schema[self.backend.column[field_name]]['type']
+
+ # private fields don't accept 'contains' or 'startswith'
+ # since they have no meaning.
+ if filter_type in ('contains', 'startswith') and field_name in ('id', 'django_id', 'django_ct'):
+ filter_type = 'exact'
+
+ if field_type == 'text':
+ # we don't know what type "term" is, but we know we are searching as text
+ # so we parse it like that.
+ # Ideally this would not be required since _term_query does it, but
+ # some filters currently depend on the term to make decisions.
+ if isinstance(term, list):
+ term = [_to_xapian_term(term) for term in term]
else:
- query_list.append(
- self._term_query(term, field)
- )
+ term = _to_xapian_term(term)
+
+ # todo: we should check that the filter is valid for this field_type or raise InvalidIndexError
+ if filter_type == 'contains':
+ query_list.append(self._filter_contains(term, field_name, field_type, is_not))
+ elif filter_type == 'exact':
+ query_list.append(self._filter_exact(term, field_name, field_type, is_not))
+ elif filter_type == 'in':
+ query_list.append(self._filter_in(term, field_name, field_type, is_not))
+ elif filter_type == 'startswith':
+ query_list.append(self._filter_startswith(term, field_name, field_type, is_not))
+ elif filter_type == 'gt':
+ query_list.append(self._filter_gt(term, field_name, field_type, is_not))
+ elif filter_type == 'gte':
+ query_list.append(self._filter_gte(term, field_name, field_type, is_not))
+ elif filter_type == 'lt':
+ query_list.append(self._filter_lt(term, field_name, field_type, is_not))
+ elif filter_type == 'lte':
+ query_list.append(self._filter_lte(term, field_name, field_type, is_not))
+ return query_list
+
+ def _all_query(self):
+ """
+ Returns a match all query.
+ """
+ return xapian.Query('')
+
+ def _filter_contains(self, term, field_name, field_type, is_not):
+ """
+ Splits the sentence into terms and joins them with OR,
+ using stemmed and un-stemmed.
+
+ Assumes term is not a list.
+ """
+ if field_type == 'text':
+ term_list = term.split()
+ else:
+ term_list = [term]
+
+ query = self._or_query(term_list, field_name, field_type)
+ if is_not:
+ return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), query)
+ else:
+ return query
+
+ def _filter_in(self, term_list, field_name, field_type, is_not):
+ """
+ Returns a query that matches exactly ANY term in term_list.
+
+ Notice that:
+ A in {B,C} <=> (A = B or A = C)
+ ~(A in {B,C}) <=> ~(A = B or A = C)
+ Because OP_AND_NOT(C, D) <=> (C and ~D), then D=(A in {B,C}) requires `is_not=False`.
+
+ Assumes term is a list.
+ """
+ query_list = [self._filter_exact(term, field_name, field_type, is_not=False)
+ for term in term_list]
+
if is_not:
return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(),
xapian.Query(xapian.Query.OP_OR, query_list))
else:
return xapian.Query(xapian.Query.OP_OR, query_list)
- def _filter_startswith(self, term, field, is_not):
+ def _filter_exact(self, term, field_name, field_type, is_not):
"""
- Private method that returns a xapian.Query that searches for any term
- that begins with `term` in a specified `field`.
+ Returns a query that matches exactly the un-stemmed term
+ with positional order.
- Required arguments:
- ``term`` -- The terms to search for
- ``field`` -- The field to search
- ``is_not`` -- Invert the search results
-
- Returns:
- A xapian.Query
+ Assumes term is not a list.
"""
+
+ # this is a hack:
+ # the ideal would be to use the same idea as in _filter_contains.
+ # However, it causes tests to fail.
+ if field_type == 'text' and ' ' in term:
+ query = self._phrase_query(term.split(), field_name, field_type)
+ else:
+ query = self._term_query(term, field_name, field_type, exact=True, stemmed=False)
+
if is_not:
- return xapian.Query(
- xapian.Query.OP_AND_NOT,
- self._all_query(),
- self.backend.parse_query('%s:%s*' % (field, term)),
- )
- return self.backend.parse_query('%s:%s*' % (field, term))
+ return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), query)
+ else:
+ return query
- def _filter_gt(self, term, field, is_not):
- return self._filter_lte(term, field, is_not=not is_not)
+ def _filter_startswith(self, term, field_name, field_type, is_not):
+ """
+ Returns a startswith query on the un-stemmed term.
- def _filter_lt(self, term, field, is_not):
- return self._filter_gte(term, field, is_not=not is_not)
+ Assumes term is not a list.
+ """
+ # TODO: if field_type is of type integer, we need to marshal the value.
+ if field_name:
+ query_string = '%s:%s*' % (field_name, term)
+ else:
+ query_string = '%s*' % term
- def _filter_gte(self, term, field, is_not):
+ query = self.backend.parse_query(query_string)
+
+ if is_not:
+ return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), query)
+ return query
+
+ def _or_query(self, term_list, field, field_type, exact=False):
+ """
+ Joins each item of term_list decorated by _term_query with an OR.
+ """
+ term_list = [self._term_query(term, field, field_type, exact) for term in term_list]
+ return xapian.Query(xapian.Query.OP_OR, term_list)
+
+ def _phrase_query(self, term_list, field_name, field_type):
+ """
+ Returns a query that matches exact terms with
+ positional order (i.e. ["this", "thing"] != ["thing", "this"])
+ and no stem.
+
+ If `field_name` is not `None`, restrict to the field.
+ """
+ term_list = [self._term_query(term, field_name, field_type,
+ stemmed=False) for term in term_list]
+
+ query = xapian.Query(xapian.Query.OP_PHRASE, term_list)
+ return query
+
+ def _term_query(self, term, field_name, field_type, exact=False, stemmed=True):
+ """
+ Constructs a query of a single term.
+
+ If `field_name` is not `None`, the term is searched on that field only.
+ If exact is `True`, the search is restricted to boolean matches.
+ """
+ # using stemmed terms in exact query is not acceptable.
+ if stemmed:
+ assert not exact
+
+ constructor = '{prefix}{term}'
+ # ^{term}$ is for boolean match of the term
+ if exact:
+ constructor = '{prefix}^{term}$'
+
+ # construct the prefix to be used.
+ prefix = ''
+ if field_name:
+ prefix = TERM_PREFIXES['field'] + field_name.upper()
+ term = _to_xapian_term(term)
+
+ if field_name in ('id', 'django_id', 'django_ct'):
+ # to ensure the value is serialized correctly.
+ if field_name == 'django_id':
+ term = int(term)
+ term = _term_to_xapian_value(term, field_type)
+ return xapian.Query('%s%s' % (TERM_PREFIXES[field_name], term))
+
+ # we construct the query dates in a slightly different way
+ if field_type == 'datetime':
+ date, time = term.split()
+ constructor = '{prefix}{term}'
+ return xapian.Query(xapian.Query.OP_AND_MAYBE,
+ constructor.format(prefix=prefix, term=date),
+ constructor.format(prefix=prefix, term=time)
+ )
+
+ # only use stem if field is text or "None"
+ if field_type not in ('text', None):
+ stemmed = False
+
+ unstemmed_term = constructor.format(prefix=prefix, term=term)
+ if stemmed:
+ stem = xapian.Stem(self.backend.language)
+ stemmed_term = 'Z' + constructor.format(prefix=prefix, term=stem(term).decode('utf-8'))
+
+ return xapian.Query(xapian.Query.OP_OR,
+ xapian.Query(stemmed_term),
+ xapian.Query(unstemmed_term)
+ )
+ else:
+ return xapian.Query(unstemmed_term)
+
+ def _filter_gt(self, term, field_name, field_type, is_not):
+ return self._filter_lte(term, field_name, field_type, is_not=not is_not)
+
+ def _filter_lt(self, term, field_name, field_type, is_not):
+ return self._filter_gte(term, field_name, field_type, is_not=not is_not)
+
+ def _filter_gte(self, term, field_name, field_type, is_not):
"""
Private method that returns a xapian.Query that searches for any term
that is greater than `term` in a specified `field`.
"""
vrp = XHValueRangeProcessor(self.backend)
- pos, begin, end = vrp('%s:%s' % (field, _marshal_value(term)), '*')
+ pos, begin, end = vrp('%s:%s' % (field_name, _term_to_xapian_value(term, field_type)), '*')
if is_not:
return xapian.Query(xapian.Query.OP_AND_NOT,
self._all_query(),
@@ -1179,13 +1395,13 @@ class XapianSearchQuery(BaseSearchQuery):
)
return xapian.Query(xapian.Query.OP_VALUE_RANGE, pos, begin, end)
- def _filter_lte(self, term, field, is_not):
+ def _filter_lte(self, term, field_name, field_type, is_not):
"""
Private method that returns a xapian.Query that searches for any term
that is less than `term` in a specified `field`.
"""
vrp = XHValueRangeProcessor(self.backend)
- pos, begin, end = vrp('%s:' % field, '%s' % _marshal_value(term))
+ pos, begin, end = vrp('%s:' % field_name, '%s' % _term_to_xapian_value(term, field_type))
if is_not:
return xapian.Query(xapian.Query.OP_AND_NOT,
self._all_query(),
@@ -1193,117 +1409,79 @@ class XapianSearchQuery(BaseSearchQuery):
)
return xapian.Query(xapian.Query.OP_VALUE_RANGE, pos, begin, end)
- @staticmethod
- def _all_query():
- """
- Private method that returns a xapian.Query that returns all documents,
- Returns:
- A xapian.Query
- """
- return xapian.Query('')
-
- def _term_query(self, term, field=None):
- """
- Private method that returns a term based xapian.Query that searches
- for `term`.
-
- Required arguments:
- ``term`` -- The term to search for
- ``field`` -- The field to search (If `None`, all fields)
-
- Returns:
- A xapian.Query
- """
- stem = xapian.Stem(self.backend.language)
-
- if field in ('id', 'django_id', 'django_ct'):
- return xapian.Query('%s%s' % (TERM_PREFIXES[field], term))
- elif field:
- stemmed = 'Z%s%s%s' % (
- TERM_PREFIXES['field'], field.upper(), stem(term)
- )
- unstemmed = '%s%s%s' % (
- TERM_PREFIXES['field'], field.upper(), term
- )
- else:
- stemmed = 'Z%s' % stem(term)
- unstemmed = term
-
- return xapian.Query(
- xapian.Query.OP_OR,
- xapian.Query(stemmed),
- xapian.Query(unstemmed)
- )
-
- @staticmethod
- def _phrase_query(term_list, field=None, is_content=False):
- """
- Private method that returns a phrase based xapian.Query that searches
- for terms in `term_list.
-
- Required arguments:
- ``term_list`` -- The terms to search for
- ``field`` -- The field to search (If `None`, all fields)
-
- Returns:
- A xapian.Query
- """
- if field and not is_content:
- term_list = ['%s%s%s' % (TERM_PREFIXES['field'], field.upper(), term) for term in term_list]
- return xapian.Query(xapian.Query.OP_PHRASE, term_list)
-
-
-def _marshal_value(value):
+def _term_to_xapian_value(term, field_type):
"""
- Private utility method that converts Python values to a string for Xapian values.
+ Converts a term to a serialized
+ Xapian value based on the field_type.
"""
- if isinstance(value, datetime.datetime):
- value = _marshal_datetime(value)
- elif isinstance(value, datetime.date):
- value = _marshal_date(value)
- elif isinstance(value, bool):
- if value:
+ assert field_type in FIELD_TYPES
+
+ def strf(dt):
+ """
+ Equivalent to dt.strftime(DATETIME_FORMAT)
+ but accepts years below 1900 (see http://stackoverflow.com/q/10263956/931303)
+ """
+ return '%04d%02d%02d%02d%02d%02d' % (
+ dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
+
+ if field_type == 'boolean':
+ assert isinstance(term, bool)
+ if term:
value = 't'
else:
value = 'f'
- elif isinstance(value, float):
- value = xapian.sortable_serialise(value)
- elif isinstance(value, (int, long)):
- value = '%012d' % value
- else:
- value = force_text(value).lower()
+
+ elif field_type == 'integer':
+ value = INTEGER_FORMAT % term
+ elif field_type == 'float':
+ value = xapian.sortable_serialise(term)
+ elif field_type == 'date' or field_type == 'datetime':
+ if field_type == 'date':
+ # http://stackoverflow.com/a/1937636/931303 and comments
+ term = datetime.datetime.combine(term, datetime.time())
+ value = strf(term)
+ else: # field_type == 'text'
+ value = _to_xapian_term(term)
+
return value
-def _marshal_term(term):
+def _to_xapian_term(term):
"""
- Private utility method that converts Python terms to a string for Xapian terms.
+ Converts a Python type to a
+ Xapian term that can be indexed.
"""
- if isinstance(term, datetime.datetime):
- term = _marshal_datetime(term)
- elif isinstance(term, datetime.date):
- term = _marshal_date(term)
- else:
- term = force_text(term).lower()
- return term
+ return force_text(term).lower()
-def _marshal_date(d):
- return '%04d%02d%02d000000' % (d.year, d.month, d.day)
+def _from_xapian_value(value, field_type):
+ """
+ Converts a serialized Xapian value
+ to Python equivalent based on the field_type.
-
-def _marshal_datetime(dt):
- if dt.microsecond:
- return '%04d%02d%02d%02d%02d%02d%06d' % (
- dt.year, dt.month, dt.day, dt.hour,
- dt.minute, dt.second, dt.microsecond
- )
- else:
- return '%04d%02d%02d%02d%02d%02d' % (
- dt.year, dt.month, dt.day, dt.hour,
- dt.minute, dt.second
- )
+ Doesn't accept multivalued fields.
+ """
+ assert field_type in FIELD_TYPES
+ if field_type == 'boolean':
+ if value == 't':
+ return True
+ elif value == 'f':
+ return False
+ else:
+ InvalidIndexError('Field type "%d" does not accept value "%s"' % (field_type, value))
+ elif field_type == 'integer':
+ return int(value)
+ elif field_type == 'float':
+ return xapian.sortable_unserialise(value)
+ elif field_type == 'date' or field_type == 'datetime':
+ datetime_value = datetime.datetime.strptime(value, DATETIME_FORMAT)
+ if field_type == 'datetime':
+ return datetime_value
+ else:
+ return datetime_value.date()
+ else: # field_type == 'text'
+ return value
class XapianEngine(BaseEngine):