diff --git a/tests/xapian_tests/tests/xapian_backend.py b/tests/xapian_tests/tests/xapian_backend.py index d389afd..4f6cbfb 100644 --- a/tests/xapian_tests/tests/xapian_backend.py +++ b/tests/xapian_tests/tests/xapian_backend.py @@ -53,12 +53,35 @@ class XapianMockSearchIndex(indexes.SearchIndex): flag = indexes.BooleanField(model_attr='flag') slug = indexes.CharField(indexed=False, model_attr='slug') popularity = indexes.FloatField(model_attr='popularity') + + # Various MultiValueFields sites = indexes.MultiValueField() tags = indexes.MultiValueField() + keys = indexes.MultiValueField() + titles = indexes.MultiValueField() def prepare_sites(self, obj): return ['%d' % (i * obj.id) for i in xrange(1, 4)] + def prepare_tags(self, obj): + if obj.id == 1: + return ['a', 'b', 'c'] + elif obj.id == 2: + return ['ab', 'bc', 'cd'] + else: + return ['an', 'to', 'or'] + + def prepare_keys(self, obj): + return [i * obj.id for i in xrange(1, 4)] + + def prepare_titles(self, obj): + if obj.id == 1: + return ['object one title one', 'object one title two'] + elif obj.id == 2: + return ['object two title one', 'object two title two'] + else: + return ['object three title one', 'object three title two'] + class XapianSearchBackendTestCase(TestCase): def setUp(self): @@ -84,10 +107,6 @@ class XapianSearchBackendTestCase(TestCase): self.sample_objs[0].popularity = 834.0 self.sample_objs[1].popularity = 35.5 self.sample_objs[2].popularity = 972.0 - - self.sample_objs[0].tags = ['a', 'b', 'c'] - self.sample_objs[0].tags = ['ab', 'bc', 'cd'] - self.sample_objs[0].tags = ['an', 'to', 'or'] def tearDown(self): if os.path.exists(settings.HAYSTACK_XAPIAN_PATH): @@ -124,9 +143,9 @@ class XapianSearchBackendTestCase(TestCase): self.assertEqual(len(self.xapian_search('')), 3) self.assertEqual([dict(doc) for doc in self.xapian_search('')], [ - {'flag': u't', 'name': u'david1', 'text': u'indexed!\n1', 'sites': u"['1', '2', '3']", 'pub_date': u'20090224000000', 'value': u'000000000005', 'id': u'tests.xapianmockmodel.1', 'slug': u'http://example.com/1', 'popularity': '\xca\x84', 'django_id': u'1', 'django_ct': u'tests.xapianmockmodel'}, - {'flag': u'f', 'name': u'david2', 'text': u'indexed!\n2', 'sites': u"['2', '4', '6']", 'pub_date': u'20090223000000', 'value': u'000000000010', 'id': u'tests.xapianmockmodel.2', 'slug': u'http://example.com/2', 'popularity': '\xb4p', 'django_id': u'2', 'django_ct': u'tests.xapianmockmodel'}, - {'flag': u't', 'name': u'david3', 'text': u'indexed!\n3', 'sites': u"['3', '6', '9']", 'pub_date': u'20090222000000', 'value': u'000000000015', 'id': u'tests.xapianmockmodel.3', 'slug': u'http://example.com/3', 'popularity': '\xcb\x98', 'django_id': u'3', 'django_ct': u'tests.xapianmockmodel'} + {'flag': u't', 'name': u'david1', 'tags': u"['a', 'b', 'c']", 'keys': u'[1, 2, 3]', 'text': u'indexed!\n1', 'sites': u"['1', '2', '3']", 'titles': u"['object one title one', 'object one title two']", 'pub_date': u'20090224000000', 'value': u'000000000005', 'id': u'tests.xapianmockmodel.1', 'slug': u'http://example.com/1', 'popularity': '\xca\x84', 'django_id': u'1', 'django_ct': u'tests.xapianmockmodel'}, + {'flag': u'f', 'name': u'david2', 'tags': u"['ab', 'bc', 'cd']", 'keys': u'[2, 4, 6]', 'text': u'indexed!\n2', 'sites': u"['2', '4', '6']", 'titles': u"['object two title one', 'object two title two']", 'pub_date': u'20090223000000', 'value': u'000000000010', 'id': u'tests.xapianmockmodel.2', 'slug': u'http://example.com/2', 'popularity': '\xb4p', 'django_id': u'2', 'django_ct': u'tests.xapianmockmodel'}, + {'flag': u't', 'name': u'david3', 'tags': u"['an', 'to', 'or']", 'keys': u'[3, 6, 9]', 'text': u'indexed!\n3', 'sites': u"['3', '6', '9']", 'titles': u"['object three title one', 'object three title two']", 'pub_date': u'20090222000000', 'value': u'000000000015', 'id': u'tests.xapianmockmodel.3', 'slug': u'http://example.com/3', 'popularity': '\xcb\x98', 'django_id': u'3', 'django_ct': u'tests.xapianmockmodel'} ]) def test_duplicate_update(self): @@ -142,8 +161,8 @@ class XapianSearchBackendTestCase(TestCase): self.backend.remove(self.sample_objs[0]) self.assertEqual(len(self.xapian_search('')), 2) self.assertEqual([dict(doc) for doc in self.xapian_search('')], [ - {'flag': u'f', 'name': u'david2', 'text': u'indexed!\n2', 'sites': u"['2', '4', '6']", 'pub_date': u'20090223000000', 'value': u'000000000010', 'id': u'tests.xapianmockmodel.2', 'slug': u'http://example.com/2', 'popularity': '\xb4p', 'django_id': u'2', 'django_ct': u'tests.xapianmockmodel'}, - {'flag': u't', 'name': u'david3', 'text': u'indexed!\n3', 'sites': u"['3', '6', '9']", 'pub_date': u'20090222000000', 'value': u'000000000015', 'id': u'tests.xapianmockmodel.3', 'slug': u'http://example.com/3', 'popularity': '\xcb\x98', 'django_id': u'3', 'django_ct': u'tests.xapianmockmodel'} + {'flag': u'f', 'name': u'david2', 'tags': u"['ab', 'bc', 'cd']", 'keys': u'[2, 4, 6]', 'text': u'indexed!\n2', 'sites': u"['2', '4', '6']", 'titles': u"['object two title one', 'object two title two']", 'pub_date': u'20090223000000', 'value': u'000000000010', 'id': u'tests.xapianmockmodel.2', 'slug': u'http://example.com/2', 'popularity': '\xb4p', 'django_id': u'2', 'django_ct': u'tests.xapianmockmodel'}, + {'flag': u't', 'name': u'david3', 'tags': u"['an', 'to', 'or']", 'keys': u'[3, 6, 9]', 'text': u'indexed!\n3', 'sites': u"['3', '6', '9']", 'titles': u"['object three title one', 'object three title two']", 'pub_date': u'20090222000000', 'value': u'000000000015', 'id': u'tests.xapianmockmodel.3', 'slug': u'http://example.com/3', 'popularity': '\xcb\x98', 'django_id': u'3', 'django_ct': u'tests.xapianmockmodel'} ]) def test_clear(self): @@ -177,6 +196,15 @@ class XapianSearchBackendTestCase(TestCase): self.assertEqual([result.pk for result in self.backend.search(xapian.Query(''))['results']], [1, 2, 3]) self.assertEqual(self.backend.search(xapian.Query('indexed'))['hits'], 3) self.assertEqual([result.pk for result in self.backend.search(xapian.Query(''))['results']], [1, 2, 3]) + + def test_search_by_mvf(self): + self.backend.update(self.index, self.sample_objs) + self.assertEqual(len(self.xapian_search('')), 3) + + self.assertEqual(self.backend.search(xapian.Query('ab'))['hits'], 1) + self.assertEqual(self.backend.search(xapian.Query('b'))['hits'], 1) + self.assertEqual(self.backend.search(xapian.Query('to'))['hits'], 1) + self.assertEqual(self.backend.search(xapian.Query('one'))['hits'], 3) def test_field_facets(self): self.backend.update(self.index, self.sample_objs) @@ -324,16 +352,18 @@ class XapianSearchBackendTestCase(TestCase): def test_build_schema(self): (content_field_name, fields) = self.backend.build_schema(self.site.all_searchfields()) self.assertEqual(content_field_name, 'text') - self.assertEqual(len(fields), 8) + self.assertEqual(len(fields), 10) self.assertEqual(fields, [ - {'column': 0, 'field_name': 'name', 'type': 'text', 'multi_valued': 'false'}, + {'column': 0, 'type': 'text', 'field_name': 'name', 'multi_valued': 'false'}, {'column': 1, 'type': 'text', 'field_name': 'tags', 'multi_valued': 'true'}, - {'column': 2, 'field_name': 'text', 'type': 'text', 'multi_valued': 'false'}, - {'column': 3, 'field_name': 'popularity', 'type': 'float', 'multi_valued': 'false'}, - {'column': 4, 'field_name': 'sites', 'type': 'text', 'multi_valued': 'true'}, - {'column': 5, 'field_name': 'value', 'type': 'long', 'multi_valued': 'false'}, - {'column': 6, 'field_name': 'flag', 'type': 'boolean', 'multi_valued': 'false'}, - {'column': 7, 'field_name': 'pub_date', 'type': 'date', 'multi_valued': 'false'}, + {'column': 2, 'type': 'text', 'field_name': 'keys', 'multi_valued': 'true'}, + {'column': 3, 'type': 'text', 'field_name': 'text', 'multi_valued': 'false'}, + {'column': 4, 'type': 'float', 'field_name': 'popularity', 'multi_valued': 'false'}, + {'column': 5, 'type': 'text', 'field_name': 'sites', 'multi_valued': 'true'}, + {'column': 6, 'type': 'long', 'field_name': 'value', 'multi_valued': 'false'}, + {'column': 7, 'type': 'boolean', 'field_name': 'flag', 'multi_valued': 'false'}, + {'column': 8, 'type': 'text', 'field_name': 'titles', 'multi_valued': 'true'}, + {'column': 9, 'type': 'date', 'field_name': 'pub_date', 'multi_valued': 'false'} ]) def test_parse_query(self): @@ -341,10 +371,10 @@ class XapianSearchBackendTestCase(TestCase): self.assertEqual(self.backend.parse_query('indexed').get_description(), 'Xapian::Query((indexed:(pos=1) OR Zindex:(pos=1)))') self.assertEqual(self.backend.parse_query('name:david').get_description(), 'Xapian::Query((XNAMEdavid1:(pos=1) OR XNAMEdavid2:(pos=1) OR XNAMEdavid3:(pos=1) OR ZXNAMEdavid:(pos=1)))') self.assertEqual(self.backend.parse_query('name:david1..david2').get_description(), 'Xapian::Query(VALUE_RANGE 0 david1 david2)') - self.assertEqual(self.backend.parse_query('value:0..10').get_description(), 'Xapian::Query(VALUE_RANGE 5 000000000000 000000000010)') - self.assertEqual(self.backend.parse_query('value:..10').get_description(), 'Xapian::Query(VALUE_RANGE 5 -02147483648 000000000010)') - self.assertEqual(self.backend.parse_query('value:10..*').get_description(), 'Xapian::Query(VALUE_RANGE 5 000000000010 002147483647)') - self.assertEqual(self.backend.parse_query('popularity:25.5..100.0').get_description(), 'Xapian::Query(VALUE_RANGE 3 \xb2` \xba@)') + self.assertEqual(self.backend.parse_query('value:0..10').get_description(), 'Xapian::Query(VALUE_RANGE 6 000000000000 000000000010)') + self.assertEqual(self.backend.parse_query('value:..10').get_description(), 'Xapian::Query(VALUE_RANGE 6 -02147483648 000000000010)') + self.assertEqual(self.backend.parse_query('value:10..*').get_description(), 'Xapian::Query(VALUE_RANGE 6 000000000010 002147483647)') + self.assertEqual(self.backend.parse_query('popularity:25.5..100.0').get_description(), 'Xapian::Query(VALUE_RANGE 4 \xb2` \xba@)') class LiveXapianMockSearchIndex(indexes.SearchIndex): diff --git a/xapian_backend.py b/xapian_backend.py index 665bab3..bfc9346 100755 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -161,10 +161,11 @@ class SearchBackend(BaseSearchBackend): `iterable` -- An iterable of model instances to index For each object in `iterable`, a document is created containing all - of the terms extracted from `index.prepare(obj)` with stemming prefixes, - field prefixes, and 'as-is'. + of the terms extracted from `index.prepare(obj)` with field prefixes, + and 'as-is' as needed. Also, if the field type is 'text' it will be + stemmed and stored with the 'Z' prefix as well. - eg. `content:Testing` ==> `testing, Ztest, ZXCONTENTtest` + eg. `content:Testing` ==> `testing, Ztest, ZXCONTENTtest, XCONTENTtest` Each document also contains an extra term in the format: @@ -207,10 +208,24 @@ class SearchBackend(BaseSearchBackend): if field['field_name'] in data.keys(): prefix = DOCUMENT_CUSTOM_TERM_PREFIX + field['field_name'].upper() value = data[field['field_name']] - term_generator.index_text(_marshal_term(value)) - term_generator.index_text(_marshal_term(value), 1, prefix) - if field['multi_valued'] == 'false': - document.add_value(field['column'], _marshal_value(value)) + if field['type'] == 'text': + if field['multi_valued'] == 'false': + term_generator.index_text(_marshal_term(value)) + term_generator.index_text(_marshal_term(value), 1, prefix) + document.add_value(field['column'], _marshal_value(value)) + else: + for term in value: + term_generator.index_text(_marshal_term(term)) + term_generator.index_text(_marshal_term(term), 1, prefix) + else: + if field['multi_valued'] == 'false': + document.add_term(_marshal_term(value)) + document.add_term(prefix + _marshal_term(value)) + document.add_value(field['column'], _marshal_value(value)) + else: + for term in value: + document.add_term(_marshal_term(term)) + document.add_term(prefix + _marshal_term(term)) document.set_data(pickle.dumps( (obj._meta.app_label, obj._meta.module_name, obj.pk, data),