Added a check to ensure that we don't try to index empty terms

This commit is contained in:
David Sauve 2010-03-11 09:45:06 -05:00
parent 9d8af9cbcf
commit 962585a40d
2 changed files with 34 additions and 25 deletions

View file

@ -56,6 +56,7 @@ class XapianMockSearchIndex(indexes.SearchIndex):
popularity = indexes.FloatField(model_attr='popularity')
month = indexes.CharField(indexed=False)
url = indexes.CharField(model_attr='url')
empty = indexes.CharField()
# Various MultiValueFields
sites = indexes.MultiValueField()
@ -87,7 +88,10 @@ class XapianMockSearchIndex(indexes.SearchIndex):
pub_date = indexes.DateField(model_attr='pub_date')
def prepare_month(self, obj):
# Formats obj.pub_date.month as a zero-padded, two-digit string (e.g. 2 -> '02').
# NOTE(review): the two return lines below differ only in quote style and look
# like the removed/added pair of this diff hunk -- presumably only the
# single-quoted one remains in the committed file; confirm against the repo.
return "%02d" % obj.pub_date.month
return '%02d' % obj.pub_date.month
def prepare_empty(self, obj):
# Always returns an empty string, regardless of obj.
# NOTE(review): presumably this feeds the `empty` CharField through the
# prepare_<field> hook so the backend is exercised with an empty term -- confirm.
return ''
class XapianSearchBackendTestCase(TestCase):
@ -151,9 +155,9 @@ class XapianSearchBackendTestCase(TestCase):
self.assertEqual(len(self.xapian_search('')), 3)
self.assertEqual([dict(doc) for doc in self.xapian_search('')], [
{'flag': u't', 'name': u'david1', 'name_exact': u'david1', 'tags': u"['a', 'b', 'c']", 'keys': u'[1, 2, 3]', 'text': u'indexed!\n1', 'sites': u"['1', '2', '3']", 'titles': u"['object one title one', 'object one title two']", 'pub_date': u'20090224000000', 'value': u'000000000005', 'month': u'02', 'id': u'tests.xapianmockmodel.1', 'slug': u'http://example.com/1/', 'url': u'http://example.com/1/', 'popularity': '\xca\x84', 'django_id': u'1', 'django_ct': u'tests.xapianmockmodel'},
{'flag': u'f', 'name': u'david2', 'name_exact': u'david2', 'tags': u"['ab', 'bc', 'cd']", 'keys': u'[2, 4, 6]', 'text': u'indexed!\n2', 'sites': u"['2', '4', '6']", 'titles': u"['object two title one', 'object two title two']", 'pub_date': u'20090223000000', 'value': u'000000000010', 'month': u'02', 'id': u'tests.xapianmockmodel.2', 'slug': u'http://example.com/2/', 'url': u'http://example.com/2/', 'popularity': '\xb4p', 'django_id': u'2', 'django_ct': u'tests.xapianmockmodel'},
{'flag': u't', 'name': u'david3', 'name_exact': u'david3', 'tags': u"['an', 'to', 'or']", 'keys': u'[3, 6, 9]', 'text': u'indexed!\n3', 'sites': u"['3', '6', '9']", 'titles': u"['object three title one', 'object three title two']", 'pub_date': u'20090222000000', 'value': u'000000000015', 'month': u'02', 'id': u'tests.xapianmockmodel.3', 'slug': u'http://example.com/3/', 'url': u'http://example.com/3/', 'popularity': '\xcb\x98', 'django_id': u'3', 'django_ct': u'tests.xapianmockmodel'}
{'flag': u't', 'name': u'david1', 'name_exact': u'david1', 'tags': u"['a', 'b', 'c']", 'keys': u'[1, 2, 3]', 'text': u'indexed!\n1', 'sites': u"['1', '2', '3']", 'titles': u"['object one title one', 'object one title two']", 'pub_date': u'20090224000000', 'value': u'000000000005', 'month': u'02', 'id': u'tests.xapianmockmodel.1', 'slug': u'http://example.com/1/', 'url': u'http://example.com/1/', 'popularity': '\xca\x84', 'django_id': u'1', 'django_ct': u'tests.xapianmockmodel', 'empty': u''},
{'flag': u'f', 'name': u'david2', 'name_exact': u'david2', 'tags': u"['ab', 'bc', 'cd']", 'keys': u'[2, 4, 6]', 'text': u'indexed!\n2', 'sites': u"['2', '4', '6']", 'titles': u"['object two title one', 'object two title two']", 'pub_date': u'20090223000000', 'value': u'000000000010', 'month': u'02', 'id': u'tests.xapianmockmodel.2', 'slug': u'http://example.com/2/', 'url': u'http://example.com/2/', 'popularity': '\xb4p', 'django_id': u'2', 'django_ct': u'tests.xapianmockmodel', 'empty': u''},
{'flag': u't', 'name': u'david3', 'name_exact': u'david3', 'tags': u"['an', 'to', 'or']", 'keys': u'[3, 6, 9]', 'text': u'indexed!\n3', 'sites': u"['3', '6', '9']", 'titles': u"['object three title one', 'object three title two']", 'pub_date': u'20090222000000', 'value': u'000000000015', 'month': u'02', 'id': u'tests.xapianmockmodel.3', 'slug': u'http://example.com/3/', 'url': u'http://example.com/3/', 'popularity': '\xcb\x98', 'django_id': u'3', 'django_ct': u'tests.xapianmockmodel', 'empty': u''}
])
def test_duplicate_update(self):
@ -169,8 +173,8 @@ class XapianSearchBackendTestCase(TestCase):
self.backend.remove(self.sample_objs[0])
self.assertEqual(len(self.xapian_search('')), 2)
self.assertEqual([dict(doc) for doc in self.xapian_search('')], [
{'flag': u'f', 'name': u'david2', 'name_exact': u'david2', 'tags': u"['ab', 'bc', 'cd']", 'keys': u'[2, 4, 6]', 'text': u'indexed!\n2', 'sites': u"['2', '4', '6']", 'titles': u"['object two title one', 'object two title two']", 'pub_date': u'20090223000000', 'value': u'000000000010', 'month': u'02', 'id': u'tests.xapianmockmodel.2', 'slug': u'http://example.com/2/', 'url': u'http://example.com/2/', 'popularity': '\xb4p', 'django_id': u'2', 'django_ct': u'tests.xapianmockmodel'},
{'flag': u't', 'name': u'david3', 'name_exact': u'david3', 'tags': u"['an', 'to', 'or']", 'keys': u'[3, 6, 9]', 'text': u'indexed!\n3', 'sites': u"['3', '6', '9']", 'titles': u"['object three title one', 'object three title two']", 'pub_date': u'20090222000000', 'value': u'000000000015', 'month': u'02', 'id': u'tests.xapianmockmodel.3', 'slug': u'http://example.com/3/', 'url': u'http://example.com/3/', 'popularity': '\xcb\x98', 'django_id': u'3', 'django_ct': u'tests.xapianmockmodel'}
{'flag': u'f', 'name': u'david2', 'name_exact': u'david2', 'tags': u"['ab', 'bc', 'cd']", 'keys': u'[2, 4, 6]', 'text': u'indexed!\n2', 'sites': u"['2', '4', '6']", 'titles': u"['object two title one', 'object two title two']", 'pub_date': u'20090223000000', 'value': u'000000000010', 'month': u'02', 'id': u'tests.xapianmockmodel.2', 'slug': u'http://example.com/2/', 'url': u'http://example.com/2/', 'popularity': '\xb4p', 'django_id': u'2', 'django_ct': u'tests.xapianmockmodel', 'empty': u''},
{'flag': u't', 'name': u'david3', 'name_exact': u'david3', 'tags': u"['an', 'to', 'or']", 'keys': u'[3, 6, 9]', 'text': u'indexed!\n3', 'sites': u"['3', '6', '9']", 'titles': u"['object three title one', 'object three title two']", 'pub_date': u'20090222000000', 'value': u'000000000015', 'month': u'02', 'id': u'tests.xapianmockmodel.3', 'slug': u'http://example.com/3/', 'url': u'http://example.com/3/', 'popularity': '\xcb\x98', 'django_id': u'3', 'django_ct': u'tests.xapianmockmodel', 'empty': u''}
])
def test_clear(self):
@ -375,20 +379,21 @@ class XapianSearchBackendTestCase(TestCase):
def test_build_schema(self):
# Checks that build_schema() reports 'text' as the content field name and
# returns the expected list of field descriptors
# (column, type, field_name, multi_valued).
# NOTE(review): this listing appears to interleave removed and added diff lines
# (both the 12 and 13 length assertions, and two entries each for columns 3
# and 8). Presumably the post-commit expectation is 13 fields, with `keys` at
# column 3, `url` at column 8 and the new `empty` field at column 12 -- confirm.
(content_field_name, fields) = self.backend.build_schema(self.site.all_searchfields())
self.assertEqual(content_field_name, 'text')
self.assertEqual(len(fields), 12)
self.assertEqual(len(fields), 13)
self.assertEqual(fields, [
{'column': 0, 'type': 'text', 'field_name': 'name', 'multi_valued': 'false'},
{'column': 1, 'field_name': 'name_exact', 'type': 'text', 'multi_valued': 'false'},
{'column': 2, 'type': 'text', 'field_name': 'tags', 'multi_valued': 'true'},
{'column': 3, 'type': 'text', 'field_name': 'url', 'multi_valued': 'false'},
{'column': 3, 'type': 'text', 'field_name': 'keys', 'multi_valued': 'true'},
{'column': 4, 'type': 'text', 'field_name': 'text', 'multi_valued': 'false'},
{'column': 5, 'type': 'float', 'field_name': 'popularity', 'multi_valued': 'false'},
{'column': 6, 'type': 'text', 'field_name': 'sites', 'multi_valued': 'true'},
{'column': 7, 'type': 'long', 'field_name': 'value', 'multi_valued': 'false'},
{'column': 8, 'type': 'text', 'field_name': 'keys', 'multi_valued': 'true'},
{'column': 8, 'type': 'text', 'field_name': 'url', 'multi_valued': 'false'},
{'column': 9, 'type': 'boolean', 'field_name': 'flag', 'multi_valued': 'false'},
{'column': 10, 'type': 'text', 'field_name': 'titles', 'multi_valued': 'true'},
{'column': 11, 'type': 'date', 'field_name': 'pub_date', 'multi_valued': 'false'}
{'column': 11, 'type': 'date', 'field_name': 'pub_date', 'multi_valued': 'false'},
{'column': 12, 'type': 'text', 'field_name': 'empty', 'multi_valued': 'false'},
])
def test_parse_query(self):

View file

@ -214,31 +214,35 @@ class SearchBackend(BaseSearchBackend):
if field['type'] == 'text':
if field['multi_valued'] == 'false':
term = _marshal_term(value)
term_generator.index_text(term)
term_generator.index_text(term, 1, prefix)
if not string.whitespace in term:
document.add_term(term)
document.add_term(prefix + term)
document.add_value(field['column'], _marshal_value(value))
else:
for term in value:
term = _marshal_term(term)
if term:
term_generator.index_text(term)
term_generator.index_text(term, 1, prefix)
if not string.whitespace in term:
document.add_term(term)
document.add_term(prefix + term)
else:
if field['multi_valued'] == 'false':
term = _marshal_term(value)
document.add_term(term)
document.add_term(prefix + term)
document.add_value(field['column'], _marshal_value(value))
document.add_value(field['column'], _marshal_value(value))
else:
for term in value:
term = _marshal_term(term)
if term:
term_generator.index_text(term)
term_generator.index_text(term, 1, prefix)
if not string.whitespace in term:
document.add_term(term)
document.add_term(prefix + term)
else:
if field['multi_valued'] == 'false':
term = _marshal_term(value)
if term:
document.add_term(term)
document.add_term(prefix + term)
document.add_value(field['column'], _marshal_value(value))
else:
for term in value:
term = _marshal_term(term)
if term:
document.add_term(term)
document.add_term(prefix + term)
document.set_data(pickle.dumps(
(obj._meta.app_label, obj._meta.module_name, obj.pk, data),