From 7626ee6e380af166aa78d509fd3e7a369063393c Mon Sep 17 00:00:00 2001 From: David Sauve Date: Tue, 8 Dec 2009 13:18:17 -0500 Subject: [PATCH] Moved schema generation out of database opening method and made it into a property that should be cached. --- tests/xapian_tests/tests/xapian_backend.py | 98 +++++++++++----------- xapian_backend.py | 38 +++++---- 2 files changed, 71 insertions(+), 65 deletions(-) diff --git a/tests/xapian_tests/tests/xapian_backend.py b/tests/xapian_tests/tests/xapian_backend.py index 72d627c..d376586 100644 --- a/tests/xapian_tests/tests/xapian_backend.py +++ b/tests/xapian_tests/tests/xapian_backend.py @@ -29,12 +29,12 @@ class XapianMockModel(models.Model): foo = models.CharField(max_length=255, blank=True) pub_date = models.DateTimeField(default=datetime.datetime.now) tag = models.ForeignKey(MockTag) - + value = models.IntegerField(default=0) flag = models.BooleanField(default=True) slug = models.SlugField() popularity = models.FloatField(default=0.0) - + def __unicode__(self): return self.author @@ -44,7 +44,7 @@ class XapianMockModel(models.Model): class XapianMockSearchIndex(indexes.SearchIndex): text = indexes.CharField( - document=True, use_template=True, + document=True, use_template=True, template_name='search/indexes/core/mockmodel_text.txt' ) name = indexes.CharField(model_attr='author') @@ -54,7 +54,7 @@ class XapianMockSearchIndex(indexes.SearchIndex): slug = indexes.CharField(indexed=False, model_attr='slug') popularity = indexes.FloatField(model_attr='popularity') sites = indexes.MultiValueField() - + def prepare_sites(self, obj): return ['%d' % (i * obj.id) for i in xrange(1, 4)] @@ -79,7 +79,7 @@ class XapianSearchBackendTestCase(TestCase): mock.flag = bool(i % 2) mock.slug = 'http://example.com/%d' % i self.sample_objs.append(mock) - + self.sample_objs[0].popularity = 834.0 self.sample_objs[1].popularity = 35.5 self.sample_objs[2].popularity = 972.0 @@ -87,7 +87,7 @@ class XapianSearchBackendTestCase(TestCase): def tearDown(self): if os.path.exists(settings.HAYSTACK_XAPIAN_PATH): shutil.rmtree(settings.HAYSTACK_XAPIAN_PATH) - + super(XapianSearchBackendTestCase, self).tearDown() def xapian_search(self, query_string): @@ -111,7 +111,7 @@ class XapianSearchBackendTestCase(TestCase): model_data[key] = _marshal_value(value) model_data['id'] = u'%s.%s.%d' % (app_label, module_name, pk) document_list.append(model_data) - + return document_list def test_update(self): @@ -123,13 +123,13 @@ class XapianSearchBackendTestCase(TestCase): {'flag': u'f', 'name': u'david2', 'text': u'indexed!\n2', 'sites': u"['2', '4', '6']", 'pub_date': u'20090223000000', 'value': u'000000000010', 'id': u'tests.xapianmockmodel.2', 'slug': u'http://example.com/2', 'popularity': '\xb4p', 'django_id': u'2', 'django_ct': u'tests.xapianmockmodel'}, {'flag': u't', 'name': u'david3', 'text': u'indexed!\n3', 'sites': u"['3', '6', '9']", 'pub_date': u'20090222000000', 'value': u'000000000015', 'id': u'tests.xapianmockmodel.3', 'slug': u'http://example.com/3', 'popularity': '\xcb\x98', 'django_id': u'3', 'django_ct': u'tests.xapianmockmodel'} ]) - + def test_duplicate_update(self): self.backend.update(self.index, self.sample_objs) self.backend.update(self.index, self.sample_objs) # Duplicates should be updated, not appended -- http://github.com/notanumber/xapian-haystack/issues/#issue/6 self.assertEqual(len(self.xapian_search('')), 3) - + def test_remove(self): self.backend.update(self.index, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) @@ -172,7 +172,7 @@ class XapianSearchBackendTestCase(TestCase): self.assertEqual([result.pk for result in self.backend.search(xapian.Query(''))['results']], [1, 2, 3]) self.assertEqual(self.backend.search(xapian.Query('indexed'))['hits'], 3) self.assertEqual([result.pk for result in self.backend.search(xapian.Query(''))['results']], [1, 2, 3]) - + def test_field_facets(self): self.backend.update(self.index, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) @@ -181,7 +181,7 @@ class XapianSearchBackendTestCase(TestCase): results = self.backend.search(xapian.Query('indexed'), facets=['name']) self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['fields']['name'], [('david1', 1), ('david2', 1), ('david3', 1)]) - + results = self.backend.search(xapian.Query('indexed'), facets=['flag']) self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['fields']['flag'], [(False, 1), (True, 2)]) @@ -189,11 +189,11 @@ class XapianSearchBackendTestCase(TestCase): results = self.backend.search(xapian.Query('indexed'), facets=['sites']) self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['fields']['sites'], [('1', 1), ('3', 2), ('2', 2), ('4', 1), ('6', 2), ('9', 1)]) - + def test_date_facets(self): self.backend.update(self.index, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) - + self.assertEqual(self.backend.search(xapian.Query(), date_facets={'pub_date': {'start_date': datetime.datetime(2008, 10, 26), 'end_date': datetime.datetime(2009, 3, 26), 'gap_by': 'month'}}), {'hits': 0, 'results': []}) results = self.backend.search(xapian.Query('indexed'), date_facets={'pub_date': {'start_date': datetime.datetime(2008, 10, 26), 'end_date': datetime.datetime(2009, 3, 26), 'gap_by': 'month'}}) self.assertEqual(results['hits'], 3) @@ -204,7 +204,7 @@ class XapianSearchBackendTestCase(TestCase): ('2008-11-26T00:00:00', 0), ('2008-10-26T00:00:00', 0), ]) - + results = self.backend.search(xapian.Query('indexed'), date_facets={'pub_date': {'start_date': datetime.datetime(2009, 02, 01), 'end_date': datetime.datetime(2009, 3, 15), 'gap_by': 'day', 'gap_amount': 15}}) self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['dates']['pub_date'], [ @@ -212,11 +212,11 @@ class XapianSearchBackendTestCase(TestCase): ('2009-02-16T00:00:00', 3), ('2009-02-01T00:00:00', 0) ]) - + def test_query_facets(self): self.backend.update(self.index, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) - + self.assertEqual(self.backend.search(xapian.Query(), query_facets={'name': 'da*'}), {'hits': 0, 'results': []}) results = self.backend.search(xapian.Query('indexed'), query_facets={'name': 'da*'}) self.assertEqual(results['hits'], 3) @@ -250,10 +250,10 @@ class XapianSearchBackendTestCase(TestCase): self.assertEqual(self.backend.search(xapian.Query('foo'))['hits'], 0) self.assertEqual(self.backend.search(xapian.Query('foo'), spelling_query='indexy')['spelling_suggestion'], 'indexed') - + self.assertEqual(self.backend.search(xapian.Query('XNAMEdavid'))['hits'], 0) self.assertEqual(self.backend.search(xapian.Query('XNAMEdavid'))['spelling_suggestion'], 'david1') - + def test_more_like_this(self): self.backend.update(self.index, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) @@ -261,15 +261,15 @@ class XapianSearchBackendTestCase(TestCase): results = self.backend.more_like_this(self.sample_objs[0]) self.assertEqual(results['hits'], 2) self.assertEqual([result.pk for result in results['results']], [3, 2]) - + results = self.backend.more_like_this(self.sample_objs[0], additional_query=xapian.Query('david3')) self.assertEqual(results['hits'], 1) self.assertEqual([result.pk for result in results['results']], [3]) - + results = self.backend.more_like_this(self.sample_objs[0], limit_to_registered_models=True) self.assertEqual(results['hits'], 2) self.assertEqual([result.pk for result in results['results']], [3, 2]) - + def test_order_by(self): self.backend.update(self.index, self.sample_objs) @@ -278,31 +278,31 @@ class XapianSearchBackendTestCase(TestCase): results = self.backend.search(xapian.Query(''), sort_by=['-pub_date']) self.assertEqual([result.pk for result in results['results']], [1, 2, 3]) - + results = self.backend.search(xapian.Query(''), sort_by=['id']) self.assertEqual([result.pk for result in results['results']], [1, 2, 3]) - + results = self.backend.search(xapian.Query(''), sort_by=['-id']) self.assertEqual([result.pk for result in results['results']], [3, 2, 1]) - + results = self.backend.search(xapian.Query(''), sort_by=['value']) self.assertEqual([result.pk for result in results['results']], [1, 2, 3]) - + results = self.backend.search(xapian.Query(''), sort_by=['-value']) self.assertEqual([result.pk for result in results['results']], [3, 2, 1]) - + results = self.backend.search(xapian.Query(''), sort_by=['popularity']) self.assertEqual([result.pk for result in results['results']], [2, 1, 3]) - + results = self.backend.search(xapian.Query(''), sort_by=['-popularity']) self.assertEqual([result.pk for result in results['results']], [3, 1, 2]) - + results = self.backend.search(xapian.Query(''), sort_by=['flag', 'id']) self.assertEqual([result.pk for result in results['results']], [2, 1, 3]) - + results = self.backend.search(xapian.Query(''), sort_by=['flag', '-id']) self.assertEqual([result.pk for result in results['results']], [2, 3, 1]) - + def test__marshal_value(self): self.assertEqual(_marshal_value('abc'), u'abc') self.assertEqual(_marshal_value(1), '000000000001') @@ -315,7 +315,7 @@ class XapianSearchBackendTestCase(TestCase): self.assertEqual(_marshal_value(datetime.datetime(2009, 5, 9, 0, 0)), u'20090509000000') self.assertEqual(_marshal_value(datetime.datetime(1899, 5, 18, 0, 0)), u'18990518000000') self.assertEqual(_marshal_value(datetime.datetime(2009, 5, 18, 1, 16, 30, 250)), u'20090518011630000250') - + def test_build_schema(self): (content_field_name, fields) = self.backend.build_schema(self.site.all_searchfields()) self.assertEqual(content_field_name, 'text') @@ -329,7 +329,7 @@ class XapianSearchBackendTestCase(TestCase): {'column': 5, 'field_name': 'flag', 'type': 'boolean', 'multi_valued': 'false'}, {'column': 6, 'field_name': 'pub_date', 'type': 'date', 'multi_valued': 'false'}, ]) - + def test_parse_query(self): self.backend.update(self.index, self.sample_objs) self.assertEqual(self.backend.parse_query('indexed').get_description(), 'Xapian::Query((indexed:(pos=1) OR Zindex:(pos=1)))') @@ -354,47 +354,47 @@ class LiveXapianSearchQueryTestCase(TestCase): SearchQuery specific tests """ fixtures = ['initial_data.json'] - + def setUp(self): super(LiveXapianSearchQueryTestCase, self).setUp() - + site = SearchSite() backend = SearchBackend(site=site) index = LiveXapianMockSearchIndex(MockModel, backend=backend) site.register(MockModel, LiveXapianMockSearchIndex) backend.update(index, MockModel.objects.all()) - + self.sq = SearchQuery(backend=backend) - + def test_get_spelling(self): self.sq.add_filter(SQ(content='indxd')) self.assertEqual(self.sq.get_spelling_suggestion(), u'indexed') self.assertEqual(self.sq.get_spelling_suggestion('indxd'), u'indexed') - + def test_startswith(self): self.sq.add_filter(SQ(name__startswith='da*')) self.assertEqual([result.pk for result in self.sq.get_results()], [1, 2, 3]) - + self.sq = SearchQuery(backend=SearchBackend()) self.sq.add_filter(SQ(name__startswith='daniel1')) self.assertEqual([result.pk for result in self.sq.get_results()], [1]) - + def test_build_query_gt(self): self.sq.add_filter(SQ(name__gt='m')) self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(( AND_NOT VALUE_RANGE 3 a m))') - + def test_build_query_gte(self): self.sq.add_filter(SQ(name__gte='m')) self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(VALUE_RANGE 3 m zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz)') - + def test_build_query_lt(self): self.sq.add_filter(SQ(name__lt='m')) self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(( AND_NOT VALUE_RANGE 3 m zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz))') - + def test_build_query_lte(self): self.sq.add_filter(SQ(name__lte='m')) self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(VALUE_RANGE 3 a m)') - + def test_build_query_multiple_filter_types(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59, 0))) @@ -403,18 +403,18 @@ class LiveXapianSearchQueryTestCase(TestCase): self.sq.add_filter(SQ(title__gte='B')) self.sq.add_filter(SQ(id__in=[1, 2, 3])) self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((Zwhy OR why) AND VALUE_RANGE 2 00010101000000 20090210015900 AND ( AND_NOT VALUE_RANGE 3 a david) AND ( AND_NOT VALUE_RANGE 4 20090212121300 99990101000000) AND VALUE_RANGE 1 b zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz AND (ZXID1 OR XID1 OR ZXID2 OR XID2 OR ZXID3 OR XID3)))') - + def test_log_query(self): backends.reset_search_queries() self.assertEqual(len(backends.queries), 0) - + # Stow. old_debug = settings.DEBUG settings.DEBUG = False - + len(self.sq.get_results()) self.assertEqual(len(backends.queries), 0) - + settings.DEBUG = True # Redefine it to clear out the cached results. self.sq = SearchQuery(backend=SearchBackend()) @@ -422,7 +422,7 @@ class LiveXapianSearchQueryTestCase(TestCase): len(self.sq.get_results()) self.assertEqual(len(backends.queries), 1) self.assertEqual(backends.queries[0]['query_string'].get_description(), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))') - + # And again, for good measure. self.sq = SearchQuery(backend=SearchBackend()) self.sq.add_filter(SQ(name='bar')) @@ -431,6 +431,6 @@ class LiveXapianSearchQueryTestCase(TestCase): self.assertEqual(len(backends.queries), 2) self.assertEqual(backends.queries[0]['query_string'].get_description(), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))') self.assertEqual(backends.queries[1]['query_string'].get_description(), u'Xapian::Query(((ZXNAMEbar OR XNAMEbar) AND (ZXTEXTmoof OR XTEXTmoof)))') - + # Restore. settings.DEBUG = old_debug diff --git a/xapian_backend.py b/xapian_backend.py index 20a283a..96f5135 100755 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -39,8 +39,8 @@ class InvalidIndexError(HaystackError): class XHValueRangeProcessor(xapian.ValueRangeProcessor): - def __init__(self, sb): - self.sb = sb + def __init__(self, backend): + self.backend = backend or SearchBackend() xapian.ValueRangeProcessor.__init__(self) def __call__(self, begin, end): @@ -55,7 +55,7 @@ class XHValueRangeProcessor(xapian.ValueRangeProcessor): colon = begin.find(':') field_name = begin[:colon] begin = begin[colon + 1:len(begin)] - for field_dict in self.sb.schema: + for field_dict in self.backend.schema: if field_dict['field_name'] == field_name: if not begin: if field_dict['type'] == 'text': @@ -136,6 +136,20 @@ class SearchBackend(BaseSearchBackend): raise IOError("The path to your Xapian index '%s' is not writable for the current user/group." % settings.HAYSTACK_XAPIAN_PATH) self.language = language + self._schema = None + self._content_field_name = None + + @property + def schema(self): + if not self._schema: + self._content_field_name, self._schema = self.build_schema(self.site.all_searchfields()) + return self._schema + + @property + def content_field_name(self): + if not self._content_field_name: + self._content_field_name, self._schema = self.build_schema(self.site.all_searchfields()) + return self._content_field_name def update(self, index, iterable): """ @@ -727,31 +741,23 @@ class SearchBackend(BaseSearchBackend): def _database(self, writable=False): """ - Private method that returns a xapian.Database for use and sets up - schema and content_field definitions. - + Private method that returns a xapian.Database for use. + Optional arguments: ``writable`` -- Open the database in read/write mode (default=False) - + Returns an instance of a xapian.Database or xapian.WritableDatabase """ if writable: - self.content_field_name, self.schema = self.build_schema(self.site.all_searchfields()) - database = xapian.WritableDatabase(settings.HAYSTACK_XAPIAN_PATH, xapian.DB_CREATE_OR_OPEN) - database.set_metadata('schema', pickle.dumps(self.schema, pickle.HIGHEST_PROTOCOL)) - database.set_metadata('content', pickle.dumps(self.content_field_name, pickle.HIGHEST_PROTOCOL)) else: try: database = xapian.Database(settings.HAYSTACK_XAPIAN_PATH) except xapian.DatabaseOpeningError: raise InvalidIndexError(u'Unable to open index at %s' % settings.HAYSTACK_XAPIAN_PATH) - - self.schema = pickle.loads(database.get_metadata('schema')) - self.content_field_name = pickle.loads(database.get_metadata('content')) - + return database - + def _value_column(self, field): """ Private method that returns the column value slot in the database