From 2c80f85ec621287cbd48181d9e40ff45999d18e6 Mon Sep 17 00:00:00 2001
From: David Sauve
Date: Wed, 17 Jun 2009 15:20:52 -0400
Subject: [PATCH] Made stemmer an instance variable (default is english) and added corresponding tests

The stemmer is now built once in SearchBackend.__init__ from the
stem_lang argument and shared by update() and search(), so indexing and
query parsing always stem with the same language. stem_lang is handed
straight to xapian.Stem and defaults to 'en' (English).

---
Reviewer note: stem_lang was previously accepted but ignored (the stemmer
was hard-coded to 'english'); it is now passed to xapian.Stem. The
post-image blob id on the xapian_backend.py index line predates this fix.

 tests/xapian_backend.py |   10 ++++++++++
 xapian_backend.py       |    9 ++++-----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/tests/xapian_backend.py b/tests/xapian_backend.py
index 905b5a2..2ed5150 100644
--- a/tests/xapian_backend.py
+++ b/tests/xapian_backend.py
@@ -149,6 +149,16 @@ class XapianSearchBackendTestCase(TestCase):
         # # results = self.sb.search('Index*', narrow_queries=['name:david1'])
         # # self.assertEqual(results['hits'], 1)
 
+    def test_stemming(self):
+        self.sb.update(self.msi, self.sample_objs)
+        self.assertEqual(len(self.xapian_search('')), 3)
+
+        results = self.sb.search('index', facets=['name'])
+        self.assertEqual(results['hits'], 3)
+
+        results = self.sb.search('indexing', facets=['name'])
+        self.assertEqual(results['hits'], 3)
+
     def test_more_like_this(self):
         self.sb.update(self.msi, self.sample_objs)
         self.assertEqual(len(self.xapian_search('')), 3)
diff --git a/xapian_backend.py b/xapian_backend.py
index 0b8dfee..68b639a 100644
--- a/xapian_backend.py
+++ b/xapian_backend.py
@@ -42,22 +42,22 @@ DOCUMENT_CT_TERM_PREFIX = DOCUMENT_CUSTOM_TERM_PREFIX + 'CONTENTTYPE'
 
 
 class SearchBackend(BaseSearchBackend):
-    def __init__(self, site=None):
+    def __init__(self, site=None, stem_lang='en'):
         super(SearchBackend, self).__init__(site)
         if not hasattr(settings, 'HAYSTACK_XAPIAN_PATH'):
             raise ImproperlyConfigured('You must specify a HAYSTACK_XAPIAN_PATH in your settings.')
 
         self.path = settings.HAYSTACK_XAPIAN_PATH
+        self.stemmer = xapian.Stem(stem_lang)
 
         if not os.path.exists(self.path):
             os.makedirs(self.path)
 
     def update(self, index, iterable):
         database = xapian.WritableDatabase(self.path, xapian.DB_CREATE_OR_OPEN)
-        stemmer = xapian.Stem('english')
         indexer = xapian.TermGenerator()
         indexer.set_database(database)
-        indexer.set_stemmer(stemmer)
+        indexer.set_stemmer(self.stemmer)
         indexer.set_flags(xapian.TermGenerator.FLAG_SPELLING)
 
         fields_data = database.get_metadata('fields')
@@ -131,13 +131,12 @@ class SearchBackend(BaseSearchBackend):
             warnings.warn("Highlight has not been implemented yet.", Warning, stacklevel=2)
 
         database = xapian.Database(self.path)
-        stemmer = xapian.Stem('english')
         if query_string == '*':
             query = xapian.Query('') # Make '*' match everything
         else:
             qp = xapian.QueryParser()
             qp.set_database(database)
-            qp.set_stemmer(stemmer)
+            qp.set_stemmer(self.stemmer)
             qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
             qp.add_boolean_prefix('django_ct', DOCUMENT_CT_TERM_PREFIX)
             for field in pickle.loads(database.get_metadata('fields')):