Added stemming support

This commit is contained in:
David Sauve 2009-12-05 11:32:29 -05:00
parent 72134e096b
commit 1ec4fdf7ab
3 changed files with 45 additions and 37 deletions

View file

@ -409,7 +409,7 @@ class LiveXapianSearchQueryTestCase(TestCase):
self.sq.add_filter(SQ(created__lt=datetime.datetime(2009, 2, 12, 12, 13, 0)))
self.sq.add_filter(SQ(title__gte='B'))
self.sq.add_filter(SQ(id__in=[1, 2, 3]))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((why AND VALUE_RANGE 2 00010101000000 20090210015900 AND (<alldocuments> AND_NOT VALUE_RANGE 3 a david) AND (<alldocuments> AND_NOT VALUE_RANGE 4 20090212121300 99990101000000) AND VALUE_RANGE 1 b zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz AND (XID1 OR XID2 OR XID3)))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((Zwhy OR why) AND VALUE_RANGE 2 00010101000000 20090210015900 AND (<alldocuments> AND_NOT VALUE_RANGE 3 a david) AND (<alldocuments> AND_NOT VALUE_RANGE 4 20090212121300 99990101000000) AND VALUE_RANGE 1 b zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz AND (ZXID1 OR XID1 OR ZXID2 OR XID2 OR ZXID3 OR XID3)))')
def test_log_query(self):
backends.reset_search_queries()
@ -428,7 +428,7 @@ class LiveXapianSearchQueryTestCase(TestCase):
self.sq.add_filter(SQ(name='bar'))
len(self.sq.get_results())
self.assertEqual(len(backends.queries), 1)
self.assertEqual(backends.queries[0]['query_string'].get_description(), 'Xapian::Query(XNAMEbar)')
self.assertEqual(backends.queries[0]['query_string'].get_description(), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
# And again, for good measure.
self.sq = SearchQuery(backend=SearchBackend())
@ -436,8 +436,8 @@ class LiveXapianSearchQueryTestCase(TestCase):
self.sq.add_filter(SQ(text='moof'))
len(self.sq.get_results())
self.assertEqual(len(backends.queries), 2)
self.assertEqual(backends.queries[0]['query_string'].get_description(), u'Xapian::Query(XNAMEbar)')
self.assertEqual(backends.queries[1]['query_string'].get_description(), u'Xapian::Query((XNAMEbar AND XTEXTmoof))')
self.assertEqual(backends.queries[0]['query_string'].get_description(), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
self.assertEqual(backends.queries[1]['query_string'].get_description(), u'Xapian::Query(((ZXNAMEbar OR XNAMEbar) AND (ZXTEXTmoof OR XTEXTmoof)))')
# Restore.
settings.DEBUG = old_debug

View file

@ -29,68 +29,68 @@ class XapianSearchQueryTestCase(TestCase):
def test_build_query_single_word(self):
self.sq.add_filter(SQ(content='hello'))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(hello)')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((Zhello OR hello))')
def test_build_query_single_word_not(self):
self.sq.add_filter(~SQ(content='hello'))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((<alldocuments> AND_NOT hello))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((<alldocuments> AND_NOT (Zhello OR hello)))')
def test_build_query_single_word_field_exact(self):
self.sq.add_filter(SQ(foo='hello'))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(XFOOhello)')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((ZXFOOhello OR XFOOhello))')
def test_build_query_single_word_field_exact_not(self):
self.sq.add_filter(~SQ(foo='hello'))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((<alldocuments> AND_NOT XFOOhello))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((<alldocuments> AND_NOT (ZXFOOhello OR XFOOhello)))')
def test_build_query_boolean(self):
self.sq.add_filter(SQ(content=True))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(true)')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((Ztrue OR true))')
def test_build_query_date(self):
self.sq.add_filter(SQ(content=datetime.date(2009, 5, 8)))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(20090508000000)')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((Z20090508000000 OR 20090508000000))')
def test_build_query_datetime(self):
self.sq.add_filter(SQ(content=datetime.datetime(2009, 5, 8, 11, 28)))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(20090508112800)')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((Z20090508112800 OR 20090508112800))')
def test_build_query_float(self):
self.sq.add_filter(SQ(content=25.52))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(25.52)')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((Z25.52 OR 25.52))')
def test_build_query_multiple_words_and(self):
self.sq.add_filter(SQ(content='hello'))
self.sq.add_filter(SQ(content='world'))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((hello AND world))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((Zhello OR hello) AND (Zworld OR world)))')
def test_build_query_multiple_words_not(self):
self.sq.add_filter(~SQ(content='hello'))
self.sq.add_filter(~SQ(content='world'))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((<alldocuments> AND_NOT hello) AND (<alldocuments> AND_NOT world)))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((<alldocuments> AND_NOT (Zhello OR hello)) AND (<alldocuments> AND_NOT (Zworld OR world))))')
def test_build_query_multiple_words_or(self):
self.sq.add_filter(SQ(content='hello') | SQ(content='world'))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((hello OR world))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((Zhello OR hello OR Zworld OR world))')
def test_build_query_multiple_words_or_not(self):
self.sq.add_filter(~SQ(content='hello') | ~SQ(content='world'))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((<alldocuments> AND_NOT hello) OR (<alldocuments> AND_NOT world)))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((<alldocuments> AND_NOT (Zhello OR hello)) OR (<alldocuments> AND_NOT (Zworld OR world))))')
def test_build_query_multiple_words_mixed(self):
self.sq.add_filter(SQ(content='why') | SQ(content='hello'))
self.sq.add_filter(~SQ(content='world'))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((why OR hello) AND (<alldocuments> AND_NOT world)))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((Zwhy OR why OR Zhello OR hello) AND (<alldocuments> AND_NOT (Zworld OR world))))')
def test_build_query_multiple_word_field_exact(self):
self.sq.add_filter(SQ(foo='hello'))
self.sq.add_filter(SQ(bar='world'))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((XFOOhello AND XBARworld))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((ZXFOOhello OR XFOOhello) AND (ZXBARworld OR XBARworld)))')
def test_build_query_multiple_word_field_exact_not(self):
self.sq.add_filter(~SQ(foo='hello'))
self.sq.add_filter(~SQ(bar='world'))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((<alldocuments> AND_NOT XFOOhello) AND (<alldocuments> AND_NOT XBARworld)))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((<alldocuments> AND_NOT (ZXFOOhello OR XFOOhello)) AND (<alldocuments> AND_NOT (ZXBARworld OR XBARworld))))')
def test_build_query_phrase(self):
self.sq.add_filter(SQ(content='hello world'))
@ -103,37 +103,33 @@ class XapianSearchQueryTestCase(TestCase):
def test_build_query_boost(self):
self.sq.add_filter(SQ(content='hello'))
self.sq.add_boost('world', 5)
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((hello OR 5 * world))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((Zhello OR hello OR 5 * world))')
def test_build_query_in_filter_single_words(self):
self.sq.add_filter(SQ(content='why'))
self.sq.add_filter(SQ(title__in=["Dune", "Jaws"]))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((why AND (XTITLEdune OR XTITLEjaws)))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((Zwhy OR why) AND (ZXTITLEdune OR XTITLEdune OR ZXTITLEjaw OR XTITLEjaws)))')
def test_build_query_not_in_filter_single_words(self):
self.sq.add_filter(SQ(content='why'))
self.sq.add_filter(~SQ(title__in=["Dune", "Jaws"]))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((why AND (<alldocuments> AND_NOT (XTITLEdune OR XTITLEjaws))))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((Zwhy OR why) AND (<alldocuments> AND_NOT (ZXTITLEdune OR XTITLEdune OR ZXTITLEjaw OR XTITLEjaws))))')
def test_build_query_in_filter_multiple_words(self):
self.sq.add_filter(SQ(content='why'))
self.sq.add_filter(SQ(title__in=["A Famous Paper", "An Infamous Article"]))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((why AND ((XTITLEa PHRASE 3 XTITLEfamous PHRASE 3 XTITLEpaper) OR (XTITLEan PHRASE 3 XTITLEinfamous PHRASE 3 XTITLEarticle))))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((Zwhy OR why) AND ((XTITLEa PHRASE 3 XTITLEfamous PHRASE 3 XTITLEpaper) OR (XTITLEan PHRASE 3 XTITLEinfamous PHRASE 3 XTITLEarticle))))')
def test_build_query_not_in_filter_multiple_words(self):
self.sq.add_filter(SQ(content='why'))
self.sq.add_filter(~SQ(title__in=["A Famous Paper", "An Infamous Article"]))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((why AND (<alldocuments> AND_NOT ((XTITLEa PHRASE 3 XTITLEfamous PHRASE 3 XTITLEpaper) OR (XTITLEan PHRASE 3 XTITLEinfamous PHRASE 3 XTITLEarticle)))))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((Zwhy OR why) AND (<alldocuments> AND_NOT ((XTITLEa PHRASE 3 XTITLEfamous PHRASE 3 XTITLEpaper) OR (XTITLEan PHRASE 3 XTITLEinfamous PHRASE 3 XTITLEarticle)))))')
def test_build_query_in_filter_datetime(self):
self.sq.add_filter(SQ(content='why'))
self.sq.add_filter(SQ(pub_date__in=[datetime.datetime(2009, 7, 6, 1, 56, 21)]))
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((why AND XPUB_DATE20090706015621))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((Zwhy OR why) AND (ZXPUB_DATE20090706015621 OR XPUB_DATE20090706015621)))')
# def test_stem_single_word(self):
# self.sq.add_filter(SQ(content='testing'))
# self.assertEqual(self.sq.build_query().get_description(), 'Xapian.Query(Ztest)')
#
def test_clean(self):
self.assertEqual(self.sq.clean('hello world'), 'hello world')
self.assertEqual(self.sq.clean('hello AND world'), 'hello AND world')
@ -143,7 +139,7 @@ class XapianSearchQueryTestCase(TestCase):
def test_build_query_with_models(self):
self.sq.add_filter(SQ(content='hello'))
self.sq.add_model(MockModel)
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((hello AND 0 * XCONTENTTYPEcore.mockmodel))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((Zhello OR hello) AND 0 * XCONTENTTYPEcore.mockmodel))')
self.sq.add_model(AnotherMockModel)
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((hello AND (0 * XCONTENTTYPEcore.anothermockmodel OR 0 * XCONTENTTYPEcore.mockmodel)))')
self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((Zhello OR hello) AND (0 * XCONTENTTYPEcore.anothermockmodel OR 0 * XCONTENTTYPEcore.mockmodel)))')

View file

@ -720,12 +720,12 @@ class SearchBackend(BaseSearchBackend):
else:
return database.get_spelling_suggestion(spelling_query)
term_list = []
term_set = set()
for term in query:
for match in re.findall('[^A-Z]+', term): # Ignore field identifiers
term_list.append(database.get_spelling_suggestion(match))
term_set.add(database.get_spelling_suggestion(match))
return ' '.join(term_list)
return ' '.join(term_set)
def _database(self, writable=False):
"""
@ -1039,13 +1039,25 @@ class SearchQuery(BaseSearchQuery):
Returns:
A xapian.Query
"""
stem = xapian.Stem(self.backend.language)
if field:
return xapian.Query('%s%s%s' % (
DOCUMENT_CUSTOM_TERM_PREFIX, field.upper(), term
return xapian.Query(
xapian.Query.OP_OR,
xapian.Query('Z%s%s%s' % (
DOCUMENT_CUSTOM_TERM_PREFIX, field.upper(), stem(term)
)
),
xapian.Query('%s%s%s' % (
DOCUMENT_CUSTOM_TERM_PREFIX, field.upper(), term
)
)
)
else:
return xapian.Query(term)
return xapian.Query(
xapian.Query.OP_OR,
xapian.Query('Z%s' % term),
xapian.Query(term)
)
def _phrase_query(self, term_list, field=None):
"""