From d471858a98043ce119660463c1b7ee5ef90785c5 Mon Sep 17 00:00:00 2001 From: Jirka Vejrazka Date: Sun, 27 Mar 2016 20:33:32 +0200 Subject: [PATCH] modified tests so they're "compatible" with MySQL which ignores all words shorter than 4 chars --- src/tests/test_watson/tests.py | 88 +++++++++++++++++----------------- src/watson/backends.py | 3 ++ 2 files changed, 47 insertions(+), 44 deletions(-) diff --git a/src/tests/test_watson/tests.py b/src/tests/test_watson/tests.py index ce7cc3b..6950619 100644 --- a/src/tests/test_watson/tests.py +++ b/src/tests/test_watson/tests.py @@ -51,33 +51,33 @@ class EscapingTest(TestCase): def testEscaping(self): # Test query escaping. self.assertEqual(escape_query(""), "") - self.assertEqual(escape_query("abc"), "abc") - self.assertEqual(escape_query("abc def"), "abc def") - self.assertEqual(escape_query("abc def"), "abc def") - self.assertEqual(escape_query("abc'def"), "abc'def") - self.assertEqual(escape_query("'abc&def"), "'abc&def") - self.assertEqual(escape_query("abc@def"), "abc@def") - self.assertEqual(escape_query("abc#def"), "abc#def") - self.assertEqual(escape_query("abc$def"), "abc$def") - self.assertEqual(escape_query("abc^def"), "abc^def") - self.assertEqual(escape_query("abc&def"), "abc&def") - self.assertEqual(escape_query("abc*def"), "abc*def") - self.assertEqual(escape_query("abc=def"), "abc=def") - self.assertEqual(escape_query("abc+def"), "abc+def") - self.assertEqual(escape_query("abc-def"), "abc-def") - self.assertEqual(escape_query("abc_def"), "abc_def") - self.assertEqual(escape_query("abc.def"), "abc.def") - self.assertEqual(escape_query("abc,def"), "abc,def") + self.assertEqual(escape_query("abcd"), "abcd") + self.assertEqual(escape_query("abcd efgh"), "abcd efgh") + self.assertEqual(escape_query("abcd efgh"), "abcd efgh") + self.assertEqual(escape_query("abcd'efgh"), "abcd'efgh") + self.assertEqual(escape_query("'abcd&efgh"), "'abcd&efgh") + self.assertEqual(escape_query("abcd@efgh"), "abcd@efgh") + self.assertEqual(escape_query("abcd#efgh"), "abcd#efgh") + self.assertEqual(escape_query("abcd$efgh"), "abcd$efgh") + self.assertEqual(escape_query("abcd^efgh"), "abcd^efgh") + self.assertEqual(escape_query("abcd&efgh"), "abcd&efgh") + self.assertEqual(escape_query("abcd*efgh"), "abcd*efgh") + self.assertEqual(escape_query("abcd=efgh"), "abcd=efgh") + self.assertEqual(escape_query("abcd+efgh"), "abcd+efgh") + self.assertEqual(escape_query("abcd-efgh"), "abcd-efgh") + self.assertEqual(escape_query("abcd_efgh"), "abcd_efgh") + self.assertEqual(escape_query("abcd.efgh"), "abcd.efgh") + self.assertEqual(escape_query("abcd,efgh"), "abcd,efgh") # now the ones where we replace harmful characters - self.assertEqual(escape_query("&abc"), "abc") - self.assertEqual(escape_query("&&&abc"), "abc") - self.assertEqual(escape_query("abc&"), "abc") - self.assertEqual(escape_query("abc&&&"), "abc") - self.assertEqual(escape_query("abc|def"), "abc&def") - self.assertEqual(escape_query("abc!def"), "abc&def") - self.assertEqual(escape_query("abc:def"), "abc&def") - self.assertEqual(escape_query("abc(def"), "abc&def") - self.assertEqual(escape_query("abc)def"), "abc&def") + self.assertEqual(escape_query("&abcd"), "abcd") + self.assertEqual(escape_query("&&&abcd"), "abcd") + self.assertEqual(escape_query("abcd&"), "abcd") + self.assertEqual(escape_query("abcd&&&"), "abcd") + self.assertEqual(escape_query("abcd|efgh"), "abcd&efgh") + self.assertEqual(escape_query("abcd!efgh"), "abcd&efgh") + self.assertEqual(escape_query("abcd:efgh"), "abcd&efgh") + self.assertEqual(escape_query("abcd(efgh"), "abcd&efgh") + self.assertEqual(escape_query("abcd)efgh"), "abcd&efgh") complex_registration_search_engine = watson.SearchEngine("restricted") @@ -355,73 +355,73 @@ class SearchTest(SearchTestBase): x = WatsonTestModel1.objects.create( title = "title model1 instance12", content = "content model1 instance13 d'Argent", - description = "description abc+def", + description = "description abcd+efgh", ) - self.assertEqual(watson.search("abc+def").count(), 1) + self.assertEqual(watson.search("abcd+efgh").count(), 1) x.delete() x = WatsonTestModel1.objects.create( title = "title model1 instance12", content = "content model1 instance13 d'Argent", - description = "description abc&def", + description = "description abcd&efgh", ) - self.assertEqual(watson.search("abc&def").count(), 1) + self.assertEqual(watson.search("abcd&efgh").count(), 1) x.delete() x = WatsonTestModel1.objects.create( title = "title model1 instance12", content = "content model1 instance13 d'Argent", - description = "description abc|def", + description = "description abcd|efgh", ) - self.assertEqual(watson.search("abc|def").count(), 1) + self.assertEqual(watson.search("abcd|efgh").count(), 1) x.delete() x = WatsonTestModel1.objects.create( title = "title model1 instance12", content = "content model1 instance13 d'Argent", - description = "description abc:def", + description = "description abcd:efgh", ) - self.assertEqual(watson.search("abc:def").count(), 1) + self.assertEqual(watson.search("abcd:efgh").count(), 1) x.delete() x = WatsonTestModel1.objects.create( title = "title model1 instance12", content = "content model1 instance13 d'Argent", - description = "description abc.def", + description = "description abcd.efgh", ) - self.assertEqual(watson.search("abc.def").count(), 1) + self.assertEqual(watson.search("abcd.efgh").count(), 1) x.delete() x = WatsonTestModel1.objects.create( title = "title model1 instance12", content = "content model1 instance13 d'Argent", - description = "description abc,def", + description = "description abcd,efgh", ) - self.assertEqual(watson.search("abc,def").count(), 1) + self.assertEqual(watson.search("abcd,efgh").count(), 1) x.delete() x = WatsonTestModel1.objects.create( title = "title model1 instance12", content = "content model1 instance13 d'Argent", - description = "description abc:def", + description = "description abcd:efgh", ) - self.assertEqual(watson.search("abc:def").count(), 1) + self.assertEqual(watson.search("abcd:efgh").count(), 1) x.delete() x = WatsonTestModel1.objects.create( title = "title model1 instance12", content = "content model1 instance13 d'Argent", - description = "description abc()", + description = "description abcd()", ) - self.assertEqual(watson.search("abc()").count(), 1) + self.assertEqual(watson.search("abcd()").count(), 1) x.delete() x = WatsonTestModel1.objects.create( title = "title model1 instance12", content = "content model1 instance13 d'Argent", - description = "description abc(def", + description = "description abcd(efgh", ) - self.assertEqual(watson.search("abc(def").count(), 1) + self.assertEqual(watson.search("abcd(efgh").count(), 1) x.delete() diff --git a/src/watson/backends.py b/src/watson/backends.py index 2126f8d..5d72920 100644 --- a/src/watson/backends.py +++ b/src/watson/backends.py @@ -30,6 +30,9 @@ RE_SPACE = re.compile(r"[\s]+", re.UNICODE) RE_NON_WORD = re.compile(r'[!:"(|)]', re.UNICODE) def escape_query(text): + """normalizes the query text to a format that can be consumed + by the backend database + """ text = force_text(text) text = RE_SPACE.sub(" ", text) # Standardize spacing. text = RE_NON_WORD.sub("&", text) # Replace harmful characters with logical "AND"