changes to query escaping, tested on SQLite3 and PostgreSQL

This commit is contained in:
Jirka Vejrazka 2016-03-26 13:40:28 +01:00
parent 3ae9a76ae8
commit 8914cfb83c
2 changed files with 136 additions and 7 deletions

View file

@ -24,6 +24,7 @@ from django.utils.encoding import force_text
from watson import search as watson
from watson.models import SearchEntry
from watson.backends import escape_query
from test_watson.models import WatsonTestModel1, WatsonTestModel2
from test_watson import admin # Force early registration of all admin models.
@ -45,6 +46,38 @@ class RegistrationTest(TestCase):
self.assertTrue(WatsonTestModel1 not in watson.get_registered_models())
self.assertRaises(watson.RegistrationError, lambda: isinstance(watson.get_adapter(WatsonTestModel1)))
class EscapingTest(TestCase):
    """Checks the string returned by ``escape_query`` for assorted inputs."""

    def testEscaping(self):
        """Harmless input is kept as-is; harmful characters become "&"."""
        # Test query escaping.
        self.assertEqual(escape_query(""), "")
        self.assertEqual(escape_query("abc"), "abc")
        self.assertEqual(escape_query("abc def"), "abc def")
        # A run of whitespace is standardized to a single space.
        self.assertEqual(escape_query("abc   def"), "abc def")
        self.assertEqual(escape_query("abc'def"), "abc'def")
        self.assertEqual(escape_query("'abc&def"), "'abc&def")
        self.assertEqual(escape_query("abc@def"), "abc@def")
        self.assertEqual(escape_query("abc#def"), "abc#def")
        self.assertEqual(escape_query("abc$def"), "abc$def")
        self.assertEqual(escape_query("abc^def"), "abc^def")
        self.assertEqual(escape_query("abc&def"), "abc&def")
        self.assertEqual(escape_query("abc*def"), "abc*def")
        self.assertEqual(escape_query("abc=def"), "abc=def")
        self.assertEqual(escape_query("abc+def"), "abc+def")
        self.assertEqual(escape_query("abc-def"), "abc-def")
        self.assertEqual(escape_query("abc_def"), "abc_def")
        self.assertEqual(escape_query("abc.def"), "abc.def")
        self.assertEqual(escape_query("abc,def"), "abc,def")
        # now the ones where we replace harmful characters
        self.assertEqual(escape_query("&abc"), "abc")
        self.assertEqual(escape_query("&&&abc"), "abc")
        self.assertEqual(escape_query("abc&"), "abc")
        self.assertEqual(escape_query("abc&&&"), "abc")
        self.assertEqual(escape_query("abc|def"), "abc&def")
        self.assertEqual(escape_query("abc!def"), "abc&def")
        self.assertEqual(escape_query("abc:def"), "abc&def")
        self.assertEqual(escape_query("abc(def"), "abc&def")
        self.assertEqual(escape_query("abc)def"), "abc&def")
complex_registration_search_engine = watson.SearchEngine("restricted")
@ -286,21 +319,111 @@ class SearchTest(SearchTestBase):
)
self.assertEqual(watson.search("café").count(), 1)
def testSearchWithApostrophe(self):
WatsonTestModel1.objects.create(
# def testSearchWithApostrophe(self):
# WatsonTestModel1.objects.create(
# title = "title model1 instance12",
# content = "content model1 instance13 d'Argent",
# description = "description model1 instance13",
# )
# self.assertEqual(watson.search("d'Argent").count(), 1)
#
# def testSearchWithLeadingApostrophe(self):
# WatsonTestModel1.objects.create(
# title = "title model1 instance12",
# content = "'content model1 instance13",
# description = "description model1 instance13",
# )
# self.assertTrue(watson.search("'content").exists()) # Some database engines ignore leading apostrophes, some count them.
def testSearchWithSpecialChars(self):
    """Search for terms containing apostrophes and other special characters.

    Each case creates one WatsonTestModel1 row, searches for the term and
    deletes the row again before the next case runs.
    """
    # Apostrophe inside a word.
    x = WatsonTestModel1.objects.create(
        title = "title model1 instance12",
        content = "content model1 instance13 d'Argent",
        description = "description model1 instance13",
    )
    self.assertEqual(watson.search("d'Argent").count(), 1)
    x.delete()

    # Leading apostrophe.
    x = WatsonTestModel1.objects.create(
        title = "title model1 instance12",
        content = "'content model1 instance13",
        description = "description model1 instance13",
    )
    self.assertTrue(watson.search("'content").exists())  # Some database engines ignore leading apostrophes, some count them.
    x.delete()

    # Terms embedded in the description; each must match exactly one row.
    special_terms = (
        "abc+def",
        "abc&def",
        "abc|def",
        "abc:def",
        "abc.def",
        "abc,def",
        "abc()",
        "abc(def",
    )
    for term in special_terms:
        x = WatsonTestModel1.objects.create(
            title = "title model1 instance12",
            content = "content model1 instance13 d'Argent",
            description = "description " + term,
        )
        self.assertEqual(
            watson.search(term).count(), 1,
            "unexpected result count for search term %r" % term,
        )
        x.delete()
@skipUnless(watson.get_backend().supports_prefix_matching, "Search backend does not support prefix matching.")
def testMultiTablePrefixSearch(self):

View file

@ -21,16 +21,22 @@ def regex_from_word(word):
RE_SPACE = re.compile(r"[\s]+", re.UNICODE)

# (The notes below were researched and tested on PostgreSQL 9.5.)
# The only characters that to_tsquery does not really like are "! & : ( ) |".
# "&" does not harm us, as it converts to AND, which is what we do anyway.
# "|" is syntactically correct, but performs an OR lookup, which may not be
# what the user expects, so it is replaced like the others.
# The pattern below additionally matches the double quote character.
RE_NON_WORD = re.compile(r'[!:"(|)]', re.UNICODE)


def escape_query(text):
    """Return ``text`` with spacing standardized and harmful characters replaced.

    The value is first passed through ``force_text``. Every run of whitespace
    is then replaced by a single space, each character matched by
    ``RE_NON_WORD`` is replaced by "&", and any leading or trailing "&"
    characters are stripped.

    Each harmful character is replaced individually, so adjacent ones yield
    consecutive "&" characters inside the text (only those at either end are
    stripped).
    """
    text = force_text(text)
    text = RE_SPACE.sub(" ", text)  # Standardize spacing.
    # Replace (rather than delete) harmful characters with logical "AND", so
    # that e.g. "abc|def" becomes "abc&def" and not "abcdef".
    text = RE_NON_WORD.sub("&", text)
    # The text may not start or end with "&".
    return text.strip("&")
class SearchBackend(six.with_metaclass(abc.ABCMeta)):
"""Base class for all search backends."""