Improving postgres query escaping

This commit is contained in:
Dave Hall 2015-06-01 11:01:50 +01:00
parent 4eeb9c73d5
commit 30b59a00a9
2 changed files with 29 additions and 19 deletions

View file

@ -1,3 +1,4 @@
# coding=utf-8
"""
Tests for django-watson.
@ -284,6 +285,14 @@ class SearchTest(SearchTestBase):
self.assertEqual(watson.search("FOOO INSTANCE11").count(), 0)
self.assertEqual(watson.search("MODEL2 INSTANCE11").count(), 0)
def testSearchWithAccent(self):
    """Searching for a word containing an accented character finds the one matching object."""
    WatsonTestModel1.objects.create(
        title="title model1 instance12",
        content="content model1 instance13 café",
        description="description model1 instance13",
    )
    accented_matches = watson.search("café")
    self.assertEqual(accented_matches.count(), 1)
def testSearchWithApostrophe(self):
WatsonTestModel1.objects.create(
title = "title model1 instance12",
@ -298,7 +307,7 @@ class SearchTest(SearchTestBase):
content = "'content model1 instance13",
description = "description model1 instance13",
)
self.assertEqual(watson.search("'content").count(), 1)
self.assertTrue(watson.search("'content").exists()) # Some database engines ignore leading apostrophes, some count them.
@skipUnless(get_backend().supports_prefix_matching, "Search backend does not support prefix matching.")
def testMultiTablePrefixSearch(self):

View file

@ -18,15 +18,17 @@ def regex_from_word(word):
return "(\s{word})|(^{word})".format(
word = re.escape(word),
)
def make_escaper(badchars):
    """Build a function that removes every character of ``badchars`` from a string.

    The returned callable coerces its argument with ``force_text`` (ignoring
    decoding errors) and applies one ``translate`` pass over the result.
    Apostrophes are always doubled rather than removed, even when ``'`` is
    listed in ``badchars``.
    """
    # Mapping a code point to None makes ``translate`` delete that character.
    strip_table = dict.fromkeys(map(ord, badchars))
    strip_table[ord("'")] = "''"

    def escaper(text):
        unicode_text = force_text(text, errors="ignore")
        return unicode_text.translate(strip_table)

    return escaper
# Matches any run of one or more whitespace characters (Unicode-aware).
RE_SPACE = re.compile(r"[\s]+", re.UNICODE)
# Matches any single character that is not a space, a word character, a hyphen or an apostrophe.
RE_NON_WORD = re.compile(r"[^ \w\-']", re.UNICODE)
def escape_query(text):
    """Reduce ``text`` to plain words separated by spaces.

    The input is coerced with ``force_text``. Each run of whitespace is then
    collapsed to a single space, and every character matched by
    ``RE_NON_WORD`` is deleted from the result.
    """
    single_spaced = RE_SPACE.sub(" ", force_text(text))
    return RE_NON_WORD.sub("", single_spaced)
class SearchBackend(six.with_metaclass(abc.ABCMeta)):
@ -154,9 +156,6 @@ class RegexSearchBackend(RegexSearchMixin, SearchBackend):
"""A search backend that works with SQLite3."""
escape_postgres_query_chars = make_escaper("():|!&*")
class PostgresSearchBackend(SearchBackend):
"""A search backend that uses native PostgreSQL full text indices."""
@ -167,9 +166,9 @@ class PostgresSearchBackend(SearchBackend):
def escape_postgres_query(self, text):
"""Escapes the given text to become a valid ts_query."""
return " & ".join(
"{0}:*".format(word)
"$${0}$$:*".format(word)
for word
in escape_postgres_query_chars(text).split()
in escape_query(text).split()
)
def is_installed(self):
@ -310,7 +309,11 @@ class PostgresLegacySearchBackend(PostgresSearchBackend):
def escape_postgres_query(self, text):
"""Escapes the given text to become a valid ts_query."""
return " & ".join(escape_postgres_query_chars(text).split())
return " & ".join(
"$${0}$$".format(word)
for word
in escape_query(text).split()
)
class PostgresPrefixLegacySearchBackend(RegexSearchMixin, PostgresLegacySearchBackend):
@ -322,16 +325,14 @@ class PostgresPrefixLegacySearchBackend(RegexSearchMixin, PostgresLegacySearchBa
Use if your postgres version is less than 8.3, and you absolutely can't live without
prefix matching. Beware, this backend can get slow with large datasets!
"""
escape_mysql_boolean_query_chars = make_escaper("+-<>()*\".!:,;")
def escape_mysql_boolean_query(search_text):
return " ".join(
'+{word}*'.format(
word = word,
)
for word in escape_mysql_boolean_query_chars(search_text).split()
for word in escape_query(search_text).split()
)