Remove Unidecode from postgres_search. Fix #5001 (#5514)

This commit is contained in:
Pavel Denisov 2019-08-17 10:23:07 +05:00 committed by Matt Westcott
parent f9753f1f23
commit d6e4072e26
6 changed files with 50 additions and 13 deletions

View file

@ -25,6 +25,7 @@ Changelog
* Fix: `pageurl` and `slugurl` tags no longer fail when `request.site` is `None` (Samir Shah)
* Fix: Output form media on add/edit image forms with custom models (Matt Westcott)
* Fix: Layout for the clear checkbox in default FileField widget (Mikalai Radchuk)
* Fix: Remove ASCII conversion from Postgres search backend, to support stemming in non-Latin alphabets (Pavel Denisov)
2.6.1 (05.08.2019)

View file

@ -389,6 +389,7 @@ Contributors
* Jonathan Liuti
* Rahmi Pruitt
* Sanyam Khurana
* Pavel Denisov
Translators
===========

View file

@ -46,6 +46,7 @@ Bug fixes
* ``pageurl`` and ``slugurl`` tags no longer fail when ``request.site`` is ``None`` (Samir Shah)
* Output form media on add/edit image forms with custom models (Matt Westcott)
* Fixes layout for the clear checkbox in default FileField widget (Mikalai Radchuk)
* Remove ASCII conversion from Postgres search backend, to support stemming in non-Latin alphabets (Pavel Denisov)
Upgrade considerations

View file

@ -17,7 +17,7 @@ from .models import RawSearchQuery as PostgresRawSearchQuery
from .models import IndexEntry
from .utils import (
get_content_type_pk, get_descendants_content_types_pks, get_postgresql_connections,
get_sql_weights, get_weight, unidecode)
get_sql_weights, get_weight)
EMPTY_VECTOR = SearchVector(Value(''))
@ -70,7 +70,7 @@ class Index:
def prepare_field(self, obj, field):
if isinstance(field, SearchField):
yield (field, get_weight(field.boost),
unidecode(self.prepare_value(field.get_value(obj))))
self.prepare_value(field.get_value(obj)))
elif isinstance(field, RelatedFields):
sub_obj = field.get_value(obj)
if sub_obj is None:
@ -227,16 +227,13 @@ class PostgresSearchQueryCompiler(BaseSearchQueryCompiler):
and field.field_name == field_lookup:
return self.get_search_field(sub_field_name, field.fields)
def prepare_word(self, word):
return unidecode(word)
def build_tsquery_content(self, query, group=False):
if isinstance(query, PlainText):
query_formats = []
query_params = []
for word in query.query_string.split():
query_formats.append(self.TSQUERY_WORD_FORMAT)
query_params.append(self.prepare_word(word))
query_params.append(word)
operator = self.TSQUERY_OPERATORS[query.operator]
query_format = operator.join(query_formats)
if group and len(query_formats) > 1:

View file

@ -0,0 +1,44 @@
import unittest
from django.conf import settings
from django.db import connection
from django.test import TestCase
from wagtail.search.backends import get_search_backend
from wagtail.tests.search import models
class TestPostgresStemming(TestCase):
    """Stemming checks for the Postgres search backend with non-Latin text."""

    def setUp(self):
        """Skip the test unless the Postgres backend is configured.

        Scans ``settings.WAGTAILSEARCH_BACKENDS`` for an entry whose
        ``'BACKEND'`` equals the Postgres backend path; when none is found,
        ``unittest.SkipTest`` is raised. Otherwise the backend object is
        stored on ``self.backend``.
        """
        backend_name = "wagtail.contrib.postgres_search.backend"
        is_configured = any(
            conf['BACKEND'] == backend_name
            for conf in settings.WAGTAILSEARCH_BACKENDS.values()
        )
        if not is_configured:
            raise unittest.SkipTest("Only for %s" % backend_name)

        self.backend = get_search_backend(backend_name)

    def test_ru_stemming(self):
        """A Russian title is found by several inflected forms of one word."""
        # Switch the text search configuration to Russian before indexing.
        with connection.cursor() as cursor:
            cursor.execute(
                "SET default_text_search_config TO 'pg_catalog.russian'"
            )

        ru_book = models.Book.objects.create(
            title="Голубое сало",
            publication_date="1999-05-01",
            number_of_pages=352,
        )
        self.backend.add(ru_book)

        # The exact word from the title first, then two other grammatical
        # forms of the same adjective; each search must return the book.
        for search_term in ("Голубое", "Голубая", "Голубой"):
            results = self.backend.search(search_term, models.Book)
            self.assertEqual(list(results), [ru_book])

        ru_book.delete()

View file

@ -5,13 +5,6 @@ from django.db import connections
from wagtail.search.index import Indexed, RelatedFields, SearchField
try:
# Only use the GPLv2 licensed unidecode if it's installed.
from unidecode import unidecode
except ImportError:
def unidecode(value):
return value
def get_postgresql_connections():
return [connection for connection in connections.all()