diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 305e09373..6d843a628 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -25,6 +25,7 @@ Changelog * Fix: `pageurl` and `slugurl` tags no longer fail when `request.site` is `None` (Samir Shah) * Fix: Output form media on add/edit image forms with custom models (Matt Westcott) * Fix: Layout for the clear checkbox in default FileField widget (Mikalai Radchuk) + * Fix: Remove ASCII conversion from Postgres search backend, to support stemming in non-Latin alphabets (Pavel Denisov) 2.6.1 (05.08.2019) diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst index 407cb9b6c..16adf7407 100644 --- a/CONTRIBUTORS.rst +++ b/CONTRIBUTORS.rst @@ -389,6 +389,7 @@ Contributors * Jonathan Liuti * Rahmi Pruitt * Sanyam Khurana +* Pavel Denisov Translators =========== diff --git a/docs/releases/2.7.rst b/docs/releases/2.7.rst index fe464f5fb..5c4ff1a83 100644 --- a/docs/releases/2.7.rst +++ b/docs/releases/2.7.rst @@ -46,6 +46,7 @@ Bug fixes * ``pageurl`` and ``slugurl`` tags no longer fail when ``request.site`` is ``None`` (Samir Shah) * Output form media on add/edit image forms with custom models (Matt Westcott) * Fixes layout for the clear checkbox in default FileField widget (Mikalai Radchuk) + * Remove ASCII conversion from Postgres search backend, to support stemming in non-Latin alphabets (Pavel Denisov) Upgrade considerations diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py index e7d13833b..8b16c4ce4 100644 --- a/wagtail/contrib/postgres_search/backend.py +++ b/wagtail/contrib/postgres_search/backend.py @@ -17,7 +17,7 @@ from .models import RawSearchQuery as PostgresRawSearchQuery from .models import IndexEntry from .utils import ( get_content_type_pk, get_descendants_content_types_pks, get_postgresql_connections, - get_sql_weights, get_weight, unidecode) + get_sql_weights, get_weight) EMPTY_VECTOR = SearchVector(Value('')) @@ -70,7 +70,7 @@ class Index: def prepare_field(self, obj, 
field): if isinstance(field, SearchField): yield (field, get_weight(field.boost), - unidecode(self.prepare_value(field.get_value(obj)))) + self.prepare_value(field.get_value(obj))) elif isinstance(field, RelatedFields): sub_obj = field.get_value(obj) if sub_obj is None: @@ -227,16 +227,13 @@ class PostgresSearchQueryCompiler(BaseSearchQueryCompiler): and field.field_name == field_lookup: return self.get_search_field(sub_field_name, field.fields) - def prepare_word(self, word): - return unidecode(word) - def build_tsquery_content(self, query, group=False): if isinstance(query, PlainText): query_formats = [] query_params = [] for word in query.query_string.split(): query_formats.append(self.TSQUERY_WORD_FORMAT) - query_params.append(self.prepare_word(word)) + query_params.append(word) operator = self.TSQUERY_OPERATORS[query.operator] query_format = operator.join(query_formats) if group and len(query_formats) > 1: diff --git a/wagtail/contrib/postgres_search/tests/test_stemming.py b/wagtail/contrib/postgres_search/tests/test_stemming.py new file mode 100644 index 000000000..2e3356cb2 --- /dev/null +++ b/wagtail/contrib/postgres_search/tests/test_stemming.py @@ -0,0 +1,43 @@ +import unittest + +from django.conf import settings +from django.db import connection +from django.test import TestCase + +from wagtail.search.backends import get_search_backend +from wagtail.tests.search import models + + +class TestPostgresStemming(TestCase): + def setUp(self): + backend_name = "wagtail.contrib.postgres_search.backend" + for conf in settings.WAGTAILSEARCH_BACKENDS.values(): + if conf['BACKEND'] == backend_name: + break + else: + raise unittest.SkipTest("Only for %s" % backend_name) + + self.backend = get_search_backend(backend_name) + + def test_ru_stemming(self): + with connection.cursor() as cursor: + cursor.execute( + "SET default_text_search_config TO 'pg_catalog.russian'" + ) + + ru_book = models.Book.objects.create( + title="Голубое сало", publication_date="1999-05-01", 
number_of_pages=352 + ) + self.backend.add(ru_book) + + results = self.backend.search("Голубое", models.Book) + self.assertEqual(list(results), [ru_book]) + + results = self.backend.search("Голубая", models.Book) + self.assertEqual(list(results), [ru_book]) + + results = self.backend.search("Голубой", models.Book) + self.assertEqual(list(results), [ru_book]) + + ru_book.delete() diff --git a/wagtail/contrib/postgres_search/utils.py b/wagtail/contrib/postgres_search/utils.py index bf0f76aa8..a4b8e5702 100644 --- a/wagtail/contrib/postgres_search/utils.py +++ b/wagtail/contrib/postgres_search/utils.py @@ -5,13 +5,6 @@ from django.db import connections from wagtail.search.index import Indexed, RelatedFields, SearchField -try: - # Only use the GPLv2 licensed unidecode if it's installed. - from unidecode import unidecode -except ImportError: - def unidecode(value): - return value - def get_postgresql_connections(): return [connection for connection in connections.all()