From f02a6937d262dcfa68ceb8e0551663799c7f9b17 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Sun, 22 Jun 2014 17:31:52 +0100 Subject: [PATCH] Added more tests for ElasticSearch backend --- wagtail/tests/models.py | 15 +- .../tests/test_elasticsearch_backend.py | 347 ++++++++++++++++++ 2 files changed, 357 insertions(+), 5 deletions(-) diff --git a/wagtail/tests/models.py b/wagtail/tests/models.py index 5787c19ff..86c85e7f9 100644 --- a/wagtail/tests/models.py +++ b/wagtail/tests/models.py @@ -323,21 +323,26 @@ class SearchTest(models.Model, indexed.Indexed): title = models.CharField(max_length=255) content = models.TextField() live = models.BooleanField(default=False) + published_date = models.DateField(null=True) - search_fields = ( - indexed.SearchField('title'), + search_fields = [ + indexed.SearchField('title', partial_match=True), indexed.SearchField('content'), indexed.SearchField('callable_indexed_field'), + indexed.FilterField('title'), indexed.FilterField('live'), - ) + indexed.FilterField('published_date'), + ] def callable_indexed_field(self): return "Callable" class SearchTestChild(SearchTest): + subtitle = models.CharField(max_length=255, null=True, blank=True) extra_content = models.TextField() - search_fields = SearchTest.search_fields + ( + search_fields = SearchTest.search_fields + [ + indexed.SearchField('subtitle', partial_match=True), indexed.SearchField('extra_content'), - ) + ] diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py index c2623fd54..88aa87f8b 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py @@ -1,7 +1,11 @@ from wagtail.tests.utils import unittest +import datetime +import json from django.test import TestCase +from django.db.models import Q +from wagtail.tests import models from .test_backends import BackendTests @@ -16,3 +20,346 @@ class 
TestElasticSearchBackend(BackendTests, TestCase): list(results) # Didn't crash, yay! + + def test_partial_search(self): + # Reset the index + self.backend.reset_index() + self.backend.add_type(models.SearchTest) + self.backend.add_type(models.SearchTestChild) + + # Add some test data + obj = models.SearchTest() + obj.title = "HelloWorld" + obj.live = True + obj.save() + self.backend.add(obj) + + # Refresh the index + self.backend.refresh_index() + + # Search and check + results = self.backend.search("HelloW", models.SearchTest.objects.all()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0].id, obj.id) + + def test_child_partial_search(self): + # Reset the index + self.backend.reset_index() + self.backend.add_type(models.SearchTest) + self.backend.add_type(models.SearchTestChild) + + obj = models.SearchTestChild() + obj.title = "WorldHello" + obj.subtitle = "HelloWorld" + obj.live = True + obj.save() + self.backend.add(obj) + + # Refresh the index + self.backend.refresh_index() + + # Search and check + results = self.backend.search("HelloW", models.SearchTest.objects.all()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0].id, obj.id) + + +class TestElasticSearchQuery(TestCase): + def assertDictEqual(self, a, b): + default = self.JSONSerializer().default + self.assertEqual(json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default)) + + def setUp(self): + # Import using a try-catch block to prevent crashes if the elasticsearch-py + # module is not installed + try: + from wagtail.wagtailsearch.backends.elasticsearch import ElasticSearchQuery + from elasticsearch.serializer import JSONSerializer + except ImportError: + raise unittest.SkipTest("elasticsearch-py not installed") + + self.ElasticSearchQuery = ElasticSearchQuery + self.JSONSerializer = JSONSerializer + + def test_simple(self): + # Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.all(), "Hello") + + # Check it 
+ expected_result = {'filtered': {'filter': {'prefix': {'content_type': 'tests_searchtest'}}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.to_es(), expected_result) + + def test_none_query_string(self): + # Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.all(), None) + + # Check it + expected_result = {'filtered': {'filter': {'prefix': {'content_type': 'tests_searchtest'}}, 'query': {'match_all': {}}}} + self.assertDictEqual(query.to_es(), expected_result) + + def test_filter(self): + # Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.filter(title="Test"), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'term': {'title_filter': 'Test'}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.to_es(), expected_result) + + def test_and_filter(self): + # Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.filter(title="Test", live=True), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'and': [{'term': {'live_filter': True}}, {'term': {'title_filter': 'Test'}}]}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.to_es(), expected_result) + + def test_or_filter(self): + # Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.filter(Q(title="Test") | Q(live=True)), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'or': [{'term': {'title_filter': 'Test'}}, {'term': {'live_filter': True}}]}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.to_es(), expected_result) + + def test_negated_filter(self): + 
# Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.exclude(live=True), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'not': {'term': {'live_filter': True}}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.to_es(), expected_result) + + def test_fields(self): + # Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.all(), "Hello", fields=['title']) + + # Check it + expected_result = {'filtered': {'filter': {'prefix': {'content_type': 'tests_searchtest'}}, 'query': {'query_string': {'query': 'Hello', 'fields': ['title']}}}} + self.assertDictEqual(query.to_es(), expected_result) + + def test_exact_lookup(self): + # Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.filter(title__exact="Test"), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'term': {'title_filter': 'Test'}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.to_es(), expected_result) + + def test_none_lookup(self): + # Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.filter(title=None), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'missing': {'field': 'title_filter'}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.to_es(), expected_result) + + def test_isnull_true_lookup(self): + # Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.filter(title__isnull=True), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'missing': {'field': 'title_filter'}}]}, 'query': {'query_string': 
{'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.to_es(), expected_result) + + def test_isnull_false_lookup(self): + # Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.filter(title__isnull=False), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'not': {'missing': {'field': 'title_filter'}}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.to_es(), expected_result) + + def test_startswith_lookup(self): + # Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.filter(title__startswith="Test"), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'prefix': {'title_filter': 'Test'}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.to_es(), expected_result) + + def test_gt_lookup(self): + # This shares the same code path as gte, lt and lte so there's no need to test those + # This also tests conversion of Python dates to strings + + # Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.filter(published_date__gt=datetime.datetime(2014, 4, 29)), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'range': {'published_date_filter': {'gt': '2014-04-29'}}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.to_es(), expected_result) + + def test_range_lookup(self): + start_date = datetime.datetime(2014, 4, 29) + end_date = datetime.datetime(2014, 8, 19) + + # Create a query + query = self.ElasticSearchQuery(models.SearchTest.objects.filter(published_date__range=(start_date, end_date)), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': 
[{'prefix': {'content_type': 'tests_searchtest'}}, {'range': {'published_date_filter': {'gte': '2014-04-29', 'lte': '2014-08-19'}}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.to_es(), expected_result) + + +class TestElasticSearchMapping(TestCase): + def assertDictEqual(self, a, b): + default = self.JSONSerializer().default + self.assertEqual(json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default)) + + def setUp(self): + # Import using a try-catch block to prevent crashes if the elasticsearch-py + # module is not installed + try: + from wagtail.wagtailsearch.backends.elasticsearch import ElasticSearchMapping + from elasticsearch.serializer import JSONSerializer + except ImportError: + raise unittest.SkipTest("elasticsearch-py not installed") + + self.JSONSerializer = JSONSerializer + + # Create ES mapping + self.es_mapping = ElasticSearchMapping(models.SearchTest) + + # Create ES document + self.obj = models.SearchTest(title="Hello") + self.obj.save() + + def test_get_document_type(self): + self.assertEqual(self.es_mapping.get_document_type(), 'tests_searchtest') + + def test_get_mapping(self): + # Build mapping + mapping = self.es_mapping.get_mapping() + + # Check + expected_result = { + 'tests_searchtest': { + 'properties': { + 'pk': {'index': 'not_analyzed', 'type': 'string', 'store': 'yes', 'include_in_all': False}, + 'content_type': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + '_partials': {'analyzer': 'edgengram_analyzer', 'include_in_all': False, 'type': 'string'}, + 'live_filter': {'index': 'not_analyzed', 'type': 'boolean', 'include_in_all': False}, + 'published_date_filter': {'index': 'not_analyzed', 'type': 'date', 'include_in_all': False}, + 'title': {'type': 'string', 'include_in_all': True, 'analyzer': 'edgengram_analyzer'}, + 'title_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 
'content': {'type': 'string', 'include_in_all': True}, + 'callable_indexed_field': {'type': 'string', 'include_in_all': True} + } + } + } + + self.assertDictEqual(mapping, expected_result) + + def test_get_document_id(self): + self.assertEqual(self.es_mapping.get_document_id(self.obj), 'tests_searchtest:' + str(self.obj.pk)) + + def test_get_document(self): + # Get document + document = self.es_mapping.get_document(self.obj) + + # Check + expected_result = { + 'pk': str(self.obj.pk), + 'content_type': 'tests_searchtest', + '_partials': ['Hello'], + 'live_filter': False, + 'published_date_filter': None, + 'title': 'Hello', + 'title_filter': 'Hello', + 'callable_indexed_field': 'Callable', + 'content': '', + } + + self.assertDictEqual(document, expected_result) + + +class TestElasticSearchMappingInheritance(TestCase): + def assertDictEqual(self, a, b): + default = self.JSONSerializer().default + self.assertEqual(json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default)) + + def setUp(self): + # Import using a try-catch block to prevent crashes if the elasticsearch-py + # module is not installed + try: + from wagtail.wagtailsearch.backends.elasticsearch import ElasticSearchMapping + from elasticsearch.serializer import JSONSerializer + except ImportError: + raise unittest.SkipTest("elasticsearch-py not installed") + + self.JSONSerializer = JSONSerializer + + # Create ES mapping + self.es_mapping = ElasticSearchMapping(models.SearchTestChild) + + # Create ES document + self.obj = models.SearchTestChild(title="Hello", subtitle="World") + self.obj.save() + + def test_get_document_type(self): + self.assertEqual(self.es_mapping.get_document_type(), 'tests_searchtest_tests_searchtestchild') + + def test_get_mapping(self): + # Build mapping + mapping = self.es_mapping.get_mapping() + + # Check + expected_result = { + 'tests_searchtest_tests_searchtestchild': { + 'properties': { + # New + 'extra_content': {'type': 'string', 
'include_in_all': True}, + 'subtitle': {'type': 'string', 'include_in_all': True, 'analyzer': 'edgengram_analyzer'}, + + # Inherited + 'pk': {'index': 'not_analyzed', 'type': 'string', 'store': 'yes', 'include_in_all': False}, + 'content_type': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + '_partials': {'analyzer': 'edgengram_analyzer', 'include_in_all': False, 'type': 'string'}, + 'live_filter': {'index': 'not_analyzed', 'type': 'boolean', 'include_in_all': False}, + 'published_date_filter': {'index': 'not_analyzed', 'type': 'date', 'include_in_all': False}, + 'title': {'type': 'string', 'include_in_all': True, 'analyzer': 'edgengram_analyzer'}, + 'title_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'content': {'type': 'string', 'include_in_all': True}, + 'callable_indexed_field': {'type': 'string', 'include_in_all': True} + } + } + } + + self.assertDictEqual(mapping, expected_result) + + def test_get_document_id(self): + # This must be tests_searchtest instead of 'tests_searchtest_tests_searchtestchild' + # as it uses the content's base content type name. + # This prevents the same object being accidentally indexed twice. + self.assertEqual(self.es_mapping.get_document_id(self.obj), 'tests_searchtest:' + str(self.obj.pk)) + + def test_get_document(self): + # Build document + document = self.es_mapping.get_document(self.obj) + + # Sort partials + if '_partials' in document: + document['_partials'].sort() + + # Check + expected_result = { + # New + 'extra_content': '', + 'subtitle': 'World', + + # Changed + 'content_type': 'tests_searchtest_tests_searchtestchild', + + # Inherited + 'pk': str(self.obj.pk), + '_partials': ['Hello', 'World'], + 'live_filter': False, + 'published_date_filter': None, + 'title': 'Hello', + 'title_filter': 'Hello', + 'callable_indexed_field': 'Callable', + 'content': '', + } + + self.assertDictEqual(document, expected_result)