diff --git a/.travis.yml b/.travis.yml index 3e0a4bb98..8764f2859 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ services: # Package installation install: - python setup.py install - - pip install psycopg2 pyelasticsearch elasticutils==0.8.2 wand embedly + - pip install psycopg2 elasticsearch wand embedly - pip install coveralls # Pre-test configuration before_script: diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 19eec21e7..ca58949b4 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -3,6 +3,7 @@ Changelog 0.4 (xx.xx.20xx) ~~~~~~~~~~~~~~~~ + * ElasticUtils/pyelasticsearch swapped for elasticsearch-py * Added 'original' as a resizing rule supported by the 'image' tag * Hallo.js updated to version 1.0.4 * Snippets are now ordered alphabetically @@ -14,6 +15,7 @@ Changelog * Aesthetic improvements to preview experience * 'image' tag now accepts extra keyword arguments to be output as attributes on the img tag * Added an 'attrs' property to image rendition objects to output src, width, height and alt attributes all in one go + * Added 'construct_whitelister_element_rules' hook for customising the HTML whitelist used when saving rich text fields * Fix: Animated GIFs are now coalesced before resizing * Fix: Wand backend clones images before modifying them * Fix: Admin breadcrumb now positioned correctly on mobile diff --git a/docs/building_your_site/frontenddevelopers.rst b/docs/building_your_site/frontenddevelopers.rst index a8cf6a25e..8d0c9a3bb 100644 --- a/docs/building_your_site/frontenddevelopers.rst +++ b/docs/building_your_site/frontenddevelopers.rst @@ -186,23 +186,42 @@ The available resizing methods are: More control over the ``img`` tag --------------------------------- -In some cases greater control over the ``img`` tag is required, for example to add a custom ``class``. Rather than generating the ``img`` element for you, Wagtail can assign the relevant data to another object using Django's ``as`` syntax: +Wagtail provides two shorcuts to give greater control over the ``img`` element: + +.. versionadded:: 0.4 +**Adding attributes to the {% image %} tag** + +Extra attributes can be specified with the syntax ``attribute="value"``: + +.. code-block:: django + + {% image self.photo width-400 class="foo" id="bar" %} + +No validation is performed on attributes add in this way by the developer. It's possible to add `src`, `width`, `height` and `alt` of your own that might conflict with those generated by the tag itself. + + +**Generating the image "as"** + +Wagtail can assign the image data to another object using Django's ``as`` syntax: .. code-block:: django - {% load image %} - ... {% image self.photo width-400 as tmp_photo %} {{ tmp_photo.alt }} +.. versionadded:: 0.4 +The ``attrs`` shortcut +----------------------- + You can also use the ``attrs`` property as a shorthand to output the ``src``, ``width``, ``height`` and ``alt`` attributes in one go: .. code-block:: django + .. _rich-text-filter: Rich text (filter) diff --git a/docs/editing_api.rst b/docs/editing_api.rst index 88fc9458f..9f7602b11 100644 --- a/docs/editing_api.rst +++ b/docs/editing_api.rst @@ -546,6 +546,28 @@ Where ``'hook'`` is one of the following hook strings and ``function`` is a func + 'demo/css/vendor/font-awesome/css/font-awesome.min.css">') hooks.register('insert_editor_css', editor_css) +.. _construct_whitelister_element_rules: + +``construct_whitelister_element_rules`` + .. versionadded:: 0.4 + Customise the rules that define which HTML elements are allowed in rich text areas. By default only a limited set of HTML elements and attributes are whitelisted - all others are stripped out. The callables passed into this hook must return a dict, which maps element names to handler functions that will perform some kind of manipulation of the element. These handler functions receive the element as a `BeautifulSoup `_ Tag object. + + The ``wagtail.wagtailcore.whitelist`` module provides a few helper functions to assist in defining these handlers: ``allow_without_attributes``, a handler which preserves the element but strips out all of its attributes, and ``attribute_rule`` which accepts a dict specifying how to handle each attribute, and returns a handler function. This dict will map attribute names to either True (indicating that the attribute should be kept), False (indicating that it should be dropped), or a callable (which takes the initial attribute value and returns either a final value for the attribute, or None to drop the attribute). + + For example, the following hook function will add the ``
`` element to the whitelist, and allow the ``target`` attribute on ```` elements: + + .. code-block:: python + + from wagtail.wagtailadmin import hooks + from wagtail.wagtailcore.whitelist import attribute_rule, check_url, allow_without_attributes + + def whitelister_element_rules(): + return { + 'blockquote': allow_without_attributes, + 'a': attribute_rule({'href': check_url, 'target': True}), + } + hooks.register('construct_whitelister_element_rules', whitelister_element_rules) + Image Formats in the Rich Text Editor ------------------------------------- diff --git a/docs/wagtail_search.rst b/docs/wagtail_search.rst index 3d8ea26b2..1fa2c6525 100644 --- a/docs/wagtail_search.rst +++ b/docs/wagtail_search.rst @@ -220,17 +220,14 @@ The default DB search backend uses Django's ``__icontains`` filter. Elasticsearch Backend ````````````````````` -Prerequisites are the Elasticsearch service itself and, via pip, the `elasticutils`_ and `pyelasticsearch`_ packages: +Prerequisites are the Elasticsearch service itself and, via pip, the `elasticsearch-py`_ package: .. code-block:: guess - pip install elasticutils==0.8.2 pyelasticsearch + pip install elasticsearch .. note:: - ElasticUtils 0.9+ is not supported. - -.. note:: - The dependency on elasticutils and pyelasticsearch is scheduled to be replaced by a dependency on `elasticsearch-py`_. + If you are using Elasticsearch < 1.0, install elasticsearch-py version 0.4.5: ```pip install elasticsearch==0.4.5``` The backend is configured in settings: @@ -246,7 +243,7 @@ The backend is configured in settings: } } -Other than ``BACKEND`` the keys are optional and default to the values shown. ``FORCE_NEW`` is used by elasticutils. In addition, any other keys are passed directly to the Elasticsearch constructor as case-sensitive keyword arguments (e.g. ``'max_retries': 1``). +Other than ``BACKEND`` the keys are optional and default to the values shown. ``FORCE_NEW`` is used by elasticsearch-py. In addition, any other keys are passed directly to the Elasticsearch constructor as case-sensitive keyword arguments (e.g. ``'max_retries': 1``). If you prefer not to run an Elasticsearch server in development or production, there are many hosted services available, including `Searchly`_, who offer a free account suitable for testing and development. To use Searchly: @@ -256,8 +253,6 @@ If you prefer not to run an Elasticsearch server in development or production, t - Configure ``URLS`` and ``INDEX`` in the Elasticsearch entry in ``WAGTAILSEARCH_BACKENDS`` - Run ``./manage.py update_index`` -.. _elasticutils: http://elasticutils.readthedocs.org -.. _pyelasticsearch: http://pyelasticsearch.readthedocs.org .. _elasticsearch-py: http://elasticsearch-py.readthedocs.org .. _Searchly: http://www.searchly.com/ .. _dashboard.searchly.com/users/sign\_up: https://dashboard.searchly.com/users/sign_up diff --git a/runtests.py b/runtests.py index 96653b40d..efa66910f 100755 --- a/runtests.py +++ b/runtests.py @@ -13,7 +13,7 @@ MEDIA_ROOT = os.path.join(WAGTAIL_ROOT, 'test-media') if not settings.configured: try: - import elasticutils + import elasticsearch has_elasticsearch = True except ImportError: has_elasticsearch = False diff --git a/wagtail/tests/models.py b/wagtail/tests/models.py index f798129dc..ca6d411da 100644 --- a/wagtail/tests/models.py +++ b/wagtail/tests/models.py @@ -303,6 +303,9 @@ class StandardChild(Page): pass class BusinessIndex(Page): + subpage_types = ['tests.BusinessChild', 'tests.BusinessSubIndex'] + +class BusinessSubIndex(Page): subpage_types = ['tests.BusinessChild'] class BusinessChild(Page): diff --git a/wagtail/tests/wagtail_hooks.py b/wagtail/tests/wagtail_hooks.py index 91c76c9f4..5dccca666 100644 --- a/wagtail/tests/wagtail_hooks.py +++ b/wagtail/tests/wagtail_hooks.py @@ -1,4 +1,5 @@ from wagtail.wagtailadmin import hooks +from wagtail.wagtailcore.whitelist import attribute_rule, check_url, allow_without_attributes def editor_css(): return """""" @@ -8,3 +9,11 @@ hooks.register('insert_editor_css', editor_css) def editor_js(): return """""" hooks.register('insert_editor_js', editor_js) + + +def whitelister_element_rules(): + return { + 'blockquote': allow_without_attributes, + 'a': attribute_rule({'href': check_url, 'target': True}), + } +hooks.register('construct_whitelister_element_rules', whitelister_element_rules) diff --git a/wagtail/wagtailadmin/static/wagtailadmin/js/page-editor.js b/wagtail/wagtailadmin/static/wagtailadmin/js/page-editor.js index eacb8f86d..03adb7aca 100644 --- a/wagtail/wagtailadmin/static/wagtailadmin/js/page-editor.js +++ b/wagtail/wagtailadmin/static/wagtailadmin/js/page-editor.js @@ -329,9 +329,9 @@ $(function() { }); /* Set up behaviour of preview button */ - $('.action-preview').click(function(e) { + $('.action-preview').click(function(e) { e.preventDefault(); - + var previewWindow = window.open($(this).data('placeholder'), $(this).data('windowname')); $.ajax({ @@ -340,18 +340,9 @@ $(function() { data: $('#page-edit-form').serialize(), success: function(data, textStatus, request) { if (request.getResponseHeader('X-Wagtail-Preview') == 'ok') { - var pdoc = previewWindow.document; - var frame = pdoc.getElementById('preview-frame'); - - frame = frame.contentWindow || frame.contentDocument.document || frame.contentDocument; - frame.document.open(); - frame.document.write(data); - frame.document.close(); - - var hideTimeout = setTimeout(function(){ - pdoc.getElementById('loading-spinner-wrapper').className += 'remove'; - clearTimeout(hideTimeout); - }, 50) // just enough to give effect without adding discernible slowness + previewWindow.document.open(); + previewWindow.document.write(data); + previewWindow.document.close(); } else { previewWindow.close(); document.open(); diff --git a/wagtail/wagtailadmin/templates/wagtailadmin/pages/preview.html b/wagtail/wagtailadmin/templates/wagtailadmin/pages/preview.html index d5c4b7a51..46a4d2867 100644 --- a/wagtail/wagtailadmin/templates/wagtailadmin/pages/preview.html +++ b/wagtail/wagtailadmin/templates/wagtailadmin/pages/preview.html @@ -11,6 +11,5 @@
- diff --git a/wagtail/wagtailadmin/tests/test_pages_views.py b/wagtail/wagtailadmin/tests/test_pages_views.py index 6c1f490dd..191c51a50 100644 --- a/wagtail/wagtailadmin/tests/test_pages_views.py +++ b/wagtail/wagtailadmin/tests/test_pages_views.py @@ -1,5 +1,5 @@ from django.test import TestCase -from wagtail.tests.models import SimplePage, EventPage, StandardIndex, StandardChild, BusinessIndex, BusinessChild +from wagtail.tests.models import SimplePage, EventPage, StandardIndex, StandardChild, BusinessIndex, BusinessChild, BusinessSubIndex from wagtail.tests.utils import unittest, WagtailTestUtils from wagtail.wagtailcore.models import Page, PageRevision from django.core.urlresolvers import reverse @@ -771,41 +771,87 @@ class TestSubpageBusinessRules(TestCase, WagtailTestUtils): # Find root page self.root_page = Page.objects.get(id=2) - # Add standard page + # Add standard page (allows subpages of any type) self.standard_index = StandardIndex() self.standard_index.title = "Standard Index" self.standard_index.slug = "standard-index" self.root_page.add_child(instance=self.standard_index) - # Add business page + # Add business page (allows BusinessChild and BusinessSubIndex as subpages) self.business_index = BusinessIndex() self.business_index.title = "Business Index" self.business_index.slug = "business-index" self.root_page.add_child(instance=self.business_index) - # Add business child + # Add business child (allows no subpages) self.business_child = BusinessChild() self.business_child.title = "Business Child" self.business_child.slug = "business-child" self.business_index.add_child(instance=self.business_child) + # Add business subindex (allows only BusinessChild as subpages) + self.business_subindex = BusinessSubIndex() + self.business_subindex.title = "Business Subindex" + self.business_subindex.slug = "business-subindex" + self.business_index.add_child(instance=self.business_subindex) + # Login self.login() def test_standard_subpage(self): - response = self.client.get(reverse('wagtailadmin_pages_add_subpage', args=(self.standard_index.id, ))) + add_subpage_url = reverse('wagtailadmin_pages_add_subpage', args=(self.standard_index.id, )) + + # explorer should contain a link to 'add child page' + response = self.client.get(reverse('wagtailadmin_explore', args=(self.standard_index.id, ))) + self.assertEqual(response.status_code, 200) + self.assertContains(response, add_subpage_url) + + # add_subpage should give us the full set of page types to choose + response = self.client.get(add_subpage_url) self.assertEqual(response.status_code, 200) self.assertContains(response, 'Standard Child') self.assertContains(response, 'Business Child') def test_business_subpage(self): - response = self.client.get(reverse('wagtailadmin_pages_add_subpage', args=(self.business_index.id, ))) + add_subpage_url = reverse('wagtailadmin_pages_add_subpage', args=(self.business_index.id, )) + + # explorer should contain a link to 'add child page' + response = self.client.get(reverse('wagtailadmin_explore', args=(self.business_index.id, ))) + self.assertEqual(response.status_code, 200) + self.assertContains(response, add_subpage_url) + + # add_subpage should give us a cut-down set of page types to choose + response = self.client.get(add_subpage_url) self.assertEqual(response.status_code, 200) self.assertNotContains(response, 'Standard Child') self.assertContains(response, 'Business Child') def test_business_child_subpage(self): - response = self.client.get(reverse('wagtailadmin_pages_add_subpage', args=(self.business_child.id, ))) + add_subpage_url = reverse('wagtailadmin_pages_add_subpage', args=(self.business_child.id, )) + + # explorer should not contain a link to 'add child page', as this page doesn't accept subpages + response = self.client.get(reverse('wagtailadmin_explore', args=(self.business_child.id, ))) self.assertEqual(response.status_code, 200) - self.assertNotContains(response, 'Standard Child') - self.assertEqual(0, len(response.context['page_types'])) + self.assertNotContains(response, add_subpage_url) + + # this also means that fetching add_subpage is blocked at the permission-check level + response = self.client.get(reverse('wagtailadmin_pages_add_subpage', args=(self.business_child.id, ))) + self.assertEqual(response.status_code, 403) + + def test_cannot_add_invalid_subpage_type(self): + # cannot add SimplePage as a child of BusinessIndex, as SimplePage is not present in subpage_types + response = self.client.get(reverse('wagtailadmin_pages_create', args=('tests', 'simplepage', self.business_index.id))) + self.assertEqual(response.status_code, 403) + + # likewise for BusinessChild which has an empty subpage_types list + response = self.client.get(reverse('wagtailadmin_pages_create', args=('tests', 'simplepage', self.business_child.id))) + self.assertEqual(response.status_code, 403) + + # but we can add a BusinessChild to BusinessIndex + response = self.client.get(reverse('wagtailadmin_pages_create', args=('tests', 'businesschild', self.business_index.id))) + self.assertEqual(response.status_code, 200) + + def test_not_prompted_for_page_type_when_only_one_choice(self): + response = self.client.get(reverse('wagtailadmin_pages_add_subpage', args=(self.business_subindex.id, ))) + # BusinessChild is the only valid subpage type of BusinessSubIndex, so redirect straight there + self.assertRedirects(response, reverse('wagtailadmin_pages_create', args=('tests', 'businesschild', self.business_subindex.id))) diff --git a/wagtail/wagtailadmin/urls.py b/wagtail/wagtailadmin/urls.py index 21f127c1d..ee68f4655 100644 --- a/wagtail/wagtailadmin/urls.py +++ b/wagtail/wagtailadmin/urls.py @@ -50,7 +50,6 @@ urlpatterns += [ url(r'^pages/(\d+)/edit/preview/$', pages.preview_on_edit, name='wagtailadmin_pages_preview_on_edit'), url(r'^pages/preview/$', pages.preview, name='wagtailadmin_pages_preview'), - url(r'^pages/preview_loading/$', pages.preview_loading, name='wagtailadmin_pages_preview_loading'), url(r'^pages/(\d+)/view_draft/$', pages.view_draft, name='wagtailadmin_pages_view_draft'), url(r'^pages/(\d+)/add_subpage/$', pages.add_subpage, name='wagtailadmin_pages_add_subpage'), diff --git a/wagtail/wagtailadmin/views/pages.py b/wagtail/wagtailadmin/views/pages.py index bfca64463..b6677d7ec 100644 --- a/wagtail/wagtailadmin/views/pages.py +++ b/wagtail/wagtailadmin/views/pages.py @@ -12,7 +12,7 @@ from wagtail.wagtailadmin.edit_handlers import TabbedInterface, ObjectList from wagtail.wagtailadmin.forms import SearchForm from wagtail.wagtailadmin import tasks, hooks -from wagtail.wagtailcore.models import Page, PageRevision, get_page_types +from wagtail.wagtailcore.models import Page, PageRevision @permission_required('wagtailadmin.access_admin') @@ -57,6 +57,12 @@ def add_subpage(request, parent_page_id): page_types = sorted(parent_page.clean_subpage_types(), key=lambda pagetype: pagetype.name.lower()) + if len(page_types) == 1: + # Only one page type is available - redirect straight to the create form rather than + # making the user choose + content_type = page_types[0] + return redirect('wagtailadmin_pages_create', content_type.app_label, content_type.model, parent_page.id) + return render(request, 'wagtailadmin/pages/add_subpage.html', { 'parent_page': parent_page, 'page_types': page_types, @@ -109,15 +115,11 @@ def create(request, content_type_app_name, content_type_model_name, parent_page_ except ContentType.DoesNotExist: raise Http404 - page_class = content_type.model_class() - # page must be in the list of allowed subpage types for this parent ID - # == Restriction temporarily relaxed so that as superusers we can add index pages and things - - # == TODO: reinstate this for regular editors when we have distinct user types - # - # if page_class not in parent_page.clean_subpage_types(): - # messages.error(request, "Sorry, you do not have access to create a page of type '%s' here." % content_type.name) - # return redirect('wagtailadmin_pages_select_type') + if content_type not in parent_page.clean_subpage_types(): + raise PermissionDenied + + page_class = content_type.model_class() page = page_class(owner=request.user) edit_handler_class = get_page_edit_handler(page_class) @@ -420,12 +422,6 @@ def preview(request): """ return render(request, 'wagtailadmin/pages/preview.html') -def preview_loading(request): - """ - This page is blank, but must be real HTML so its DOM can be written to once the preview of the page has rendered - """ - return HttpResponse("") - @permission_required('wagtailadmin.access_admin') def unpublish(request, page_id): page = get_object_or_404(Page, id=page_id) diff --git a/wagtail/wagtailcore/models.py b/wagtail/wagtailcore/models.py index 1f0c98d83..6b273e4b6 100644 --- a/wagtail/wagtailcore/models.py +++ b/wagtail/wagtailcore/models.py @@ -843,6 +843,8 @@ class PagePermissionTester(object): def can_add_subpage(self): if not self.user.is_active: return False + if not self.page.specific_class.clean_subpage_types(): # this page model has an empty subpage_types list, so no subpages are allowed + return False return self.user.is_superuser or ('add' in self.permissions) def can_edit(self): @@ -897,10 +899,13 @@ class PagePermissionTester(object): """ Niggly special case for creating and publishing a page in one go. Differs from can_publish in that we want to be able to publish subpages of root, but not - to be able to publish root itself + to be able to publish root itself. (Also, can_publish_subpage returns false if the page + does not allow subpages at all.) """ if not self.user.is_active: return False + if not self.page.specific_class.clean_subpage_types(): # this page model has an empty subpage_types list, so no subpages are allowed + return False return self.user.is_superuser or ('publish' in self.permissions) diff --git a/wagtail/wagtailcore/rich_text.py b/wagtail/wagtailcore/rich_text.py index aa8f25336..8004cb30d 100644 --- a/wagtail/wagtailcore/rich_text.py +++ b/wagtail/wagtailcore/rich_text.py @@ -13,6 +13,8 @@ from wagtail.wagtaildocs.models import Document from wagtail.wagtailimages.models import get_image_model from wagtail.wagtailimages.formats import get_image_format +from wagtail.wagtailadmin import hooks + # Define a set of 'embed handlers' and 'link handlers'. These handle the translation # of 'special' HTML elements in rich text - ones which we do not want to include @@ -158,6 +160,18 @@ LINK_HANDLERS = { # Prepare a whitelisting engine with custom behaviour: # rewrite any elements with a data-embedtype or data-linktype attribute class DbWhitelister(Whitelister): + has_loaded_custom_whitelist_rules = False + + @classmethod + def clean(cls, html): + if not cls.has_loaded_custom_whitelist_rules: + for fn in hooks.get_hooks('construct_whitelister_element_rules'): + cls.element_rules = dict( + cls.element_rules.items() + fn().items()) + cls.has_loaded_custom_whitelist_rules = True + + return super(DbWhitelister, cls).clean(html) + @classmethod def clean_tag_node(cls, doc, tag): if 'data-embedtype' in tag.attrs: diff --git a/wagtail/wagtailcore/tests/test_dbwhitelister.py b/wagtail/wagtailcore/tests/test_dbwhitelister.py new file mode 100644 index 000000000..795b9637c --- /dev/null +++ b/wagtail/wagtailcore/tests/test_dbwhitelister.py @@ -0,0 +1,50 @@ +from django.test import TestCase +from wagtail.wagtailcore.rich_text import DbWhitelister +from wagtail.wagtailcore.whitelist import Whitelister + +from bs4 import BeautifulSoup + +class TestDbWhitelister(TestCase): + def assertHtmlEqual(self, str1, str2): + """ + Assert that two HTML strings are equal at the DOM level + (necessary because we can't guarantee the order that attributes are output in) + """ + self.assertEqual(BeautifulSoup(str1), BeautifulSoup(str2)) + + def test_page_link_is_rewritten(self): + input_html = '

Look at the lovely homepage of my Wagtail site

' + output_html = DbWhitelister.clean(input_html) + expected = '

Look at the lovely homepage of my Wagtail site

' + self.assertHtmlEqual(expected, output_html) + + def test_document_link_is_rewritten(self): + input_html = '

Look at our horribly oversized brochure

' + output_html = DbWhitelister.clean(input_html) + expected = '

Look at our horribly oversized brochure

' + self.assertHtmlEqual(expected, output_html) + + def test_image_embed_is_rewritten(self): + input_html = '

OMG look at this picture of a kitten:

A cute kitten
A kitten, yesterday.

' + output_html = DbWhitelister.clean(input_html) + expected = '

OMG look at this picture of a kitten:

' + self.assertHtmlEqual(expected, output_html) + + def test_media_embed_is_rewritten(self): + input_html = '

OMG look at this video of a kitten:

' + output_html = DbWhitelister.clean(input_html) + expected = '

OMG look at this video of a kitten:

' + self.assertHtmlEqual(expected, output_html) + + def test_whitelist_hooks(self): + # wagtail.tests.wagtail_hooks overrides the whitelist to permit
and + input_html = '
I would put a tax on all people who stand in water.

- Gumby

' + output_html = DbWhitelister.clean(input_html) + expected = '
I would put a tax on all people who stand in water.

- Gumby

' + self.assertHtmlEqual(expected, output_html) + + # check that the base Whitelister class is unaffected by these custom whitelist rules + input_html = '
I would put a tax on all people who stand in water.

- Gumby

' + output_html = Whitelister.clean(input_html) + expected = 'I would put a tax on all people who stand in water.

- Gumby

' + self.assertHtmlEqual(expected, output_html) diff --git a/wagtail/wagtailcore/whitelist.py b/wagtail/wagtailcore/whitelist.py index a3d377bd6..9a5f67cf1 100644 --- a/wagtail/wagtailcore/whitelist.py +++ b/wagtail/wagtailcore/whitelist.py @@ -65,7 +65,8 @@ class Whitelister(object): 'h6': allow_without_attributes, 'hr': allow_without_attributes, 'i': allow_without_attributes, - 'img': attribute_rule({'src': check_url, 'width': True, 'height': True, 'alt': True}), + 'img': attribute_rule({'src': check_url, 'width': True, 'height': True, + 'alt': True}), 'li': allow_without_attributes, 'ol': allow_without_attributes, 'p': allow_without_attributes, @@ -77,7 +78,8 @@ class Whitelister(object): @classmethod def clean(cls, html): - """Clean up an HTML string to contain just the allowed elements / attributes""" + """Clean up an HTML string to contain just the allowed elements / + attributes""" doc = BeautifulSoup(html, 'lxml') cls.clean_node(doc, doc) return unicode(doc) diff --git a/wagtail/wagtailsearch/backends/db.py b/wagtail/wagtailsearch/backends/db.py index 419d8a99b..08fdaf048 100644 --- a/wagtail/wagtailsearch/backends/db.py +++ b/wagtail/wagtailsearch/backends/db.py @@ -1,3 +1,5 @@ +import warnings + from django.db import models from wagtail.wagtailsearch.backends.base import BaseSearch @@ -65,7 +67,8 @@ class DBSearch(BaseSearch): query = query.distinct() # Prefetch related - for prefetch in prefetch_related: - query = query.prefetch_related(prefetch) + if prefetch_related: + for prefetch in prefetch_related: + query = query.prefetch_related(prefetch) return query \ No newline at end of file diff --git a/wagtail/wagtailsearch/backends/elasticsearch.py b/wagtail/wagtailsearch/backends/elasticsearch.py index d2b58d564..5561a3cee 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch.py +++ b/wagtail/wagtailsearch/backends/elasticsearch.py @@ -1,55 +1,260 @@ +from __future__ import absolute_import + +import string +import json +import warnings + from django.db import models -from elasticutils import get_es, S +from elasticsearch import Elasticsearch, NotFoundError, RequestError +from elasticsearch.helpers import bulk from wagtail.wagtailsearch.backends.base import BaseSearch from wagtail.wagtailsearch.indexed import Indexed -import string + +class ElasticSearchQuery(object): + def __init__(self, model, query_string, fields=None, filters={}): + self.model = model + self.query_string = query_string + self.fields = fields or ['_all'] + self.filters = filters + + def _get_filters(self): + # Filters + filters = [] + + # Filter by content type + filters.append({ + 'prefix': { + 'content_type': self.model.indexed_get_content_type() + } + }) + + # Extra filters + if self.filters: + for key, value in self.filters.items(): + if '__' in key: + field, lookup = key.split('__') + else: + field = key + lookup = None + + if lookup is None: + if value is None: + filters.append({ + 'missing': { + 'field': field, + } + }) + else: + filters.append({ + 'term': { + field: value + } + }) + + if lookup in ['startswith', 'prefix']: + filters.append({ + 'prefix': { + field: value + } + }) + + if lookup in ['gt', 'gte', 'lt', 'lte']: + filters.append({ + 'range': { + field: { + lookup: value, + } + } + }) + + if lookup == 'range': + lower, upper = value + filters.append({ + 'range': { + field: { + 'gte': lower, + 'lte': upper, + } + } + }) + + return filters + + def to_es(self): + # Query + query = { + 'query_string': { + 'query': self.query_string, + } + } + + # Fields + if self.fields: + query['query_string']['fields'] = self.fields + + # Filters + filters = self._get_filters() + + return { + 'filtered': { + 'query': query, + 'filter': { + 'and': filters, + } + } + } + + def __repr__(self): + return json.dumps(self.to_es()) class ElasticSearchResults(object): - def __init__(self, model, query, prefetch_related=[]): - self.model = model + def __init__(self, backend, query, prefetch_related=None): + self.backend = backend self.query = query - self.count = query.count() self.prefetch_related = prefetch_related + self.start = 0 + self.stop = None + self._results_cache = None + self._count_cache = None + + def _set_limits(self, start=None, stop=None): + if stop is not None: + if self.stop is not None: + self.stop = min(self.stop, self.start + stop) + else: + self.stop = self.start + stop + + if start is not None: + if self.stop is not None: + self.start = min(self.stop, self.start + start) + else: + self.start = self.start + start + + def _clone(self): + klass = self.__class__ + new = klass(self.backend, self.query, prefetch_related=self.prefetch_related) + new.start = self.start + new.stop = self.stop + return new + + def _do_search(self): + # Params for elasticsearch query + params = dict( + index=self.backend.es_index, + body=dict(query=self.query.to_es()), + _source=False, + fields='pk', + from_=self.start, + ) + + # Add size if set + if self.stop is not None: + params['size'] = self.stop - self.start + + # Send to ElasticSearch + hits = self.backend.es.search(**params) + + # Get pks from results + pks = [hit['fields']['pk'] for hit in hits['hits']['hits']] + + # ElasticSearch 1.x likes to pack pks into lists, unpack them if this has happened + pks = [pk[0] if isinstance(pk, list) else pk for pk in pks] + + # Initialise results dictionary + results = dict((str(pk), None) for pk in pks) + + # Get queryset + queryset = self.query.model.objects.filter(pk__in=pks) + + # Add prefetch related + if self.prefetch_related: + for prefetch in self.prefetch_related: + queryset = queryset.prefetch_related(prefetch) + + # Find objects in database and add them to dict + for obj in queryset: + results[str(obj.pk)] = obj + + # Return results in order given by ElasticSearch + return [results[str(pk)] for pk in pks if results[str(pk)]] + + def _do_count(self): + # Get query + query = self.query.to_es() + + # Elasticsearch 1.x + count = self.backend.es.count( + index=self.backend.es_index, + body=dict(query=query), + ) + + # ElasticSearch 0.90.x fallback + if not count['_shards']['successful'] and "No query registered for [query]]" in count['_shards']['failures'][0]['reason']: + count = self.backend.es.count( + index=self.backend.es_index, + body=query, + ) + + # Get count + hit_count = count['count'] + + # Add limits + hit_count -= self.start + if self.stop is not None: + hit_count = min(hit_count, self.stop - self.start) + + return max(hit_count, 0) + + def results(self): + if self._results_cache is None: + self._results_cache = self._do_search() + return self._results_cache + + def count(self): + if self._count_cache is None: + if self._results_cache is not None: + self._count_cache = len(self._results_cache) + else: + self._count_cache = self._do_count() + return self._count_cache def __getitem__(self, key): + new = self._clone() + if isinstance(key, slice): - # Get primary keys - pk_list_unclean = [result._source["pk"] for result in self.query[key]] + # Set limits + start = int(key.start) if key.start else None + stop = int(key.stop) if key.stop else None + new._set_limits(start, stop) - # Remove duplicate keys (and preserve order) - seen_pks = set() - pk_list = [] - for pk in pk_list_unclean: - if pk not in seen_pks: - seen_pks.add(pk) - pk_list.append(pk) + # Copy results cache + if self._results_cache is not None: + new._results_cache = self._results_cache[key] - # Get results - results = self.model.objects.filter(pk__in=pk_list) - - # Prefetch related - for prefetch in self.prefetch_related: - results = results.prefetch_related(prefetch) - - # Put results into a dictionary (using primary key as the key) - results_dict = dict((str(result.pk), result) for result in results) - - # Build new list with items in the correct order - results_sorted = [results_dict[str(pk)] for pk in pk_list if str(pk) in results_dict] - - # Return the list - return results_sorted + return new else: - # Return a single item - pk = self.query[key]._source["pk"] - return self.model.objects.get(pk=pk) + if self._results_cache is not None: + return self._results_cache[key] + + new.start = key + new.stop = key + 1 + return list(new)[0] + + def __iter__(self): + return iter(self.results()) def __len__(self): - return self.count + return len(self.results()) + + def __repr__(self): + data = list(self[:21]) + if len(data) > 20: + data[-1] = "...(remaining elements truncated)..." + return repr(data) class ElasticSearch(BaseSearch): @@ -64,22 +269,17 @@ class ElasticSearch(BaseSearch): # Get ElasticSearch interface # Any remaining params are passed into the ElasticSearch constructor - self.es = get_es( + self.es = Elasticsearch( urls=self.es_urls, timeout=self.es_timeout, force_new=self.es_force_new, **params) - self.s = S().es( - urls=self.es_urls, - timeout=self.es_timeout, - force_new=self.es_force_new, - **params).indexes(self.es_index) def reset_index(self): # Delete old index try: - self.es.delete_index(self.es_index) - except: + self.es.indices.delete(self.es_index) + except NotFoundError: pass # Settings @@ -128,7 +328,7 @@ class ElasticSearch(BaseSearch): } # Create new index - self.es.create_index(self.es_index, INDEX_SETTINGS) + self.es.indices.create(self.es_index, INDEX_SETTINGS) def add_type(self, model): # Get type name @@ -144,14 +344,14 @@ class ElasticSearch(BaseSearch): }.items() + indexed_fields.items()) # Put mapping - self.es.put_mapping(self.es_index, content_type, { + self.es.indices.put_mapping(index=self.es_index, doc_type=content_type, body={ content_type: { "properties": fields, } }) def refresh_index(self): - self.es.refresh(self.es_index) + self.es.indices.refresh(self.es_index) def add(self, obj): # Make sure the object can be indexed @@ -183,24 +383,33 @@ class ElasticSearch(BaseSearch): type_set[obj_type].append(obj.indexed_build_document()) # Loop through each type and bulk add them - results = [] for type_name, type_objects in type_set.items(): - results.append((type_name, len(type_objects))) - self.es.bulk_index(self.es_index, type_name, type_objects) - return results + # Get list of actions + actions = [] + for obj in type_objects: + action = { + '_index': self.es_index, + '_type': type_name, + '_id': obj['id'], + } + action.update(obj) + actions.append(action) + + bulk(self.es, actions) def delete(self, obj): # Object must be a decendant of Indexed and be a django model if not isinstance(obj, Indexed) or not isinstance(obj, models.Model): return - # Get ID for document - doc_id = obj.indexed_get_document_id() - # Delete document try: - self.es.delete(self.es_index, obj.indexed_get_content_type(), doc_id) - except: + self.es.delete( + self.es_index, + obj.indexed_get_content_type(), + obj.indexed_get_document_id(), + ) + except NotFoundError: pass # Document doesn't exist, ignore this exception def search(self, query_string, model, fields=None, filters={}, prefetch_related=[]): @@ -215,27 +424,5 @@ class ElasticSearch(BaseSearch): if not query_string: return [] - # Query - if fields: - query = self.s.query_raw({ - "query_string": { - "query": query_string, - "fields": fields, - } - }) - else: - query = self.s.query_raw({ - "query_string": { - "query": query_string, - } - }) - - # Filter results by this content type - query = query.filter(content_type__prefix=model.indexed_get_content_type()) - - # Extra filters - if filters: - query = query.filter(**filters) - # Return search results - return ElasticSearchResults(model, query, prefetch_related=prefetch_related) + return ElasticSearchResults(self, ElasticSearchQuery(model, query_string, fields=fields, filters=filters), prefetch_related=prefetch_related)