Merge branch 'master' into custom-user-model-fixes

Conflicts: wagtail/wagtailadmin/forms.py
2026-05-20 04:51:55 +00:00 · 2014-10-24 13:59:47 -04:00 · 2014-10-24 13:59:47 -04:00 · e32827f143
commit e32827f143
parent 7d753fe99f 793ebd4cc2
10 changed files with 159 additions and 19 deletions
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@ -4,11 +4,16 @@ Changelog
 0.8 (xx.xx.2014)
 ~~~~~~~~~~~~~~~~

+ * Added logging for page operations
 * The save/publish/submit buttons on the page edit page now redirects the user back to the edit page instead of the explorer
 * Signal handlers for ``wagtail.wagtailsearch`` and ``wagtail.contrib.wagtailfrontendcache`` are now automatically registered when using Django 1.7 or above. (Tim Heap)
 * Fix: Replaced references of .username with .get_username() on users for better custom user model support (John-Scott Atlakson)
 * Fix: Unpinned dependency versions for six and requests to help prevent dependency conflicts
 * Fix: Fixed TypeError when getting embed HTML with oembed on Python 3 (John-Scott Atlakson)
+ * Fix: Made HTML whitelisting in rich text fields more robust at catching disallowed URL schemes such as "jav\tascript:" (Tim Heap)
+ * Fix: created_at timestamps on page revisions were not being preserved on page copy, causing revisions to get out of sequence
+ * Fix: When copying pages recursively, revisions of sub-pages were being copied regardless of the copy_revisions flag
+ * Fix: Updated the migration dependencies within the project template to ensure that Wagtail's own migrations consistently apply first.

 0.7 (09.10.2014)
 ~~~~~~~~~~~~~~~~
--- a/docs/releases/0.8.rst
+++ b/docs/releases/0.8.rst
@ -15,6 +15,7 @@ What's new
 Minor features
 ~~~~~~~~~~~~~~

+ * Page operations (creation, publishing, copying etc) are now logged via Python's ``logging`` framework; to configure this, add a logger entry for ``'wagtail'`` or ``'wagtail.core'`` to the ``LOGGING`` setup in your settings file.
 * The save/publish/submit buttons on the page edit page now redirects the user back to the edit page instead of the explorer
 * Signal handlers for ``wagtail.wagtailsearch`` and ``wagtail.contrib.wagtailfrontendcache`` are now automatically registered when using Django 1.7 or above.

@ -25,6 +26,11 @@ Bug fixes
 * Replaced references of .username with .get_username() on users for better custom user model support
 * Unpinned dependency versions for six and requests to help prevent dependency conflicts
 * Fixed TypeError when getting embed HTML with oembed on Python 3
+ * Made HTML whitelisting in rich text fields more robust at catching disallowed URL schemes such as ``jav\tascript:``
+ * ``created_at`` timestamps on page revisions were not being preserved on page copy, causing revisions to get out of sequence
+ * When copying pages recursively, revisions of sub-pages were being copied regardless of the ``copy_revisions`` flag
+ * Updated the migration dependencies within the project template to ensure that Wagtail's own migrations consistently apply first
+

 Upgrade considerations
 ======================
--- a/wagtail/project_template/core/migrations/0001_initial.py
+++ b/wagtail/project_template/core/migrations/0001_initial.py
@ -7,7 +7,7 @@ from django.db import models, migrations
 class Migration(migrations.Migration):

    dependencies = [
-        ('wagtailcore', '0002_initial_data'),
+        ('wagtailcore', '__latest__'),
    ]

    operations = [
--- a/wagtail/wagtailadmin/forms.py
+++ b/wagtail/wagtailadmin/forms.py
@ -40,9 +40,12 @@ class EmailLinkChooserWithLinkTextForm(forms.Form):
 class LoginForm(AuthenticationForm):
    username = forms.CharField(
        max_length=254,
+        widget=forms.TextInput(attrs={'tabindex': '1',}),
    )
    password = forms.CharField(
-        widget=forms.PasswordInput(attrs={'placeholder': ugettext_lazy("Enter password")}),
+        widget=forms.PasswordInput(attrs={'placeholder': ugettext_lazy("Enter password"),
+                                          'tabindex': '2',
+                                          }),
    )

    def __init__(self, request=None, *args, **kwargs):
--- a/wagtail/wagtailadmin/templates/wagtailadmin/login.html
+++ b/wagtail/wagtailadmin/templates/wagtailadmin/login.html
@ -55,7 +55,7 @@
                    </li>
                {% endcomment %}
                <li class="submit">
-                    <input type="submit" value="{% trans 'Sign in' %}"/>
+                    <input type="submit" value="{% trans 'Sign in' %}" tabindex="3"/>
                </li>
            </ul>
        </form>
@ -68,4 +68,4 @@
        $('form input[name=username]').focus();
    })
 </script>
-{% endblock %}
+{% endblock %}
--- a/wagtail/wagtailcore/migrations/0009_remove_auto_now_add_from_pagerevision_created_at.py
+++ b/wagtail/wagtailcore/migrations/0009_remove_auto_now_add_from_pagerevision_created_at.py
@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import models, migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('wagtailcore', '0008_populate_latest_revision_created_at'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='pagerevision',
+            name='created_at',
+            field=models.DateTimeField(),
+        ),
+    ]
--- a/wagtail/wagtailcore/models.py
+++ b/wagtail/wagtailcore/models.py
@ -1,7 +1,7 @@
+import logging
 import warnings

 import six
-from six import string_types
 from six import StringIO
 from six.moves.urllib.parse import urlparse

@ -9,7 +9,7 @@ from modelcluster.models import ClusterableModel, get_all_child_relations

 from django.db import models, connection, transaction
 from django.db.models import Q
-from django.db.models.signals import pre_delete
+from django.db.models.signals import pre_delete, post_delete
 from django.dispatch.dispatcher import receiver
 from django.http import Http404
 from django.core.cache import cache
@ -37,6 +37,9 @@ from wagtail.wagtailsearch import index
 from wagtail.wagtailsearch.backends import get_search_backend


+logger = logging.getLogger('wagtail.core')
+
+
 class SiteManager(models.Manager):
    def get_by_natural_key(self, hostname, port):
        return self.get(hostname=hostname, port=port)
@ -316,8 +319,9 @@ class Page(six.with_metaclass(PageBase, MP_Node, ClusterableModel, index.Indexed
    @transaction.atomic  # ensure that changes are only committed when we have updated all descendant URL paths, to preserve consistency
    def save(self, *args, **kwargs):
        update_descendant_url_paths = False
+        is_new = self.id is None

-        if self.id is None:
+        if is_new:
            # we are creating a record. If we're doing things properly, this should happen
            # through a treebeard method like add_child, in which case the 'path' field
            # has been set and so we can safely call get_parent
@ -341,6 +345,11 @@ class Page(six.with_metaclass(PageBase, MP_Node, ClusterableModel, index.Indexed
        if Site.objects.filter(root_page=self).exists():
            cache.delete('wagtail_site_root_paths')

+        # Log
+        if is_new:
+            cls = type(self)
+            logger.info("Page created: \"%s\" id=%d content_type=%s.%s path=%s", self.title, self.id, cls._meta.app_label, cls.__name__, self.url_path)
+
        return result

    def _update_descendant_url_paths(self, old_url_path, new_url_path):
@ -412,6 +421,7 @@ class Page(six.with_metaclass(PageBase, MP_Node, ClusterableModel, index.Indexed
                raise Http404

    def save_revision(self, user=None, submitted_for_moderation=False, approved_go_live_at=None):
+        # Create revision
        revision = self.revisions.create(
            content_json=self.to_json(),
            user=user,
@ -422,6 +432,12 @@ class Page(six.with_metaclass(PageBase, MP_Node, ClusterableModel, index.Indexed
        self.latest_revision_created_at = revision.created_at
        self.save(update_fields=['latest_revision_created_at'])

+        # Log
+        logger.info("Page edited: \"%s\" id=%d revision_id=%d", self.title, self.id, revision.id)
+
+        if submitted_for_moderation:
+            logger.info("Page submitted for moderation: \"%s\" id=%d revision_id=%d", self.title, self.id, revision.id)
+
        return revision

    def get_latest_revision(self):
@ -448,6 +464,8 @@ class Page(six.with_metaclass(PageBase, MP_Node, ClusterableModel, index.Indexed

            page_unpublished.send(sender=self.specific_class, instance=self.specific)

+            logger.info("Page unpublished: \"%s\" id=%d", self.title, self.id)
+
            self.revisions.update(approved_go_live_at=None)

    def get_context(self, request, *args, **kwargs):
@ -650,6 +668,9 @@ class Page(six.with_metaclass(PageBase, MP_Node, ClusterableModel, index.Indexed
        new_self.save()
        new_self._update_descendant_url_paths(old_url_path, new_url_path)

+        # Log
+        logger.info("Page moved: \"%s\" id=%d path=%s", self.title, self.id, new_url_path)
+
    def copy(self, recursive=False, to=None, update_attrs=None, copy_revisions=True):
        # Make a copy
        page_copy = Page.objects.get(id=self.id).specific
@ -689,10 +710,13 @@ class Page(six.with_metaclass(PageBase, MP_Node, ClusterableModel, index.Indexed
                revision.page = page_copy
                revision.save()

+        # Log
+        logger.info("Page copied: \"%s\" id=%d from=%d", page_copy.title, page_copy.id, self.id)
+
        # Copy child pages
        if recursive:
            for child_page in self.get_children():
-                child_page.specific.copy(recursive=True, to=page_copy)
+                child_page.specific.copy(recursive=True, to=page_copy, copy_revisions=copy_revisions)

        return page_copy

@ -794,12 +818,10 @@ class Page(six.with_metaclass(PageBase, MP_Node, ClusterableModel, index.Indexed
        return ['/']

    def get_sitemap_urls(self):
-        latest_revision = self.get_latest_revision()
-
        return [
            {
                'location': self.full_url,
-                'lastmod': latest_revision.created_at if latest_revision else None
+                'lastmod': self.latest_revision_created_at
            }
        ]

@ -899,6 +921,11 @@ def unpublish_page_before_delete(sender, instance, **kwargs):
        instance.unpublish(commit=False)


+@receiver(post_delete, sender=Page)
+def log_page_deletion(sender, instance, **kwargs):
+    logger.info("Page deleted: \"%s\" id=%d", instance.title, instance.id)
+
+
 class Orderable(models.Model):
    sort_order = models.IntegerField(null=True, blank=True, editable=False)
    sort_order_field = 'sort_order'
@ -917,7 +944,7 @@ class SubmittedRevisionsManager(models.Manager):
 class PageRevision(models.Model):
    page = models.ForeignKey('Page', related_name='revisions')
    submitted_for_moderation = models.BooleanField(default=False)
-    created_at = models.DateTimeField(auto_now_add=True)
+    created_at = models.DateTimeField()
    user = models.ForeignKey(settings.AUTH_USER_MODEL, null=True, blank=True)
    content_json = models.TextField()
    approved_go_live_at = models.DateTimeField(null=True, blank=True)
@ -926,6 +953,12 @@ class PageRevision(models.Model):
    submitted_revisions = SubmittedRevisionsManager()

    def save(self, *args, **kwargs):
+        # Set default value for created_at to now
+        # We cannot use auto_now_add as that will override
+        # any value that is set before saving
+        if self.created_at is None:
+            self.created_at = timezone.now()
+
        super(PageRevision, self).save(*args, **kwargs)
        if self.submitted_for_moderation:
            # ensure that all other revisions of this page have the 'submitted for moderation' flag unset
@ -955,10 +988,12 @@ class PageRevision(models.Model):

    def approve_moderation(self):
        if self.submitted_for_moderation:
+            logger.info("Page moderation approved: \"%s\" id=%d revision_id=%d", self.page.title, self.page.id, self.id)
            self.publish()

    def reject_moderation(self):
        if self.submitted_for_moderation:
+            logger.info("Page moderation rejected: \"%s\" id=%d revision_id=%d", self.page.title, self.page.id, self.id)
            self.submitted_for_moderation = False
            self.save(update_fields=['submitted_for_moderation'])

@ -995,8 +1030,12 @@ class PageRevision(models.Model):
        if page.live:
            page_published.send(sender=page.specific_class, instance=page.specific)

+            logger.info("Page published: \"%s\" id=%d revision_id=%d", page.title, page.id, self.id)
+        elif page.go_live_at:
+            logger.info("Page scheduled for publish: \"%s\" id=%d revision_id=%d go_live_at=%s", page.title, page.id, self.id, page.go_live_at.isoformat())
+
    def __str__(self):
-        return '"' + unicode(self.page) + '" at ' + unicode(self.created_at)
+        return '"' + six.text_type(self.page) + '" at ' + six.text_type(self.created_at)


 PAGE_PERMISSION_TYPE_CHOICES = [
--- a/wagtail/wagtailcore/tests/test_page_model.py
+++ b/wagtail/wagtailcore/tests/test_page_model.py
@ -443,6 +443,23 @@ class TestCopyPage(TestCase):
        # Check that the new revision is not submitted for moderation
        self.assertFalse(new_christmas_event.revisions.first().submitted_for_moderation)

+    def test_copy_page_copies_revisions_and_doesnt_change_created_at(self):
+        christmas_event = EventPage.objects.get(url_path='/home/events/christmas/')
+        christmas_event.save_revision(submitted_for_moderation=True)
+
+        # Set the created_at of the revision to a time in the past
+        revision = christmas_event.get_latest_revision()
+        revision.created_at = datetime.datetime(2014, 1, 1)
+        revision.save()
+
+        # Copy it
+        new_christmas_event = christmas_event.copy(update_attrs={'title': "New christmas event", 'slug': 'new-christmas-event'})
+
+        # Check that the created_at time is the same
+        christmas_event_created_at = christmas_event.get_latest_revision().created_at
+        new_christmas_event_created_at = new_christmas_event.get_latest_revision().created_at
+        self.assertEqual(christmas_event_created_at, new_christmas_event_created_at)
+
    def test_copy_page_copies_revisions_and_doesnt_schedule(self):
        christmas_event = EventPage.objects.get(url_path='/home/events/christmas/')
        christmas_event.save_revision(approved_go_live_at=datetime.datetime(2014, 9, 16, 9, 12, 00, tzinfo=pytz.utc))
@ -456,6 +473,19 @@ class TestCopyPage(TestCase):
        # Check that the new revision is not scheduled
        self.assertEqual(new_christmas_event.revisions.first().approved_go_live_at, None)

+    def test_copy_page_doesnt_copy_revisions_if_told_not_to_do_so(self):
+        christmas_event = EventPage.objects.get(url_path='/home/events/christmas/')
+        christmas_event.save_revision()
+
+        # Copy it
+        new_christmas_event = christmas_event.copy(update_attrs={'title': "New christmas event", 'slug': 'new-christmas-event'}, copy_revisions=False)
+
+        # Check that the revisions weren't copied
+        self.assertEqual(new_christmas_event.revisions.count(), 0, "Revisions were copied")
+
+        # Check that the revisions weren't removed from old page
+        self.assertEqual(christmas_event.revisions.count(), 1, "Revisions were removed from the original page")
+
    def test_copy_page_copies_child_objects_with_nonspecific_class(self):
        # Get chrismas page as Page instead of EventPage
        christmas_event = Page.objects.get(url_path='/home/events/christmas/')
@ -519,6 +549,23 @@ class TestCopyPage(TestCase):
        # Check that the revisions weren't removed from old page
        self.assertEqual(old_christmas_event.specific.revisions.count(), 1, "Revisions were removed from the original page")

+    def test_copy_page_copies_recursively_but_doesnt_copy_revisions_if_told_not_to_do_so(self):
+        events_index = EventIndex.objects.get(url_path='/home/events/')
+        old_christmas_event = events_index.get_children().filter(slug='christmas').first()
+        old_christmas_event.save_revision()
+
+        # Copy it
+        new_events_index = events_index.copy(recursive=True, update_attrs={'title': "New events index", 'slug': 'new-events-index'}, copy_revisions=False)
+
+        # Get christmas event
+        new_christmas_event = new_events_index.get_children().filter(slug='christmas').first()
+
+        # Check that the revisions weren't copied
+        self.assertEqual(new_christmas_event.specific.revisions.count(), 0, "Revisions were copied")
+
+        # Check that the revisions weren't removed from old page
+        self.assertEqual(old_christmas_event.specific.revisions.count(), 1, "Revisions were removed from the original page")
+

 class TestSubpageTypeBusinessRules(TestCase):
    def test_allowed_subpage_types(self):
--- a/wagtail/wagtailcore/tests/test_whitelist.py
+++ b/wagtail/wagtailcore/tests/test_whitelist.py
@ -17,6 +17,13 @@ class TestCheckUrl(TestCase):
    def test_disallowed_url_scheme(self):
        self.assertFalse(bool(check_url("invalid://url")))

+    def test_crafty_disallowed_url_scheme(self):
+        """
+        Some URL parsers do not parse 'jav\tascript:' as a valid scheme.
+        Browsers, however, do. The checker needs to catch these crafty schemes
+        """
+        self.assertFalse(bool(check_url("jav\tascript:alert('XSS')")))
+

 class TestAttributeRule(TestCase):
    def setUp(self):
--- a/wagtail/wagtailcore/whitelist.py
+++ b/wagtail/wagtailcore/whitelist.py
@ -2,19 +2,33 @@
 A generic HTML whitelisting engine, designed to accommodate subclassing to override
 specific rules.
 """
-from six.moves.urllib.parse import urlparse
+import re
+

 from bs4 import BeautifulSoup, NavigableString, Tag


-ALLOWED_URL_SCHEMES = ['', 'http', 'https', 'ftp', 'mailto', 'tel']
+ALLOWED_URL_SCHEMES = ['http', 'https', 'ftp', 'mailto', 'tel']
+
+PROTOCOL_RE = re.compile("^[a-z0-9][-+.a-z0-9]*:")


 def check_url(url_string):
-    # TODO: more paranoid checks (urlparse doesn't catch
-    # "jav\tascript:alert('XSS')")
-    url = urlparse(url_string)
-    return (url_string if url.scheme in ALLOWED_URL_SCHEMES else None)
+    # Remove control characters and other disallowed characters
+    # Browsers sometimes ignore these, so that 'jav\tascript:alert("XSS")'
+    # is treated as a valid javascript: link
+
+    unescaped = url_string.lower()
+    unescaped = unescaped.replace("&lt;", "<")
+    unescaped = unescaped.replace("&gt;", ">")
+    unescaped = unescaped.replace("&amp;", "&")
+    unescaped = re.sub("[`\000-\040\177-\240\s]+", '', unescaped)
+    unescaped = unescaped.replace("\ufffd", "")
+    if PROTOCOL_RE.match(unescaped):
+        protocol = unescaped.split(':', 1)[0]
+        if protocol not in ALLOWED_URL_SCHEMES:
+            return None
+    return url_string


 def attribute_rule(allowed_attrs):