diff --git a/wagtail/core/tests/test_whitelist.py b/wagtail/core/tests/test_whitelist.py
index eb7c60d43..1eba4893b 100644
--- a/wagtail/core/tests/test_whitelist.py
+++ b/wagtail/core/tests/test_whitelist.py
@@ -143,3 +143,8 @@ class TestWhitelister(TestCase):
string = 'snowman Yorkshire'
cleaned_string = Whitelister.clean(string)
self.assertEqual(cleaned_string, 'snowman Yorkshire')
+
+ def test_quoting(self):
+ string = '
'
+ cleaned_string = Whitelister.clean(string)
+ self.assertEqual(cleaned_string, '
')
diff --git a/wagtail/core/whitelist.py b/wagtail/core/whitelist.py
index e93715d05..a2a4041e6 100644
--- a/wagtail/core/whitelist.py
+++ b/wagtail/core/whitelist.py
@@ -5,6 +5,7 @@ specific rules.
import re
from bs4 import BeautifulSoup, Comment, NavigableString, Tag
+from django.utils.html import escape
ALLOWED_URL_SCHEMES = ['http', 'https', 'ftp', 'mailto', 'tel']
@@ -96,7 +97,13 @@ class Whitelister:
attributes"""
doc = BeautifulSoup(html, 'html5lib')
cls.clean_node(doc, doc)
- return doc.decode()
+
+ # Pass strings through django.utils.html.escape when generating the final HTML.
+ # This differs from BeautifulSoup's default EntitySubstitution.substitute_html formatter
+ # in that it escapes " to &quot; as well as escaping < > & - if we don't do this, then
+ # BeautifulSoup will try to be clever and use single-quotes to wrap attribute values,
+ # which confuses our regexp-based db-HTML-to-real-HTML conversion.
+ return doc.decode(formatter=escape)
@classmethod
def clean_node(cls, doc, node):