Merge branch 'kaedroho-html5lib'

This commit is contained in:
Matt Westcott 2014-07-17 13:29:26 +01:00
commit 87ecbd6171
5 changed files with 9 additions and 6 deletions

View file

@ -4,6 +4,7 @@ Changelog
0.5 (xx.xx.20xx)
~~~~~~~~~~~~~~~~
* Added decorator syntax for hooks
* Replaced lxml dependency with html5lib, to simplify installation
0.4.1 (14.07.2014)
~~~~~~~~~~~~~~~~~~

View file

@ -7,8 +7,8 @@ Wagtail 0.5 release notes - IN DEVELOPMENT
:depth: 1
Whats new
=========
What's new
==========
Minor features
@ -27,6 +27,8 @@ Core
MenuItem('Kittens!', '/kittens/', classnames='icon icon-folder-inverse', order=1000)
)
* The lxml library (used for whitelisting and rewriting of rich text fields) has been replaced with the pure-Python html5lib library, to simplify installation.
Bug fixes
~~~~~~~~~

View file

@ -32,7 +32,7 @@ install_requires = [
"django-treebeard==2.0",
"Pillow>=2.3.0",
"beautifulsoup4>=4.3.2",
"lxml>=3.3.0",
"html5lib==0.999",
"Unidecode>=0.04.14",
"six==1.7.3",
'requests==2.3.0',

View file

@ -25,9 +25,9 @@ class TestDbWhitelister(TestCase):
self.assertHtmlEqual(expected, output_html)
def test_image_embed_is_rewritten(self):
input_html = '<p>OMG look at this picture of a kitten: <figure data-embedtype="image" data-id="5" data-format="image-with-caption" data-alt="A cute kitten" class="fancy-image"><img src="/media/images/kitten.jpg" width="320" height="200" alt="A cute kitten" /><figcaption>A kitten, yesterday.</figcaption></figure></p>'
input_html = '<p>OMG look at this picture of a kitten:</p><figure data-embedtype="image" data-id="5" data-format="image-with-caption" data-alt="A cute kitten" class="fancy-image"><img src="/media/images/kitten.jpg" width="320" height="200" alt="A cute kitten" /><figcaption>A kitten, yesterday.</figcaption></figure>'
output_html = DbWhitelister.clean(input_html)
expected = '<p>OMG look at this picture of a kitten: <embed embedtype="image" id="5" format="image-with-caption" alt="A cute kitten" /></p>'
expected = '<p>OMG look at this picture of a kitten:</p><embed embedtype="image" id="5" format="image-with-caption" alt="A cute kitten" />'
self.assertHtmlEqual(expected, output_html)
def test_media_embed_is_rewritten(self):

View file

@ -81,7 +81,7 @@ class Whitelister(object):
def clean(cls, html):
"""Clean up an HTML string to contain just the allowed elements /
attributes"""
doc = BeautifulSoup(html, 'lxml')
doc = BeautifulSoup(html, 'html5lib')
cls.clean_node(doc, doc)
return doc.decode()