Merge branch 'html5lib' of https://github.com/kaedroho/wagtail into kaedroho-html5lib

This commit is contained in:
Matt Westcott 2014-07-17 12:45:07 +01:00
commit acb2b0bccd
3 changed files with 3 additions and 3 deletions

View file

@@ -32,7 +32,7 @@ install_requires = [
"django-treebeard==2.0",
"Pillow>=2.3.0",
"beautifulsoup4>=4.3.2",
-"lxml>=3.3.0",
+"html5lib==0.999",
"Unidecode>=0.04.14",
"six==1.7.3",
'requests==2.3.0',

View file

@@ -27,7 +27,7 @@ class TestDbWhitelister(TestCase):
def test_image_embed_is_rewritten(self):
input_html = '<p>OMG look at this picture of a kitten: <figure data-embedtype="image" data-id="5" data-format="image-with-caption" data-alt="A cute kitten" class="fancy-image"><img src="/media/images/kitten.jpg" width="320" height="200" alt="A cute kitten" /><figcaption>A kitten, yesterday.</figcaption></figure></p>'
output_html = DbWhitelister.clean(input_html)
-expected = '<p>OMG look at this picture of a kitten: <embed embedtype="image" id="5" format="image-with-caption" alt="A cute kitten" /></p>'
+expected = '<p>OMG look at this picture of a kitten: </p><embed embedtype="image" id="5" format="image-with-caption" alt="A cute kitten" /><p></p>'
self.assertHtmlEqual(expected, output_html)
def test_media_embed_is_rewritten(self):

View file

@@ -81,7 +81,7 @@ class Whitelister(object):
def clean(cls, html):
"""Clean up an HTML string to contain just the allowed elements /
attributes"""
-doc = BeautifulSoup(html, 'lxml')
+doc = BeautifulSoup(html, 'html5lib')
cls.clean_node(doc, doc)
return doc.decode()