Ensure HTML entities are unescaped in html-to-contentstate conversion

This commit is contained in:
Matt Westcott 2018-02-24 01:36:17 +00:00 committed by Matt Westcott
parent 27b47e9943
commit 9ddd5f62ee
2 changed files with 15 additions and 1 deletion

View file

@@ -273,7 +273,7 @@ class HtmlToContentStateHandler(HTMLParser):
if rule is not None:
self.element_handlers.add_rules(rule['from_database_format'])
super().__init__()
super().__init__(convert_charrefs=True)
def reset(self):
self.state = HandlerState()

View file

@@ -659,3 +659,17 @@ class TestHtmlToContentState(TestCase):
}
}
})
def test_html_entities(self):
    """Entity references in the source HTML appear as literal characters in the block text."""
    # A single paragraph using the &quot;, &lt;, &gt; and &amp; entity references.
    source_html = '''
<p>Arthur &quot;two sheds&quot; Jackson &lt;the third&gt; &amp; his wife</p>
'''
    contentstate_json = ContentstateConverter(features=[]).from_database_format(source_html)
    result = json.loads(contentstate_json)

    # The one expected block carries the decoded text and no styles or entities.
    expected_block = {
        'inlineStyleRanges': [],
        'text': 'Arthur "two sheds" Jackson <the third> & his wife',
        'depth': 0,
        'type': 'unstyled',
        'key': '00000',
        'entityRanges': [],
    }
    self.assertContentStateEqual(result, {
        'entityMap': {},
        'blocks': [expected_block],
    })