diff --git a/wagtail/admin/rich_text/converters/contentstate.py b/wagtail/admin/rich_text/converters/contentstate.py index b4c61f8a4..50b6f401a 100644 --- a/wagtail/admin/rich_text/converters/contentstate.py +++ b/wagtail/admin/rich_text/converters/contentstate.py @@ -82,10 +82,7 @@ class ContentstateConverter(): def from_database_format(self, html): self.html_to_contentstate_handler.reset() self.html_to_contentstate_handler.feed(html) - - if not self.html_to_contentstate_handler.contentstate.blocks: - # Draftail does not accept an empty block list as valid, but does accept 'null' as meaning "no content" - return 'null' + self.html_to_contentstate_handler.close() return self.html_to_contentstate_handler.contentstate.as_json(indent=4, separators=(',', ': ')) diff --git a/wagtail/admin/rich_text/converters/html_to_contentstate.py b/wagtail/admin/rich_text/converters/html_to_contentstate.py index ea1c07a5f..c31d0f753 100644 --- a/wagtail/admin/rich_text/converters/html_to_contentstate.py +++ b/wagtail/admin/rich_text/converters/html_to_contentstate.py @@ -20,10 +20,16 @@ class HandlerState: self.current_block = None self.current_inline_styles = [] self.current_entity_ranges = [] + # what to do with leading whitespace on the next text node we encounter: strip, keep or force self.leading_whitespace = STRIP_WHITESPACE self.list_depth = 0 self.list_item_type = None + + # an atomic block which is NOT preceded by a non-atomic block must have a spacer + # paragraph inserted before it + self.has_preceding_nonatomic_block = False + self.pushed_states = [] def push(self): @@ -33,7 +39,8 @@ class HandlerState: 'current_entity_ranges': self.current_entity_ranges, 'leading_whitespace': self.leading_whitespace, 'list_depth': self.list_depth, - 'list_item_type': self.list_item_type + 'list_item_type': self.list_item_type, + 'has_preceding_nonatomic_block': self.has_preceding_nonatomic_block, }) def pop(self): @@ -44,6 +51,20 @@ class HandlerState: self.leading_whitespace = last_state['leading_whitespace'] self.list_depth = last_state['list_depth'] self.list_item_type = last_state['list_item_type'] + self.has_preceding_nonatomic_block = last_state['has_preceding_nonatomic_block'] + + +def add_paragraph_block(state, contentstate): + """ + Utility function for adding an unstyled (paragraph) block to contentstate; + useful for element handlers that aren't paragraph elements themselves, but need + to insert paragraphs to ensure correctness + """ + block = Block('unstyled', depth=state.list_depth) + contentstate.blocks.append(block) + state.current_block = block + state.leading_whitespace = STRIP_WHITESPACE + state.has_preceding_nonatomic_block = True class ListElementHandler: @@ -80,6 +101,7 @@ class BlockElementHandler: contentstate.blocks.append(block) state.current_block = block state.leading_whitespace = STRIP_WHITESPACE + state.has_preceding_nonatomic_block = True def handle_endtag(self, name, state, contentState): assert not state.current_inline_styles, "End of block reached without closing inline style elements" @@ -106,10 +128,7 @@ class InlineStyleElementHandler: if state.current_block is None: # Inline style element encountered at the top level - # start a new paragraph block to contain it - block = Block('unstyled', depth=state.list_depth) - contentstate.blocks.append(block) - state.current_block = block - state.leading_whitespace = STRIP_WHITESPACE + add_paragraph_block(state, contentstate) if state.leading_whitespace == FORCE_WHITESPACE: # any pending whitespace should be output before handling this tag, @@ -140,10 +159,7 @@ class InlineEntityElementHandler: if state.current_block is None: # Inline entity element encountered at the top level - # start a new paragraph block to contain it - block = Block('unstyled', depth=state.list_depth) - contentstate.blocks.append(block) - state.current_block = block - state.leading_whitespace = STRIP_WHITESPACE + add_paragraph_block(state, contentstate) if state.leading_whitespace == FORCE_WHITESPACE: # any pending whitespace should be output before handling this tag, @@ -206,6 +222,11 @@ class AtomicBlockEntityElementHandler: # forcibly close any block that illegally contains this one state.current_block = None + if not state.has_preceding_nonatomic_block: + # if this block is NOT preceded by a non-atomic block, + # need to insert a spacer paragraph + add_paragraph_block(state, contentstate) + attr_dict = dict(attrs) # convert attrs from list of (name, value) tuples to a dict entity = self.create_entity(name, attr_dict, state, contentstate) key = contentstate.add_entity(entity) @@ -217,6 +238,7 @@ class AtomicBlockEntityElementHandler: entity_range.offset = 0 entity_range.length = 1 block.entity_ranges.append(entity_range) + state.has_preceding_nonatomic_block = False def handle_endtag(self, name, state, contentstate): pass @@ -262,11 +284,6 @@ class HtmlToContentStateHandler(HTMLParser): super().reset() - def add_block(self, block): - self.contentstate.blocks.append(block) - self.state.current_block = block - self.state.leading_whitespace = STRIP_WHITESPACE - def handle_starttag(self, name, attrs): attr_dict = dict(attrs) # convert attrs from list of (name, value) tuples to a dict element_handler = self.element_handlers.match(name, attr_dict) @@ -296,7 +313,7 @@ class HtmlToContentStateHandler(HTMLParser): return else: # create a new paragraph block for this content - self.add_block(Block('unstyled', depth=self.state.list_depth)) + add_paragraph_block(self.state, self.contentstate) if content == ' ': # if leading_whitespace = strip, this whitespace node is not significant @@ -325,3 +342,9 @@ class HtmlToContentStateHandler(HTMLParser): self.state.leading_whitespace = KEEP_WHITESPACE self.state.current_block.text += content + + def close(self): + # if content ends in an atomic block (or is empty), need to append a spacer paragraph + if not self.state.has_preceding_nonatomic_block: + add_paragraph_block(self.state, self.contentstate) + super().close() diff --git a/wagtail/admin/tests/test_contentstate.py b/wagtail/admin/tests/test_contentstate.py index 3c277fa5d..5ec5cc37f 100644 --- a/wagtail/admin/tests/test_contentstate.py +++ b/wagtail/admin/tests/test_contentstate.py @@ -404,6 +404,34 @@ class TestHtmlToContentState(TestCase): } }) + def test_add_spacer_paragraph_between_image_embeds(self): + converter = ContentstateConverter(features=['image']) + result = json.loads(converter.from_database_format( + ''' + + + ''' + )) + self.assertContentStateEqual(result, { + 'blocks': [ + {'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'}, + {'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [{'key': 0, 'offset': 0, 'length': 1}], 'depth': 0, 'text': ' ', 'type': 'atomic'}, + {'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'}, + {'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [{'key': 1, 'offset': 0, 'length': 1}], 'depth': 0, 'text': ' ', 'type': 'atomic'}, + {'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'}, + ], + 'entityMap': { + '0': { + 'data': {'format': 'left', 'alt': 'an image', 'id': '1', 'src': '/media/not-found'}, + 'mutability': 'IMMUTABLE', 'type': 'IMAGE' + }, + '1': { + 'data': {'format': 'left', 'alt': 'an image', 'id': '1', 'src': '/media/not-found'}, + 'mutability': 'IMMUTABLE', 'type': 'IMAGE' + }, + } + }) + @patch('wagtail.embeds.embeds.get_embed') def test_media_embed(self, get_embed): get_embed.return_value = Embed( @@ -448,6 +476,62 @@ class TestHtmlToContentState(TestCase): } }) + @patch('wagtail.embeds.embeds.get_embed') + def test_add_spacer_paras_between_media_embeds(self, get_embed): + get_embed.return_value = Embed( + url='https://www.youtube.com/watch?v=Kh0Y2hVe_bw', + max_width=None, + type='video', + html='test html', + title='what are birds', + author_name='look around you', + provider_name='YouTube', + thumbnail_url='http://test/thumbnail.url', + width=1000, + height=1000, + ) + + converter = ContentstateConverter(features=['embed']) + result = json.loads(converter.from_database_format( + ''' + + + ''' + )) + self.assertContentStateEqual(result, { + 'blocks': [ + {'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'}, + {'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [{'key': 0, 'offset': 0, 'length': 1}], 'depth': 0, 'text': ' ', 'type': 'atomic'}, + {'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'}, + {'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [{'key': 1, 'offset': 0, 'length': 1}], 'depth': 0, 'text': ' ', 'type': 'atomic'}, + {'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'}, + ], + 'entityMap': { + '0': { + 'data': { + 'thumbnail': 'http://test/thumbnail.url', + 'embedType': 'video', + 'providerName': 'YouTube', + 'title': 'what are birds', + 'authorName': 'look around you', + 'url': 'https://www.youtube.com/watch?v=Kh0Y2hVe_bw' + }, + 'mutability': 'IMMUTABLE', 'type': 'EMBED' + }, + '1': { + 'data': { + 'thumbnail': 'http://test/thumbnail.url', + 'embedType': 'video', + 'providerName': 'YouTube', + 'title': 'what are birds', + 'authorName': 'look around you', + 'url': 'https://www.youtube.com/watch?v=Kh0Y2hVe_bw' + }, + 'mutability': 'IMMUTABLE', 'type': 'EMBED' + }, + } + }) + def test_hr(self): converter = ContentstateConverter(features=['hr']) result = json.loads(converter.from_database_format( @@ -471,6 +555,34 @@ class TestHtmlToContentState(TestCase): } }) + def test_add_spacer_paragraph_between_hrs(self): + converter = ContentstateConverter(features=['hr']) + result = json.loads(converter.from_database_format( + ''' +