Add spacer paragraphs around atomic blocks where necessary

This commit is contained in:
Matt Westcott 2018-02-23 22:08:21 +00:00 committed by Matt Westcott
parent bde97f55fd
commit 27b47e9943
3 changed files with 153 additions and 19 deletions

View file

@ -82,10 +82,7 @@ class ContentstateConverter():
def from_database_format(self, html):
    """
    Convert rich text held as database-format HTML into a contentstate JSON string.

    The HTML is run through self.html_to_contentstate_handler (reset, feed, close) and the
    resulting contentstate is serialised with as_json() using a 4-space indent.

    Returns the string 'null' if no blocks were produced, otherwise the JSON string.
    """
    handler = self.html_to_contentstate_handler
    handler.reset()
    handler.feed(html)
    # Finalise the parse before looking at the result. The handler appears to be an
    # HTMLParser subclass, and HTMLParser can hold back trailing text (for example a
    # dangling '&') until close() is called - checking the block list before that point
    # could wrongly report such content as empty.
    handler.close()
    if not handler.contentstate.blocks:
        # Draftail does not accept an empty block list as valid, but does accept 'null' as meaning "no content"
        return 'null'
    return handler.contentstate.as_json(indent=4, separators=(',', ': '))

View file

@ -20,10 +20,16 @@ class HandlerState:
self.current_block = None
self.current_inline_styles = []
self.current_entity_ranges = []
# what to do with leading whitespace on the next text node we encounter: strip, keep or force
self.leading_whitespace = STRIP_WHITESPACE
self.list_depth = 0
self.list_item_type = None
# an atomic block which is NOT preceded by a non-atomic block must have a spacer
# paragraph inserted before it
self.has_preceding_nonatomic_block = False
self.pushed_states = []
def push(self):
@ -33,7 +39,8 @@ class HandlerState:
'current_entity_ranges': self.current_entity_ranges,
'leading_whitespace': self.leading_whitespace,
'list_depth': self.list_depth,
'list_item_type': self.list_item_type
'list_item_type': self.list_item_type,
'has_preceding_nonatomic_block': self.has_preceding_nonatomic_block,
})
def pop(self):
@ -44,6 +51,20 @@ class HandlerState:
self.leading_whitespace = last_state['leading_whitespace']
self.list_depth = last_state['list_depth']
self.list_item_type = last_state['list_item_type']
self.has_preceding_nonatomic_block = last_state['has_preceding_nonatomic_block']
def add_paragraph_block(state, contentstate):
    """
    Open a new unstyled (paragraph) block at the current list depth and make it the
    block that following content is written to.

    Meant for element handlers that are not paragraph elements themselves but have to
    start a paragraph for the resulting contentstate to be correct.
    """
    paragraph = Block('unstyled', depth=state.list_depth)
    # point the handler state at the new paragraph
    state.current_block = paragraph
    state.leading_whitespace = STRIP_WHITESPACE
    # record that a non-atomic block has now been emitted
    state.has_preceding_nonatomic_block = True
    contentstate.blocks.append(paragraph)
class ListElementHandler:
@ -80,6 +101,7 @@ class BlockElementHandler:
contentstate.blocks.append(block)
state.current_block = block
state.leading_whitespace = STRIP_WHITESPACE
state.has_preceding_nonatomic_block = True
def handle_endtag(self, name, state, contentState):
assert not state.current_inline_styles, "End of block reached without closing inline style elements"
@ -106,10 +128,7 @@ class InlineStyleElementHandler:
if state.current_block is None:
# Inline style element encountered at the top level -
# start a new paragraph block to contain it
block = Block('unstyled', depth=state.list_depth)
contentstate.blocks.append(block)
state.current_block = block
state.leading_whitespace = STRIP_WHITESPACE
add_paragraph_block(state, contentstate)
if state.leading_whitespace == FORCE_WHITESPACE:
# any pending whitespace should be output before handling this tag,
@ -140,10 +159,7 @@ class InlineEntityElementHandler:
if state.current_block is None:
# Inline entity element encountered at the top level -
# start a new paragraph block to contain it
block = Block('unstyled', depth=state.list_depth)
contentstate.blocks.append(block)
state.current_block = block
state.leading_whitespace = STRIP_WHITESPACE
add_paragraph_block(state, contentstate)
if state.leading_whitespace == FORCE_WHITESPACE:
# any pending whitespace should be output before handling this tag,
@ -206,6 +222,11 @@ class AtomicBlockEntityElementHandler:
# forcibly close any block that illegally contains this one
state.current_block = None
if not state.has_preceding_nonatomic_block:
# if this block is NOT preceded by a non-atomic block,
# need to insert a spacer paragraph
add_paragraph_block(state, contentstate)
attr_dict = dict(attrs) # convert attrs from list of (name, value) tuples to a dict
entity = self.create_entity(name, attr_dict, state, contentstate)
key = contentstate.add_entity(entity)
@ -217,6 +238,7 @@ class AtomicBlockEntityElementHandler:
entity_range.offset = 0
entity_range.length = 1
block.entity_ranges.append(entity_range)
state.has_preceding_nonatomic_block = False
def handle_endtag(self, name, state, contentstate):
    """
    No-op: nothing is done when the end tag of this element is reached.
    """
    return None
@ -262,11 +284,6 @@ class HtmlToContentStateHandler(HTMLParser):
super().reset()
def add_block(self, block):
    """
    Register block as the newest block of the contentstate and make it the current
    block, with leading whitespace on the next text node set to be stripped.
    """
    parser_state = self.state
    parser_state.current_block = block
    parser_state.leading_whitespace = STRIP_WHITESPACE
    self.contentstate.blocks.append(block)
def handle_starttag(self, name, attrs):
attr_dict = dict(attrs) # convert attrs from list of (name, value) tuples to a dict
element_handler = self.element_handlers.match(name, attr_dict)
@ -296,7 +313,7 @@ class HtmlToContentStateHandler(HTMLParser):
return
else:
# create a new paragraph block for this content
self.add_block(Block('unstyled', depth=self.state.list_depth))
add_paragraph_block(self.state, self.contentstate)
if content == ' ':
# if leading_whitespace = strip, this whitespace node is not significant
@ -325,3 +342,9 @@ class HtmlToContentStateHandler(HTMLParser):
self.state.leading_whitespace = KEEP_WHITESPACE
self.state.current_block.text += content
def close(self):
    """
    Finish parsing, appending a trailing spacer paragraph first when one is needed.
    """
    parser_state = self.state
    spacer_needed = not parser_state.has_preceding_nonatomic_block
    if spacer_needed:
        # content ends in an atomic block (or is empty), so a spacer paragraph has to follow
        add_paragraph_block(parser_state, self.contentstate)
    super().close()

View file

@ -404,6 +404,34 @@ class TestHtmlToContentState(TestCase):
}
})
def test_add_spacer_paragraph_between_image_embeds(self):
# Two consecutive image embeds with no other content: the converted contentstate is
# expected to hold an empty 'unstyled' block before the first atomic block, between
# the two atomic blocks, and after the last one.
converter = ContentstateConverter(features=['image'])
result = json.loads(converter.from_database_format(
'''
<embed embedtype="image" alt="an image" id="1" format="left" />
<embed embedtype="image" alt="an image" id="1" format="left" />
'''
))
# NOTE(review): every expected block uses the key '00000' - presumably
# assertContentStateEqual normalises block keys before comparing; confirm against its definition.
# Each embed gets its own entity (keys 0 and 1) even though both refer to the same image id.
self.assertContentStateEqual(result, {
'blocks': [
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [{'key': 0, 'offset': 0, 'length': 1}], 'depth': 0, 'text': ' ', 'type': 'atomic'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [{'key': 1, 'offset': 0, 'length': 1}], 'depth': 0, 'text': ' ', 'type': 'atomic'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'},
],
'entityMap': {
'0': {
'data': {'format': 'left', 'alt': 'an image', 'id': '1', 'src': '/media/not-found'},
'mutability': 'IMMUTABLE', 'type': 'IMAGE'
},
'1': {
'data': {'format': 'left', 'alt': 'an image', 'id': '1', 'src': '/media/not-found'},
'mutability': 'IMMUTABLE', 'type': 'IMAGE'
},
}
})
@patch('wagtail.embeds.embeds.get_embed')
def test_media_embed(self, get_embed):
get_embed.return_value = Embed(
@ -448,6 +476,62 @@ class TestHtmlToContentState(TestCase):
}
})
@patch('wagtail.embeds.embeds.get_embed')
def test_add_spacer_paras_between_media_embeds(self, get_embed):
# Two consecutive media embeds with no other content: the converted contentstate is
# expected to hold an empty 'unstyled' block before, between and after the two
# atomic blocks.
# get_embed is patched to return this fixed Embed, so the expected entity data below
# is taken from these values.
get_embed.return_value = Embed(
url='https://www.youtube.com/watch?v=Kh0Y2hVe_bw',
max_width=None,
type='video',
html='test html',
title='what are birds',
author_name='look around you',
provider_name='YouTube',
thumbnail_url='http://test/thumbnail.url',
width=1000,
height=1000,
)
converter = ContentstateConverter(features=['embed'])
result = json.loads(converter.from_database_format(
'''
<embed embedtype="media" url="https://www.youtube.com/watch?v=Kh0Y2hVe_bw" />
<embed embedtype="media" url="https://www.youtube.com/watch?v=Kh0Y2hVe_bw" />
'''
))
# NOTE(review): every expected block uses the key '00000' - presumably
# assertContentStateEqual normalises block keys before comparing; confirm against its definition.
self.assertContentStateEqual(result, {
'blocks': [
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [{'key': 0, 'offset': 0, 'length': 1}], 'depth': 0, 'text': ' ', 'type': 'atomic'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [{'key': 1, 'offset': 0, 'length': 1}], 'depth': 0, 'text': ' ', 'type': 'atomic'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'},
],
'entityMap': {
'0': {
'data': {
'thumbnail': 'http://test/thumbnail.url',
'embedType': 'video',
'providerName': 'YouTube',
'title': 'what are birds',
'authorName': 'look around you',
'url': 'https://www.youtube.com/watch?v=Kh0Y2hVe_bw'
},
'mutability': 'IMMUTABLE', 'type': 'EMBED'
},
'1': {
'data': {
'thumbnail': 'http://test/thumbnail.url',
'embedType': 'video',
'providerName': 'YouTube',
'title': 'what are birds',
'authorName': 'look around you',
'url': 'https://www.youtube.com/watch?v=Kh0Y2hVe_bw'
},
'mutability': 'IMMUTABLE', 'type': 'EMBED'
},
}
})
def test_hr(self):
converter = ContentstateConverter(features=['hr'])
result = json.loads(converter.from_database_format(
@ -471,6 +555,34 @@ class TestHtmlToContentState(TestCase):
}
})
def test_add_spacer_paragraph_between_hrs(self):
# Two consecutive <hr /> elements with no other content: the converted contentstate is
# expected to hold an empty 'unstyled' block before, between and after the two
# atomic blocks.
converter = ContentstateConverter(features=['hr'])
result = json.loads(converter.from_database_format(
'''
<hr />
<hr />
'''
))
# NOTE(review): every expected block uses the key '00000' - presumably
# assertContentStateEqual normalises block keys before comparing; confirm against its definition.
# Each <hr /> gets its own HORIZONTAL_RULE entity (keys 0 and 1) with empty data.
self.assertContentStateEqual(result, {
'blocks': [
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [{'key': 0, 'offset': 0, 'length': 1}], 'depth': 0, 'text': ' ', 'type': 'atomic'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [{'key': 1, 'offset': 0, 'length': 1}], 'depth': 0, 'text': ' ', 'type': 'atomic'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'},
],
'entityMap': {
'0': {
'data': {},
'mutability': 'IMMUTABLE', 'type': 'HORIZONTAL_RULE'
},
'1': {
'data': {},
'mutability': 'IMMUTABLE', 'type': 'HORIZONTAL_RULE'
},
}
})
def test_block_element_in_paragraph(self):
converter = ContentstateConverter(features=['hr'])
result = json.loads(converter.from_database_format(
@ -533,10 +645,12 @@ class TestHtmlToContentState(TestCase):
))
# ignoring the paragraph completely would probably be better,
# but we'll settle for an empty preceding paragraph and not crashing as the next best thing...
# (and if it's the first/last block we actually do want a spacer paragraph anyhow)
self.assertContentStateEqual(result, {
'blocks': [
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [{'key': 0, 'offset': 0, 'length': 1}], 'depth': 0, 'text': ' ', 'type': 'atomic'},
{'key': '00000', 'inlineStyleRanges': [], 'entityRanges': [], 'depth': 0, 'text': '', 'type': 'unstyled'},
],
'entityMap': {
'0': {