2026-01-11 22:34:27 +00:00
|
|
|
"""Document-level layout."""
|
|
|
|
|
|
|
|
|
|
from ..parser.html import Element, Text
|
|
|
|
|
from ..render.fonts import get_font, linespace
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LayoutLine:
    """One line of text with the position and size data needed to draw it."""

    def __init__(self, text: str, x: float, y: float, font_size: int, char_positions: list = None):
        """Record the line's placement and measure its width.

        Args:
            text: The characters that make up this line.
            x: Horizontal position of the line.
            y: Vertical position of the top of the line.
            font_size: Size handed to ``linespace`` (for the height) and
                ``get_font`` (for the width measurement).
            char_positions: Optional per-character offsets. Any falsy value
                (``None`` or an empty list) is replaced by a new empty list.
        """
        self.text = text
        self.x, self.y = x, y  # y is the top of the line
        self.font_size = font_size
        self.height = linespace(font_size)
        self.width = 0
        self.char_positions = char_positions if char_positions else []

        # Empty text is never measured; its width stays 0.
        if not text:
            return
        self.width = get_font(font_size).measureText(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LayoutBlock:
    """A positioned block that owns the lines laid out for it."""

    def __init__(self, tag: str, block_type: str = "block"):
        """Create an empty block with zeroed geometry.

        Args:
            tag: Tag name recorded on the block.
            block_type: Label describing the kind of block; defaults to
                ``"block"``.
        """
        self.tag, self.block_type = tag, block_type
        self.lines = []  # LayoutLine objects belonging to this block
        # Position and size start at zero; they are plain attributes that
        # the code performing the layout is expected to overwrite.
        self.x = self.y = 0
        self.width = self.height = 0
|
2026-01-09 11:20:46 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class DocumentLayout:
    """Layout engine for a document.

    Walks the node tree under ``<body>``, turns each renderable element or
    text node into a block description, word-wraps its text to the available
    width, and produces ``LayoutBlock`` / ``LayoutLine`` objects stacked
    vertically.

    Attributes:
        node: Root node the layout was created for.
        frame: Stored as given; not read by any method of this class.
        blocks: ``LayoutBlock`` objects produced by the last ``layout()`` call.
        lines: Flat list of every ``LayoutLine`` from the last ``layout()`` call.
        width: Width passed to the last ``layout()`` call.
        height: Final vertical position plus bottom padding, or 0 when the
            last ``layout()`` call found no body.
    """

    # Elements whose content must never be rendered as text.
    _SKIPPED_TAGS = frozenset({"style", "script", "head", "title", "meta", "link"})
    # Pure containers: their children are collected, they add no block themselves.
    _CONTAINER_TAGS = frozenset(
        {"ul", "ol", "div", "section", "article", "main", "header", "footer", "nav"}
    )
    # Tags treated as inline when an element carries no computed style.
    _INLINE_TAGS = frozenset({"span", "a", "strong", "em", "b", "i", "code"})

    # Fallback tables used only when an element has no computed style.
    _DEFAULT_FONT_SIZES = {"h1": 24, "h2": 20, "h3": 18, "h4": 16, "h5": 15, "h6": 14}
    _DEFAULT_MARGINS_TOP = {"h1": 12, "h2": 10, "h3": 8, "p": 6, "li": 4}
    _DEFAULT_MARGINS_BOTTOM = {"h1": 12, "h2": 10, "h3": 8, "p": 12, "li": 4}

    # Extra space added below the last block when computing ``height``.
    _BOTTOM_PADDING = 50

    def __init__(self, node, frame=None):
        """Remember the document root and start with an empty layout.

        Args:
            node: Root of the node tree to lay out.
            frame: Optional owner object; only stored.
        """
        self.node = node
        self.frame = frame
        self.blocks = []  # List of LayoutBlock
        self.lines = []  # Flat list of all LayoutLine for rendering
        self.width = 0
        self.height = 0

    def layout(self, width: int, x_margin: int = 20, y_start: int = 30) -> list:
        """Lay out the document and return its lines.

        Every call discards the previous result. When no body element is
        found the layout is empty and ``height`` is 0.

        Args:
            width: Total available width.
            x_margin: Horizontal margin applied on both sides; also the x
                position of every block and line.
            y_start: Vertical position at which the first block starts.

        Returns:
            List of LayoutLine objects ready for rendering (the same list
            object as ``self.lines``).
        """
        self.width = width
        # Never wrap to less than 10 units, even when the margins exceed the width.
        max_width = max(10, width - 2 * x_margin)
        y = y_start

        self.blocks = []
        self.lines = []
        # Reset so a document without a body does not keep a stale height
        # from an earlier layout pass.
        self.height = 0

        body = self._find_body(self.node)
        if body is None:
            return self.lines

        for block_info in self._collect_blocks(body):
            font_size = block_info.get("font_size", 14)
            text = block_info.get("text", "")

            if not text:
                # A block without text only contributes a little vertical space.
                y += font_size * 0.6
                continue

            # Optional bullet prefix
            if block_info.get("bullet"):
                text = f"• {text}"

            y += block_info.get("margin_top", 6)

            layout_block = LayoutBlock(
                block_info.get("tag", ""), block_info.get("block_type", "block")
            )
            layout_block.x = x_margin
            layout_block.y = y
            layout_block.width = max_width

            font = get_font(font_size)
            line_height = linespace(font_size)
            block_start_y = y

            for line_text in self._wrap_words(font, text, max_width):
                layout_line = LayoutLine(
                    text=line_text,
                    x=x_margin,
                    y=y,  # Top of line
                    font_size=font_size,
                    char_positions=self._char_offsets(font, line_text),
                )
                layout_block.lines.append(layout_line)
                self.lines.append(layout_line)
                y += line_height

            layout_block.height = y - block_start_y
            self.blocks.append(layout_block)

            y += block_info.get("margin_bottom", 10)

        self.height = y + self._BOTTOM_PADDING
        return self.lines

    @staticmethod
    def _wrap_words(font, text: str, max_width: float) -> list:
        """Greedily wrap whitespace-separated words of *text* to *max_width*.

        Each word is measured together with one trailing space. A word that
        alone exceeds *max_width* is placed on its own line, never split.

        Args:
            font: Object providing ``measureText(str)``.
            text: Text to wrap; runs of whitespace collapse to single spaces.
            max_width: Maximum accumulated width of a line.

        Returns:
            List of line strings; empty when *text* contains no words.
        """
        wrapped = []
        current_words = []
        current_width = 0

        for word in text.split():
            word_width = font.measureText(word + " ")
            # Break only when the line already holds a word, so an over-long
            # word cannot produce an empty line.
            if current_words and current_width + word_width > max_width:
                wrapped.append(" ".join(current_words))
                current_words = [word]
                current_width = word_width
            else:
                current_words.append(word)
                current_width += word_width

        if current_words:
            wrapped.append(" ".join(current_words))
        return wrapped

    @staticmethod
    def _char_offsets(font, line_text: str) -> list:
        """Return the measured width of every prefix of *line_text*.

        The result has ``len(line_text) + 1`` entries and starts with ``0.0``.
        Prefixes are measured whole rather than summing single characters.
        """
        offsets = [0.0]
        offsets.extend(
            font.measureText(line_text[:end]) for end in range(1, len(line_text) + 1)
        )
        return offsets

    def _find_body(self, node):
        """Find the body element in the document.

        Searches *node* and then its descendants depth-first. The tag
        comparison is case-insensitive, matching how ``_collect_blocks``
        treats tag names.

        Returns:
            The first body element found, or ``None``.
        """
        if isinstance(node, Element) and (node.tag or "").lower() == "body":
            return node
        for child in getattr(node, "children", []):
            found = self._find_body(child)
            # Compare against None explicitly so the result does not depend
            # on the truthiness of the node object itself.
            if found is not None:
                return found
        return None

    def _collect_blocks(self, node) -> list:
        """Collect renderable blocks from the DOM.

        Direct text children become ``"text"`` blocks. Non-rendered elements
        are skipped, container elements are flattened into their children,
        and every other element with text content becomes a single block
        holding all of its descendant text.

        Returns:
            List of dicts. Element blocks carry the keys ``text``,
            ``font_size``, ``margin_top``, ``margin_bottom``, ``block_type``,
            ``tag``, ``bullet`` and ``style``; text blocks carry only
            ``text``, ``font_size``, ``block_type`` and ``style``.
        """
        blocks = []

        for child in getattr(node, "children", []):
            if isinstance(child, Text):
                txt = child.text.strip()
                if txt:
                    # Use computed style if available
                    style = getattr(child, "computed_style", None)
                    font_size = style.get_int("font-size", 14) if style else 14
                    blocks.append({"text": txt, "font_size": font_size, "block_type": "text", "style": style})
                continue

            if not isinstance(child, Element):
                continue

            tag = child.tag.lower()

            # Skip style and script tags - they shouldn't be rendered
            if tag in self._SKIPPED_TAGS:
                continue

            # Container elements - just recurse, don't add as blocks
            if tag in self._CONTAINER_TAGS:
                blocks.extend(self._collect_blocks(child))
                continue

            content = self._text_of(child)
            if not content:
                continue

            style = getattr(child, "computed_style", None)
            if style:
                font_size = style.get_int("font-size", 14)
                margin_top = style.get_int("margin-top", 6)
                margin_bottom = style.get_int("margin-bottom", 10)
                display = style.get("display", "block")
            else:
                # Fallback to hardcoded defaults
                font_size = self._get_default_font_size(tag)
                margin_top = self._get_default_margin_top(tag)
                margin_bottom = self._get_default_margin_bottom(tag)
                display = "inline" if tag in self._INLINE_TAGS else "block"

            # List items win over the inline/block distinction and get a bullet.
            is_list_item = tag == "li" or display == "list-item"
            if is_list_item:
                block_type = "list-item"
            elif display == "inline":
                block_type = "inline"
            else:
                block_type = "block"

            blocks.append({
                "text": content,
                "font_size": font_size,
                "margin_top": margin_top,
                "margin_bottom": margin_bottom,
                "block_type": block_type,
                "tag": tag,
                "bullet": is_list_item,
                "style": style,
            })

        return blocks

    def _get_default_font_size(self, tag: str) -> int:
        """Get default font size for a tag (fallback when no styles)."""
        return self._DEFAULT_FONT_SIZES.get(tag, 14)

    def _get_default_margin_top(self, tag: str) -> int:
        """Get default top margin for a tag (fallback when no styles)."""
        return self._DEFAULT_MARGINS_TOP.get(tag, 0)

    def _get_default_margin_bottom(self, tag: str) -> int:
        """Get default bottom margin for a tag (fallback when no styles)."""
        return self._DEFAULT_MARGINS_BOTTOM.get(tag, 0)

    def _text_of(self, node) -> str:
        """Extract text content from a node.

        A text node yields its text unchanged. An element yields the
        non-empty text of its children joined by single spaces and stripped
        at both ends. Child elements whose tag is in ``_SKIPPED_TAGS`` (for
        example ``script`` and ``style``) are left out, so their source never
        leaks into rendered text. Anything else yields ``""``.
        """
        if isinstance(node, Text):
            return node.text
        if not isinstance(node, Element):
            return ""

        parts = []
        for child in node.children:
            if isinstance(child, Element) and (child.tag or "").lower() in self._SKIPPED_TAGS:
                continue
            parts.append(self._text_of(child))
        return " ".join(p for p in parts if p).strip()
|