Enhance DOM graph visualization and image handling

- Implement PNG rendering for DOM graphs, with fallback to DOT format.
- Add support for max-width constraints in image layout based on CSS styles.
- Introduce caching mechanisms for image loading, including tracking failed and pending loads.
- Update HTML parser to handle void elements correctly.
- Modify template rendering to support PNG graph files.
This commit is contained in:
Benedikt Willi 2026-01-13 14:23:45 +01:00
parent 8c2d360515
commit ce5dbff562
8 changed files with 238 additions and 35 deletions

View file

@ -942,7 +942,7 @@ class Chrome:
def _show_dom_graph(self):
"""Generate and display DOM graph for current tab."""
from ..debug.dom_graph import render_dom_graph_to_svg, save_dom_graph, print_dom_tree
from ..debug.dom_graph import render_dom_graph_to_png, save_dom_graph, print_dom_tree
if not self.browser.active_tab:
self.logger.warning("No active tab to visualize")
@ -957,8 +957,8 @@ class Chrome:
output_dir = Path.home() / ".cache" / "bowser"
output_dir.mkdir(parents=True, exist_ok=True)
# Try SVG first, fallback to DOT
svg_path = output_dir / "dom_graph.svg"
# Try PNG first, fallback to DOT
png_path = output_dir / "dom_graph.png"
dot_path = output_dir / "dom_graph.dot"
self.logger.info("Generating DOM graph...")
@ -971,11 +971,11 @@ class Chrome:
print(tree_text)
print("="*60 + "\n")
# Try to render as SVG
if render_dom_graph_to_svg(frame.document, str(svg_path)):
# Try to render as PNG
if render_dom_graph_to_png(frame.document, str(png_path)):
# Open in new browser tab
self.logger.info(f"Opening DOM graph in new tab: {svg_path}")
self.browser.new_tab(f"about:dom-graph?path={svg_path}")
self.logger.info(f"Opening DOM graph in new tab: {png_path}")
self.browser.new_tab(f"about:dom-graph?path={png_path}")
else:
# Fallback to DOT file
if save_dom_graph(frame.document, str(dot_path)):

View file

@ -149,6 +149,47 @@ def render_dom_graph_to_svg(document: Optional[Element], output_path: str) -> bo
return False
def render_dom_graph_to_png(document: Optional[Element], output_path: str) -> bool:
    """
    Render DOM tree as a PNG image using Graphviz (if available).

    The DOT source returned by generate_dot_graph() is piped to the
    ``dot`` executable on stdin; ``dot`` writes the image to output_path.

    Args:
        document: Root element of the DOM tree
        output_path: Path where to save the .png file

    Returns:
        True if successful, False otherwise (Graphviz missing, timed out,
        exited with a non-zero status, or any other error)
    """
    # Imported before the try block so subprocess.TimeoutExpired can be
    # named in an except clause below.
    import subprocess

    logger = logging.getLogger("bowser.debug")

    try:
        dot_content = generate_dot_graph(document)

        # Try to render with graphviz
        result = subprocess.run(
            ['dot', '-Tpng', '-o', output_path],
            input=dot_content.encode('utf-8'),
            capture_output=True,
            timeout=10,
        )
    except FileNotFoundError:
        logger.warning("Graphviz 'dot' command not found. Install graphviz for PNG output.")
        return False
    except subprocess.TimeoutExpired:
        # Report the timeout explicitly instead of as a generic failure.
        logger.warning("Graphviz rendering timed out after 10 seconds")
        return False
    except Exception as e:
        logger.error(f"Failed to render DOM graph: {e}")
        return False

    if result.returncode == 0:
        logger.info(f"DOM graph rendered to {output_path}")
        return True

    # stderr is raw bytes from an external tool; replace undecodable bytes
    # so the diagnostic is still logged rather than raising here.
    stderr_text = result.stderr.decode('utf-8', errors='replace')
    logger.warning(f"Graphviz rendering failed: {stderr_text}")
    return False
def print_dom_tree(node, indent: int = 0, max_depth: int = 10) -> str:
"""
Generate a text representation of the DOM tree.

View file

@ -240,7 +240,24 @@ class DocumentLayout:
if tag == "img":
image_layout = ImageLayout(child)
image_layout.load(self.base_url, async_load=self.async_images)
image_layout.layout(max_width=self.width - 40 if self.width > 40 else 800)
# Get computed style for max-width constraint
style = getattr(child, "computed_style", None)
max_width_css = None
if style:
max_width_val = style.get("max-width", "")
if max_width_val == "100%":
# 100% means constrain to container width
max_width_css = self.width - 40 if self.width > 40 else 800
elif max_width_val.endswith("px"):
try:
max_width_css = float(max_width_val[:-2])
except ValueError:
pass
# Use CSS max-width or default container width
effective_max_width = max_width_css if max_width_css else (self.width - 40 if self.width > 40 else 800)
image_layout.layout(max_width=effective_max_width)
# Get computed style for margins
style = getattr(child, "computed_style", None)
@ -439,7 +456,24 @@ class DocumentLayout:
if child.tag.lower() == "img":
image_layout = ImageLayout(child)
image_layout.load(self.base_url, async_load=self.async_images)
image_layout.layout(max_width=self.width - 40 if self.width > 40 else 800)
# Get computed style for max-width constraint
style = getattr(child, "computed_style", None)
max_width_css = None
if style:
max_width_val = style.get("max-width", "")
if max_width_val == "100%":
# 100% means constrain to container width
max_width_css = self.width - 40 if self.width > 40 else 800
elif max_width_val.endswith("px"):
try:
max_width_css = float(max_width_val[:-2])
except ValueError:
pass
# Use CSS max-width or default container width
effective_max_width = max_width_css if max_width_css else (self.width - 40 if self.width > 40 else 800)
image_layout.layout(max_width=effective_max_width)
style = getattr(child, "computed_style", None)
if style:

View file

@ -4,7 +4,7 @@ import logging
from typing import Optional, Callable
import skia
from ..network.images import load_image, load_image_async
from ..network.images import load_image, load_image_async, get_cached_image, is_data_url, has_image_failed
logger = logging.getLogger("bowser.layout.embed")
@ -36,6 +36,7 @@ class ImageLayout:
self._load_task_id: Optional[int] = None
self._src = ""
self._base_url: Optional[str] = None
self._max_width: Optional[float] = None # Store max_width for async re-layout
def load(self, base_url: Optional[str] = None, async_load: bool = False):
"""
@ -58,10 +59,26 @@ class ImageLayout:
self._src = src
self._base_url = base_url
# Check cache first (fast, non-blocking)
cached = get_cached_image(src, base_url)
if cached:
self.image = cached
return
# Skip images that previously failed to load (e.g., SVG)
if has_image_failed(src, base_url):
return
# Data URLs should be loaded synchronously (they're inline, no network)
if is_data_url(src):
self.image = load_image(src, base_url)
return
if async_load:
# Load in background thread
self._load_async(src, base_url)
else:
# Synchronous load (for tests or cached images)
# Synchronous load (blocks UI - use sparingly)
self.image = load_image(src, base_url)
def _load_async(self, src: str, base_url: Optional[str]):
@ -99,7 +116,6 @@ class ImageLayout:
# Calculate dimensions based on attributes or intrinsic size
if width_attr and height_attr:
# Both specified - use them
try:
self.width = float(width_attr)
self.height = float(height_attr)
@ -107,7 +123,6 @@ class ImageLayout:
self.width = intrinsic_width
self.height = intrinsic_height
elif width_attr:
# Only width specified - maintain aspect ratio
try:
self.width = float(width_attr)
if intrinsic_width > 0:
@ -119,7 +134,6 @@ class ImageLayout:
self.width = intrinsic_width
self.height = intrinsic_height
elif height_attr:
# Only height specified - maintain aspect ratio
try:
self.height = float(height_attr)
if intrinsic_height > 0:
@ -131,10 +145,15 @@ class ImageLayout:
self.width = intrinsic_width
self.height = intrinsic_height
else:
# No explicit dimensions - use intrinsic size
self.width = intrinsic_width
self.height = intrinsic_height
# Apply max_width constraint if set
if self._max_width and self.width > self._max_width:
aspect_ratio = intrinsic_height / intrinsic_width if intrinsic_width > 0 else 1
self.width = self._max_width
self.height = self.width * aspect_ratio
@property
def is_loading(self) -> bool:
"""True if image is currently being loaded."""
@ -155,6 +174,9 @@ class ImageLayout:
Returns:
Width of the image (for inline layout)
"""
# Store max_width for async image load re-layout
self._max_width = max_width
if not self.image:
# If image failed to load, use alt text dimensions
# For now, just use a placeholder size
@ -208,7 +230,7 @@ class ImageLayout:
self.width = intrinsic_width
self.height = intrinsic_height
# Constrain to max_width if specified
# Always constrain to max_width if specified (applies to all cases)
if max_width and self.width > max_width:
aspect_ratio = intrinsic_height / intrinsic_width if intrinsic_width > 0 else 1
self.width = max_width

View file

@ -27,6 +27,8 @@ class ImageCache:
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._cache = {}
cls._instance._failed = set() # URLs that failed to load
cls._instance._pending = set() # URLs currently being loaded
cls._instance._cache_lock = threading.Lock()
return cls._instance
@ -39,16 +41,43 @@ class ImageCache:
"""Cache an image by URL."""
with self._cache_lock:
self._cache[url] = image
self._pending.discard(url) # No longer pending
def has(self, url: str) -> bool:
    """Return True when an image is stored in the cache under ``url``."""
    with self._cache_lock:
        is_cached = url in self._cache
    return is_cached
def mark_pending(self, url: str) -> bool:
    """
    Claim ``url`` for loading.

    Returns:
        True if the URL was newly recorded as pending; False if it is
        already cached, already marked failed, or already pending.
    """
    with self._cache_lock:
        already_known = any(
            url in bucket
            for bucket in (self._cache, self._failed, self._pending)
        )
        if not already_known:
            self._pending.add(url)
        return not already_known
def mark_failed(self, url: str):
    """Record ``url`` as failed (to prevent retries) and drop its pending mark."""
    with self._cache_lock:
        # A failed URL is no longer in flight.
        self._pending.discard(url)
        self._failed.add(url)
def has_failed(self, url: str) -> bool:
    """Return True when ``url`` is recorded in the set of failed loads."""
    with self._cache_lock:
        failed_before = url in self._failed
    return failed_before
def is_pending(self, url: str) -> bool:
    """Return True when ``url`` is recorded in the set of pending loads."""
    with self._cache_lock:
        in_flight = url in self._pending
    return in_flight
def clear(self):
    """Empty the image cache together with the failed and pending URL sets."""
    with self._cache_lock:
        # Clear each container in place so the same objects stay attached.
        for store in (self._cache, self._failed, self._pending):
            store.clear()
# Callbacks for image load completion
@ -57,6 +86,43 @@ ImageCallback = Callable[[Optional[skia.Image]], None]
BytesCallback = Callable[[Optional[bytes], str], None]
def get_cached_image(url: str, base_url: Optional[str] = None) -> Optional[skia.Image]:
    """
    Look up an image in the cache without triggering any load.

    Args:
        url: Image URL or file path
        base_url: Base URL for resolving relative URLs

    Returns:
        Cached Skia Image, or None if not in cache
    """
    # The cache is keyed by the resolved URL, so resolve before the lookup.
    resolved = _resolve_url(url, base_url)
    return ImageCache().get(resolved)
def has_image_failed(url: str, base_url: Optional[str] = None) -> bool:
    """
    Report whether loading this image URL has failed before.

    Args:
        url: Image URL or file path
        base_url: Base URL for resolving relative URLs

    Returns:
        True if the URL failed to load previously
    """
    # Failures are recorded against the resolved URL, so resolve first.
    resolved = _resolve_url(url, base_url)
    return ImageCache().has_failed(resolved)
def is_data_url(url: str) -> bool:
    """
    Check if URL is a data: URL.

    URL schemes are case-insensitive, so ``DATA:`` and ``Data:`` are
    recognised as well as ``data:``.

    Args:
        url: Image URL or file path

    Returns:
        True if the URL starts with the data: scheme
    """
    # Compare only the scheme prefix, ignoring case; the payload is untouched.
    return url[:5].lower() == 'data:'
def load_image(url: str, base_url: Optional[str] = None) -> Optional[skia.Image]:
"""
Load an image from a URL or file path (synchronous).
@ -89,6 +155,9 @@ def load_image(url: str, base_url: Optional[str] = None) -> Optional[skia.Image]
# Decode with Skia
image = skia.Image.MakeFromEncoded(data)
if image:
# Convert to raster image for safe drawing
# (encoded images may crash on some operations)
image = image.makeRasterImage()
cache.set(full_url, image)
logger.debug(f"Loaded image: {full_url} ({image.width()}x{image.height()})")
@ -162,6 +231,14 @@ def load_image_async(
GLib.idle_add(lambda: on_complete(cached) or False)
return -1 # No task needed
# Atomically check if failed/pending and mark as pending
# This prevents multiple concurrent loads of the same URL
if not cache.mark_pending(full_url):
logger.debug(f"Skipping image (cached/failed/pending): {full_url}")
if on_complete:
GLib.idle_add(lambda: on_complete(None) or False)
return -1
def do_load_bytes():
"""Load raw bytes in background thread."""
return _load_image_bytes(full_url)
@ -169,6 +246,7 @@ def load_image_async(
def on_bytes_loaded(data: Optional[bytes]):
"""Decode image on main thread and call user callback."""
if data is None:
cache.mark_failed(full_url)
if on_complete:
on_complete(None)
return
@ -177,22 +255,23 @@ def load_image_async(
# Decode image on main thread (Skia thread safety)
decoded = skia.Image.MakeFromEncoded(data)
if decoded:
# Convert to raster image to ensure data is fully decoded
# This prevents potential lazy decoding issues during rendering
surface = skia.Surface(decoded.width(), decoded.height())
canvas = surface.getCanvas()
canvas.drawImage(decoded, 0, 0)
image = surface.makeImageSnapshot()
# Convert to raster image for safe drawing
# (encoded images may crash on some operations)
image = decoded.makeRasterImage()
cache.set(full_url, image)
logger.debug(f"Async loaded image: {full_url} ({image.width()}x{image.height()})")
if on_complete:
on_complete(image)
else:
# Failed to decode (e.g., SVG or unsupported format)
logger.warning(f"Failed to decode image (unsupported format?): {full_url}")
cache.mark_failed(full_url)
if on_complete:
on_complete(None)
except Exception as e:
logger.error(f"Failed to decode image {full_url}: {e}")
cache.mark_failed(full_url)
if on_complete:
on_complete(None)
if on_complete:

View file

@ -49,6 +49,12 @@ def print_tree(node, indent=0):
class _DOMBuilder(HTMLParser):
"""Tiny HTML parser that produces Element/Text nodes."""
# HTML5 void elements - elements that cannot have children
VOID_ELEMENTS = frozenset({
"area", "base", "br", "col", "embed", "hr", "img", "input",
"link", "meta", "param", "source", "track", "wbr"
})
def __init__(self):
super().__init__(convert_charrefs=False)
self.root = Element("html")
@ -134,6 +140,12 @@ class _DOMBuilder(HTMLParser):
if tag == "p" and self.current.tag == "p":
self._pop("p")
# For void elements, add to tree but don't push onto stack
# (they can't have children and don't have closing tags)
if tag in self.VOID_ELEMENTS:
el.parent = self.current
self.current.children.append(el)
else:
self._push(el)
def handle_endtag(self, tag):

View file

@ -39,6 +39,8 @@ class RenderPipeline:
# Also set on ImageLayout class for global notification
def on_image_loaded():
# Invalidate layout cache so positions are recalculated with actual image sizes
self.invalidate()
if self._on_needs_redraw:
self._on_needs_redraw()

View file

@ -99,12 +99,13 @@ def render_dom_graph_page(graph_path: str) -> str:
Render the DOM graph visualization page.
Args:
graph_path: Path to the SVG or DOT file
graph_path: Path to the PNG, SVG or DOT file
Returns:
Rendered HTML with embedded graph
"""
from pathlib import Path
import base64
logger = logging.getLogger("bowser.templates")
graph_path_obj = Path(graph_path)
@ -114,20 +115,31 @@ def render_dom_graph_page(graph_path: str) -> str:
return render_template("dom_graph.html",
error="Graph file not found",
graph_content="",
is_svg=False)
is_svg=False,
is_png=False)
try:
# Check file type
is_svg = graph_path_obj.suffix == '.svg'
suffix = graph_path_obj.suffix.lower()
is_svg = suffix == '.svg'
is_png = suffix == '.png'
# Read the file
if is_png:
# Read PNG as binary and convert to base64 data URL
with open(graph_path, 'rb') as f:
png_data = f.read()
graph_content = base64.b64encode(png_data).decode('ascii')
logger.info(f"Rendering DOM graph (PNG) from {graph_path}")
else:
# Read text content for SVG or DOT
with open(graph_path, 'r', encoding='utf-8') as f:
graph_content = f.read()
logger.info(f"Rendering DOM graph from {graph_path}")
return render_template("dom_graph.html",
graph_content=graph_content,
is_svg=is_svg,
is_png=is_png,
graph_path=str(graph_path),
error=None)
@ -136,4 +148,5 @@ def render_dom_graph_page(graph_path: str) -> str:
return render_template("dom_graph.html",
error=f"Failed to load graph: {e}",
graph_content="",
is_svg=False)
is_svg=False,
is_png=False)