Enhance DOM graph visualization and image handling

- Implement PNG rendering for DOM graphs, with fallback to DOT format.
- Add support for max-width constraints in image layout based on CSS styles.
- Introduce caching mechanisms for image loading, including tracking of failed and pending loads.
- Update HTML parser to handle void elements correctly.
- Modify template rendering to support PNG graph files.
2026-03-16 19:10:24 +00:00 · 2026-01-13 14:23:45 +01:00 · 2026-01-13 14:23:45 +01:00 · ce5dbff562
commit ce5dbff562
parent 8c2d360515
8 changed files with 238 additions and 35 deletions
--- a/src/browser/chrome.py
+++ b/src/browser/chrome.py
@ -942,7 +942,7 @@ class Chrome:

    def _show_dom_graph(self):
        """Generate and display DOM graph for current tab."""
-        from ..debug.dom_graph import render_dom_graph_to_svg, save_dom_graph, print_dom_tree
+        from ..debug.dom_graph import render_dom_graph_to_png, save_dom_graph, print_dom_tree

        if not self.browser.active_tab:
            self.logger.warning("No active tab to visualize")
@ -957,8 +957,8 @@ class Chrome:
        output_dir = Path.home() / ".cache" / "bowser"
        output_dir.mkdir(parents=True, exist_ok=True)

-        # Try SVG first, fallback to DOT
-        svg_path = output_dir / "dom_graph.svg"
+        # Try PNG first, fallback to DOT
+        png_path = output_dir / "dom_graph.png"
        dot_path = output_dir / "dom_graph.dot"

        self.logger.info("Generating DOM graph...")
@ -971,11 +971,11 @@ class Chrome:
        print(tree_text)
        print("="*60 + "\n")

-        # Try to render as SVG
-        if render_dom_graph_to_svg(frame.document, str(svg_path)):
+        # Try to render as PNG
+        if render_dom_graph_to_png(frame.document, str(png_path)):
            # Open in new browser tab
-            self.logger.info(f"Opening DOM graph in new tab: {svg_path}")
-            self.browser.new_tab(f"about:dom-graph?path={svg_path}")
+            self.logger.info(f"Opening DOM graph in new tab: {png_path}")
+            self.browser.new_tab(f"about:dom-graph?path={png_path}")
        else:
            # Fallback to DOT file
            if save_dom_graph(frame.document, str(dot_path)):
--- a/src/debug/dom_graph.py
+++ b/src/debug/dom_graph.py
@ -149,6 +149,47 @@ def render_dom_graph_to_svg(document: Optional[Element], output_path: str) -> bo
        return False


+def render_dom_graph_to_png(document: Optional[Element], output_path: str) -> bool:
+    """
+    Render DOM tree as a PNG image using Graphviz (if available).
+
+    Args:
+        document: Root element of the DOM tree
+        output_path: Path where to save the .png file
+
+    Returns:
+        True if successful, False otherwise
+    """
+    logger = logging.getLogger("bowser.debug")
+
+    try:
+        import subprocess
+
+        dot_content = generate_dot_graph(document)
+
+        # Try to render with graphviz
+        result = subprocess.run(
+            ['dot', '-Tpng', '-o', output_path],
+            input=dot_content.encode('utf-8'),
+            capture_output=True,
+            timeout=10
+        )
+
+        if result.returncode == 0:
+            logger.info(f"DOM graph rendered to {output_path}")
+            return True
+        else:
+            logger.warning(f"Graphviz rendering failed: {result.stderr.decode()}")
+            return False
+
+    except FileNotFoundError:
+        logger.warning("Graphviz 'dot' command not found. Install graphviz for PNG output.")
+        return False
+    except Exception as e:
+        logger.error(f"Failed to render DOM graph: {e}")
+        return False
+
+
 def print_dom_tree(node, indent: int = 0, max_depth: int = 10) -> str:
    """
    Generate a text representation of the DOM tree.
--- a/src/layout/document.py
+++ b/src/layout/document.py
@ -240,7 +240,24 @@ class DocumentLayout:
                if tag == "img":
                    image_layout = ImageLayout(child)
                    image_layout.load(self.base_url, async_load=self.async_images)
-                    image_layout.layout(max_width=self.width - 40 if self.width > 40 else 800)
+
+                    # Get computed style for max-width constraint
+                    style = getattr(child, "computed_style", None)
+                    max_width_css = None
+                    if style:
+                        max_width_val = style.get("max-width", "")
+                        if max_width_val == "100%":
+                            # 100% means constrain to container width
+                            max_width_css = self.width - 40 if self.width > 40 else 800
+                        elif max_width_val.endswith("px"):
+                            try:
+                                max_width_css = float(max_width_val[:-2])
+                            except ValueError:
+                                pass
+
+                    # Use CSS max-width or default container width
+                    effective_max_width = max_width_css if max_width_css else (self.width - 40 if self.width > 40 else 800)
+                    image_layout.layout(max_width=effective_max_width)

                    # Get computed style for margins
                    style = getattr(child, "computed_style", None)
@ -439,7 +456,24 @@ class DocumentLayout:
                if child.tag.lower() == "img":
                    image_layout = ImageLayout(child)
                    image_layout.load(self.base_url, async_load=self.async_images)
-                    image_layout.layout(max_width=self.width - 40 if self.width > 40 else 800)
+
+                    # Get computed style for max-width constraint
+                    style = getattr(child, "computed_style", None)
+                    max_width_css = None
+                    if style:
+                        max_width_val = style.get("max-width", "")
+                        if max_width_val == "100%":
+                            # 100% means constrain to container width
+                            max_width_css = self.width - 40 if self.width > 40 else 800
+                        elif max_width_val.endswith("px"):
+                            try:
+                                max_width_css = float(max_width_val[:-2])
+                            except ValueError:
+                                pass
+
+                    # Use CSS max-width or default container width
+                    effective_max_width = max_width_css if max_width_css else (self.width - 40 if self.width > 40 else 800)
+                    image_layout.layout(max_width=effective_max_width)

                    style = getattr(child, "computed_style", None)
                    if style:
--- a/src/layout/embed.py
+++ b/src/layout/embed.py
@ -4,7 +4,7 @@ import logging
 from typing import Optional, Callable
 import skia

-from ..network.images import load_image, load_image_async
+from ..network.images import load_image, load_image_async, get_cached_image, is_data_url, has_image_failed


 logger = logging.getLogger("bowser.layout.embed")
@ -36,6 +36,7 @@ class ImageLayout:
        self._load_task_id: Optional[int] = None
        self._src = ""
        self._base_url: Optional[str] = None
+        self._max_width: Optional[float] = None  # Store max_width for async re-layout

    def load(self, base_url: Optional[str] = None, async_load: bool = False):
        """
@ -58,10 +59,26 @@ class ImageLayout:
        self._src = src
        self._base_url = base_url

+        # Check cache first (fast, non-blocking)
+        cached = get_cached_image(src, base_url)
+        if cached:
+            self.image = cached
+            return
+
+        # Skip images that previously failed to load (e.g., SVG)
+        if has_image_failed(src, base_url):
+            return
+
+        # Data URLs should be loaded synchronously (they're inline, no network)
+        if is_data_url(src):
+            self.image = load_image(src, base_url)
+            return
+
        if async_load:
+            # Load in background thread
            self._load_async(src, base_url)
        else:
-            # Synchronous load (for tests or cached images)
+            # Synchronous load (blocks UI - use sparingly)
            self.image = load_image(src, base_url)

    def _load_async(self, src: str, base_url: Optional[str]):
@ -99,7 +116,6 @@ class ImageLayout:

        # Calculate dimensions based on attributes or intrinsic size
        if width_attr and height_attr:
-            # Both specified - use them
            try:
                self.width = float(width_attr)
                self.height = float(height_attr)
@ -107,7 +123,6 @@ class ImageLayout:
                self.width = intrinsic_width
                self.height = intrinsic_height
        elif width_attr:
-            # Only width specified - maintain aspect ratio
            try:
                self.width = float(width_attr)
                if intrinsic_width > 0:
@ -119,7 +134,6 @@ class ImageLayout:
                self.width = intrinsic_width
                self.height = intrinsic_height
        elif height_attr:
-            # Only height specified - maintain aspect ratio
            try:
                self.height = float(height_attr)
                if intrinsic_height > 0:
@ -131,10 +145,15 @@ class ImageLayout:
                self.width = intrinsic_width
                self.height = intrinsic_height
        else:
-            # No explicit dimensions - use intrinsic size
            self.width = intrinsic_width
            self.height = intrinsic_height

+        # Apply max_width constraint if set
+        if self._max_width and self.width > self._max_width:
+            aspect_ratio = intrinsic_height / intrinsic_width if intrinsic_width > 0 else 1
+            self.width = self._max_width
+            self.height = self.width * aspect_ratio
+
    @property
    def is_loading(self) -> bool:
        """True if image is currently being loaded."""
@ -155,6 +174,9 @@ class ImageLayout:
        Returns:
            Width of the image (for inline layout)
        """
+        # Store max_width for async image load re-layout
+        self._max_width = max_width
+
        if not self.image:
            # If image failed to load, use alt text dimensions
            # For now, just use a placeholder size
@ -208,7 +230,7 @@ class ImageLayout:
            self.width = intrinsic_width
            self.height = intrinsic_height

-            # Constrain to max_width if specified
+        # Always constrain to max_width if specified (applies to all cases)
        if max_width and self.width > max_width:
            aspect_ratio = intrinsic_height / intrinsic_width if intrinsic_width > 0 else 1
            self.width = max_width
--- a/src/network/images.py
+++ b/src/network/images.py
@ -27,6 +27,8 @@ class ImageCache:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                cls._instance._cache = {}
+                cls._instance._failed = set()  # URLs that failed to load
+                cls._instance._pending = set()  # URLs currently being loaded
                cls._instance._cache_lock = threading.Lock()
            return cls._instance

@ -39,16 +41,43 @@ class ImageCache:
        """Cache an image by URL."""
        with self._cache_lock:
            self._cache[url] = image
+            self._pending.discard(url)  # No longer pending

    def has(self, url: str) -> bool:
        """Check if URL is cached."""
        with self._cache_lock:
            return url in self._cache

+    def mark_pending(self, url: str) -> bool:
+        """Mark a URL as pending load. Returns False if already pending/cached/failed."""
+        with self._cache_lock:
+            if url in self._cache or url in self._failed or url in self._pending:
+                return False
+            self._pending.add(url)
+            return True
+
+    def mark_failed(self, url: str):
+        """Mark a URL as failed to load (to prevent retries)."""
+        with self._cache_lock:
+            self._failed.add(url)
+            self._pending.discard(url)  # No longer pending
+
+    def has_failed(self, url: str) -> bool:
+        """Check if URL previously failed to load."""
+        with self._cache_lock:
+            return url in self._failed
+
+    def is_pending(self, url: str) -> bool:
+        """Check if URL is currently being loaded."""
+        with self._cache_lock:
+            return url in self._pending
+
    def clear(self):
        """Clear all cached images."""
        with self._cache_lock:
            self._cache.clear()
+            self._failed.clear()
+            self._pending.clear()


 # Callbacks for image load completion
@ -57,6 +86,43 @@ ImageCallback = Callable[[Optional[skia.Image]], None]
 BytesCallback = Callable[[Optional[bytes], str], None]


+def get_cached_image(url: str, base_url: Optional[str] = None) -> Optional[skia.Image]:
+    """
+    Get an image from cache if available (no loading).
+
+    Args:
+        url: Image URL or file path
+        base_url: Base URL for resolving relative URLs
+
+    Returns:
+        Cached Skia Image, or None if not in cache
+    """
+    full_url = _resolve_url(url, base_url)
+    cache = ImageCache()
+    return cache.get(full_url)
+
+
+def has_image_failed(url: str, base_url: Optional[str] = None) -> bool:
+    """
+    Check if an image URL previously failed to load.
+
+    Args:
+        url: Image URL or file path
+        base_url: Base URL for resolving relative URLs
+
+    Returns:
+        True if the URL failed to load previously
+    """
+    full_url = _resolve_url(url, base_url)
+    cache = ImageCache()
+    return cache.has_failed(full_url)
+
+
+def is_data_url(url: str) -> bool:
+    """Check if URL is a data: URL."""
+    return url.startswith('data:')
+
+
 def load_image(url: str, base_url: Optional[str] = None) -> Optional[skia.Image]:
    """
    Load an image from a URL or file path (synchronous).
@ -89,6 +155,9 @@ def load_image(url: str, base_url: Optional[str] = None) -> Optional[skia.Image]
        # Decode with Skia
        image = skia.Image.MakeFromEncoded(data)
        if image:
+            # Convert to raster image for safe drawing
+            # (encoded images may crash on some operations)
+            image = image.makeRasterImage()
            cache.set(full_url, image)
            logger.debug(f"Loaded image: {full_url} ({image.width()}x{image.height()})")

@ -162,6 +231,14 @@ def load_image_async(
            GLib.idle_add(lambda: on_complete(cached) or False)
        return -1  # No task needed

+    # Atomically check if failed/pending and mark as pending
+    # This prevents multiple concurrent loads of the same URL
+    if not cache.mark_pending(full_url):
+        logger.debug(f"Skipping image (cached/failed/pending): {full_url}")
+        if on_complete:
+            GLib.idle_add(lambda: on_complete(None) or False)
+        return -1
+
    def do_load_bytes():
        """Load raw bytes in background thread."""
        return _load_image_bytes(full_url)
@ -169,6 +246,7 @@ def load_image_async(
    def on_bytes_loaded(data: Optional[bytes]):
        """Decode image on main thread and call user callback."""
        if data is None:
+            cache.mark_failed(full_url)
            if on_complete:
                on_complete(None)
            return
@ -177,22 +255,23 @@ def load_image_async(
            # Decode image on main thread (Skia thread safety)
            decoded = skia.Image.MakeFromEncoded(data)
            if decoded:
-                # Convert to raster image to ensure data is fully decoded
-                # This prevents potential lazy decoding issues during rendering
-                surface = skia.Surface(decoded.width(), decoded.height())
-                canvas = surface.getCanvas()
-                canvas.drawImage(decoded, 0, 0)
-                image = surface.makeImageSnapshot()
+                # Convert to raster image for safe drawing
+                # (encoded images may crash on some operations)
+                image = decoded.makeRasterImage()

                cache.set(full_url, image)
                logger.debug(f"Async loaded image: {full_url} ({image.width()}x{image.height()})")
                if on_complete:
                    on_complete(image)
            else:
+                # Failed to decode (e.g., SVG or unsupported format)
+                logger.warning(f"Failed to decode image (unsupported format?): {full_url}")
+                cache.mark_failed(full_url)
                if on_complete:
                    on_complete(None)
        except Exception as e:
            logger.error(f"Failed to decode image {full_url}: {e}")
+            cache.mark_failed(full_url)
            if on_complete:
                on_complete(None)
            if on_complete:
--- a/src/parser/html.py
+++ b/src/parser/html.py
@ -49,6 +49,12 @@ def print_tree(node, indent=0):
 class _DOMBuilder(HTMLParser):
    """Tiny HTML parser that produces Element/Text nodes."""

+    # HTML5 void elements - elements that cannot have children
+    VOID_ELEMENTS = frozenset({
+        "area", "base", "br", "col", "embed", "hr", "img", "input",
+        "link", "meta", "param", "source", "track", "wbr"
+    })
+
    def __init__(self):
        super().__init__(convert_charrefs=False)
        self.root = Element("html")
@ -134,6 +140,12 @@ class _DOMBuilder(HTMLParser):
        if tag == "p" and self.current.tag == "p":
            self._pop("p")

+        # For void elements, add to tree but don't push onto stack
+        # (they can't have children and don't have closing tags)
+        if tag in self.VOID_ELEMENTS:
+            el.parent = self.current
+            self.current.children.append(el)
+        else:
            self._push(el)

    def handle_endtag(self, tag):
--- a/src/render/pipeline.py
+++ b/src/render/pipeline.py
@ -39,6 +39,8 @@ class RenderPipeline:

        # Also set on ImageLayout class for global notification
        def on_image_loaded():
+            # Invalidate layout cache so positions are recalculated with actual image sizes
+            self.invalidate()
            if self._on_needs_redraw:
                self._on_needs_redraw()

--- a/src/templates.py
+++ b/src/templates.py
@ -99,12 +99,13 @@ def render_dom_graph_page(graph_path: str) -> str:
    Render the DOM graph visualization page.

    Args:
-        graph_path: Path to the SVG or DOT file
+        graph_path: Path to the PNG, SVG or DOT file

    Returns:
        Rendered HTML with embedded graph
    """
    from pathlib import Path
+    import base64

    logger = logging.getLogger("bowser.templates")
    graph_path_obj = Path(graph_path)
@ -114,20 +115,31 @@ def render_dom_graph_page(graph_path: str) -> str:
        return render_template("dom_graph.html",
                             error="Graph file not found",
                             graph_content="",
-                             is_svg=False)
+                             is_svg=False,
+                             is_png=False)

    try:
        # Check file type
-        is_svg = graph_path_obj.suffix == '.svg'
+        suffix = graph_path_obj.suffix.lower()
+        is_svg = suffix == '.svg'
+        is_png = suffix == '.png'

-        # Read the file
+        if is_png:
+            # Read PNG as binary and convert to base64 data URL
+            with open(graph_path, 'rb') as f:
+                png_data = f.read()
+            graph_content = base64.b64encode(png_data).decode('ascii')
+            logger.info(f"Rendering DOM graph (PNG) from {graph_path}")
+        else:
+            # Read text content for SVG or DOT
            with open(graph_path, 'r', encoding='utf-8') as f:
                graph_content = f.read()
-
            logger.info(f"Rendering DOM graph from {graph_path}")
+
        return render_template("dom_graph.html",
                             graph_content=graph_content,
                             is_svg=is_svg,
+                             is_png=is_png,
                             graph_path=str(graph_path),
                             error=None)

@ -136,4 +148,5 @@ def render_dom_graph_page(graph_path: str) -> str:
        return render_template("dom_graph.html",
                             error=f"Failed to load graph: {e}",
                             graph_content="",
-                             is_svg=False)
+                             is_svg=False,
+                             is_png=False)