From ce5dbff562db1659dd046568679d8fe1ba28fb27 Mon Sep 17 00:00:00 2001
From: Benedikt Willi <benedikt.willi@unibas.ch>
Date: Tue, 13 Jan 2026 14:23:45 +0100
Subject: [PATCH] Enhance DOM graph visualization and image handling

- Implement PNG rendering for DOM graphs, with fallback to DOT format.
- Add support for max-width constraints in image layout based on CSS styles.
- Introduce caching mechanisms for image loading, including tracking failed and pending loads.
- Update HTML parser to handle void elements correctly.
- Modify template rendering to support PNG graph files.
---
 src/browser/chrome.py  | 14 +++----
 src/debug/dom_graph.py | 41 +++++++++++++++++++
 src/layout/document.py | 38 +++++++++++++++++-
 src/layout/embed.py    | 44 +++++++++++++++-----
 src/network/images.py  | 91 +++++++++++++++++++++++++++++++++++++++---
 src/parser/html.py     | 14 ++++++-
 src/render/pipeline.py |  2 +
 src/templates.py       | 29 ++++++++++----
 8 files changed, 238 insertions(+), 35 deletions(-)

diff --git a/src/browser/chrome.py b/src/browser/chrome.py
index d977b95..15a622d 100644
--- a/src/browser/chrome.py
+++ b/src/browser/chrome.py
@@ -942,7 +942,7 @@ class Chrome:
 
     def _show_dom_graph(self):
         """Generate and display DOM graph for current tab."""
-        from ..debug.dom_graph import render_dom_graph_to_svg, save_dom_graph, print_dom_tree
+        from ..debug.dom_graph import render_dom_graph_to_png, save_dom_graph, print_dom_tree
 
         if not self.browser.active_tab:
             self.logger.warning("No active tab to visualize")
@@ -957,8 +957,8 @@ class Chrome:
         output_dir = Path.home() / ".cache" / "bowser"
         output_dir.mkdir(parents=True, exist_ok=True)
 
-        # Try SVG first, fallback to DOT
-        svg_path = output_dir / "dom_graph.svg"
+        # Try PNG first, fallback to DOT
+        png_path = output_dir / "dom_graph.png"
         dot_path = output_dir / "dom_graph.dot"
 
         self.logger.info("Generating DOM graph...")
@@ -971,11 +971,11 @@ class Chrome:
         print(tree_text)
         print("="*60 + "\n")
 
-        # Try to render as SVG
-        if render_dom_graph_to_svg(frame.document, str(svg_path)):
+        # Try to render as PNG
+        if render_dom_graph_to_png(frame.document, str(png_path)):
             # Open in new browser tab
-            self.logger.info(f"Opening DOM graph in new tab: {svg_path}")
-            self.browser.new_tab(f"about:dom-graph?path={svg_path}")
+            self.logger.info(f"Opening DOM graph in new tab: {png_path}")
+            self.browser.new_tab(f"about:dom-graph?path={png_path}")
         else:
             # Fallback to DOT file
             if save_dom_graph(frame.document, str(dot_path)):
diff --git a/src/debug/dom_graph.py b/src/debug/dom_graph.py
index f147a89..7760fd2 100644
--- a/src/debug/dom_graph.py
+++ b/src/debug/dom_graph.py
@@ -149,6 +149,47 @@ def render_dom_graph_to_svg(document: Optional[Element], output_path: str) -> bo
         return False
 
 
+def render_dom_graph_to_png(document: Optional[Element], output_path: str) -> bool:
+    """
+    Render DOM tree as a PNG image using Graphviz (if available).
+
+    Args:
+        document: Root element of the DOM tree
+        output_path: Path where to save the .png file
+
+    Returns:
+        True if successful, False otherwise
+    """
+    logger = logging.getLogger("bowser.debug")
+
+    try:
+        import subprocess
+
+        dot_content = generate_dot_graph(document)
+
+        # Try to render with graphviz
+        result = subprocess.run(
+            ['dot', '-Tpng', '-o', output_path],
+            input=dot_content.encode('utf-8'),
+            capture_output=True,
+            timeout=10
+        )
+
+        if result.returncode == 0:
+            logger.info(f"DOM graph rendered to {output_path}")
+            return True
+        else:
+            logger.warning(f"Graphviz rendering failed: {result.stderr.decode()}")
+            return False
+
+    except FileNotFoundError:
+        logger.warning("Graphviz 'dot' command not found. Install graphviz for PNG output.")
+        return False
+    except Exception as e:
+        logger.error(f"Failed to render DOM graph: {e}")
+        return False
+
+
 def print_dom_tree(node, indent: int = 0, max_depth: int = 10) -> str:
     """
     Generate a text representation of the DOM tree.
diff --git a/src/layout/document.py b/src/layout/document.py
index 682e98e..6c16c41 100644
--- a/src/layout/document.py
+++ b/src/layout/document.py
@@ -240,7 +240,24 @@ class DocumentLayout:
                 if tag == "img":
                     image_layout = ImageLayout(child)
                     image_layout.load(self.base_url, async_load=self.async_images)
-                    image_layout.layout(max_width=self.width - 40 if self.width > 40 else 800)
+
+                    # Get computed style for max-width constraint
+                    style = getattr(child, "computed_style", None)
+                    max_width_css = None
+                    if style:
+                        max_width_val = style.get("max-width", "")
+                        if max_width_val == "100%":
+                            # 100% means constrain to container width
+                            max_width_css = self.width - 40 if self.width > 40 else 800
+                        elif max_width_val.endswith("px"):
+                            try:
+                                max_width_css = float(max_width_val[:-2])
+                            except ValueError:
+                                pass
+
+                    # Use CSS max-width or default container width
+                    effective_max_width = max_width_css if max_width_css else (self.width - 40 if self.width > 40 else 800)
+                    image_layout.layout(max_width=effective_max_width)
 
                     # Get computed style for margins
                     style = getattr(child, "computed_style", None)
@@ -439,7 +456,24 @@ class DocumentLayout:
                 if child.tag.lower() == "img":
                     image_layout = ImageLayout(child)
                     image_layout.load(self.base_url, async_load=self.async_images)
-                    image_layout.layout(max_width=self.width - 40 if self.width > 40 else 800)
+
+                    # Get computed style for max-width constraint
+                    style = getattr(child, "computed_style", None)
+                    max_width_css = None
+                    if style:
+                        max_width_val = style.get("max-width", "")
+                        if max_width_val == "100%":
+                            # 100% means constrain to container width
+                            max_width_css = self.width - 40 if self.width > 40 else 800
+                        elif max_width_val.endswith("px"):
+                            try:
+                                max_width_css = float(max_width_val[:-2])
+                            except ValueError:
+                                pass
+
+                    # Use CSS max-width or default container width
+                    effective_max_width = max_width_css if max_width_css else (self.width - 40 if self.width > 40 else 800)
+                    image_layout.layout(max_width=effective_max_width)
 
                     style = getattr(child, "computed_style", None)
                     if style:
diff --git a/src/layout/embed.py b/src/layout/embed.py
index 4c88ca5..d040c12 100644
--- a/src/layout/embed.py
+++ b/src/layout/embed.py
@@ -4,7 +4,7 @@ import logging
 from typing import Optional, Callable
 import skia
 
-from ..network.images import load_image, load_image_async
+from ..network.images import load_image, load_image_async, get_cached_image, is_data_url, has_image_failed
 
 
 logger = logging.getLogger("bowser.layout.embed")
@@ -36,6 +36,7 @@ class ImageLayout:
         self._load_task_id: Optional[int] = None
         self._src = ""
         self._base_url: Optional[str] = None
+        self._max_width: Optional[float] = None  # Store max_width for async re-layout
 
     def load(self, base_url: Optional[str] = None, async_load: bool = False):
         """
@@ -58,10 +59,26 @@ class ImageLayout:
         self._src = src
         self._base_url = base_url
 
+        # Check cache first (fast, non-blocking)
+        cached = get_cached_image(src, base_url)
+        if cached:
+            self.image = cached
+            return
+
+        # Skip images that previously failed to load (e.g., SVG)
+        if has_image_failed(src, base_url):
+            return
+
+        # Data URLs should be loaded synchronously (they're inline, no network)
+        if is_data_url(src):
+            self.image = load_image(src, base_url)
+            return
+
         if async_load:
+            # Load in background thread
             self._load_async(src, base_url)
         else:
-            # Synchronous load (for tests or cached images)
+            # Synchronous load (blocks UI - use sparingly)
             self.image = load_image(src, base_url)
 
     def _load_async(self, src: str, base_url: Optional[str]):
@@ -99,7 +116,6 @@ class ImageLayout:
 
         # Calculate dimensions based on attributes or intrinsic size
         if width_attr and height_attr:
-            # Both specified - use them
             try:
                 self.width = float(width_attr)
                 self.height = float(height_attr)
@@ -107,7 +123,6 @@ class ImageLayout:
                 self.width = intrinsic_width
                 self.height = intrinsic_height
         elif width_attr:
-            # Only width specified - maintain aspect ratio
             try:
                 self.width = float(width_attr)
                 if intrinsic_width > 0:
@@ -119,7 +134,6 @@ class ImageLayout:
                 self.width = intrinsic_width
                 self.height = intrinsic_height
         elif height_attr:
-            # Only height specified - maintain aspect ratio
             try:
                 self.height = float(height_attr)
                 if intrinsic_height > 0:
@@ -131,10 +145,15 @@ class ImageLayout:
                 self.width = intrinsic_width
                 self.height = intrinsic_height
         else:
-            # No explicit dimensions - use intrinsic size
             self.width = intrinsic_width
             self.height = intrinsic_height
 
+        # Apply max_width constraint if set
+        if self._max_width and self.width > self._max_width:
+            aspect_ratio = intrinsic_height / intrinsic_width if intrinsic_width > 0 else 1
+            self.width = self._max_width
+            self.height = self.width * aspect_ratio
+
     @property
     def is_loading(self) -> bool:
         """True if image is currently being loaded."""
@@ -155,6 +174,9 @@ class ImageLayout:
         Returns:
             Width of the image (for inline layout)
         """
+        # Store max_width for async image load re-layout
+        self._max_width = max_width
+
         if not self.image:
             # If image failed to load, use alt text dimensions
             # For now, just use a placeholder size
@@ -208,11 +230,11 @@ class ImageLayout:
             self.width = intrinsic_width
             self.height = intrinsic_height
 
-            # Constrain to max_width if specified
-            if max_width and self.width > max_width:
-                aspect_ratio = intrinsic_height / intrinsic_width if intrinsic_width > 0 else 1
-                self.width = max_width
-                self.height = self.width * aspect_ratio
+        # Always constrain to max_width if specified (applies to all cases)
+        if max_width and self.width > max_width:
+            aspect_ratio = intrinsic_height / intrinsic_width if intrinsic_width > 0 else 1
+            self.width = max_width
+            self.height = self.width * aspect_ratio
 
         return self.width
 
diff --git a/src/network/images.py b/src/network/images.py
index f6ccb7b..d7795ea 100644
--- a/src/network/images.py
+++ b/src/network/images.py
@@ -27,6 +27,8 @@ class ImageCache:
             if cls._instance is None:
                 cls._instance = super().__new__(cls)
                 cls._instance._cache = {}
+                cls._instance._failed = set()  # URLs that failed to load
+                cls._instance._pending = set()  # URLs currently being loaded
                 cls._instance._cache_lock = threading.Lock()
             return cls._instance
 
@@ -39,16 +41,43 @@ class ImageCache:
         """Cache an image by URL."""
         with self._cache_lock:
             self._cache[url] = image
+            self._pending.discard(url)  # No longer pending
 
     def has(self, url: str) -> bool:
         """Check if URL is cached."""
         with self._cache_lock:
             return url in self._cache
 
+    def mark_pending(self, url: str) -> bool:
+        """Mark a URL as pending load. Returns False if already pending/cached/failed."""
+        with self._cache_lock:
+            if url in self._cache or url in self._failed or url in self._pending:
+                return False
+            self._pending.add(url)
+            return True
+
+    def mark_failed(self, url: str):
+        """Mark a URL as failed to load (to prevent retries)."""
+        with self._cache_lock:
+            self._failed.add(url)
+            self._pending.discard(url)  # No longer pending
+
+    def has_failed(self, url: str) -> bool:
+        """Check if URL previously failed to load."""
+        with self._cache_lock:
+            return url in self._failed
+
+    def is_pending(self, url: str) -> bool:
+        """Check if URL is currently being loaded."""
+        with self._cache_lock:
+            return url in self._pending
+
     def clear(self):
         """Clear all cached images."""
         with self._cache_lock:
             self._cache.clear()
+            self._failed.clear()
+            self._pending.clear()
 
 
 # Callbacks for image load completion
@@ -57,6 +86,43 @@ ImageCallback = Callable[[Optional[skia.Image]], None]
 BytesCallback = Callable[[Optional[bytes], str], None]
 
 
+def get_cached_image(url: str, base_url: Optional[str] = None) -> Optional[skia.Image]:
+    """
+    Get an image from cache if available (no loading).
+
+    Args:
+        url: Image URL or file path
+        base_url: Base URL for resolving relative URLs
+
+    Returns:
+        Cached Skia Image, or None if not in cache
+    """
+    full_url = _resolve_url(url, base_url)
+    cache = ImageCache()
+    return cache.get(full_url)
+
+
+def has_image_failed(url: str, base_url: Optional[str] = None) -> bool:
+    """
+    Check if an image URL previously failed to load.
+
+    Args:
+        url: Image URL or file path
+        base_url: Base URL for resolving relative URLs
+
+    Returns:
+        True if the URL failed to load previously
+    """
+    full_url = _resolve_url(url, base_url)
+    cache = ImageCache()
+    return cache.has_failed(full_url)
+
+
+def is_data_url(url: str) -> bool:
+    """Check if URL is a data: URL."""
+    return url.startswith('data:')
+
+
 def load_image(url: str, base_url: Optional[str] = None) -> Optional[skia.Image]:
     """
     Load an image from a URL or file path (synchronous).
@@ -89,6 +155,9 @@ def load_image(url: str, base_url: Optional[str] = None) -> Optional[skia.Image]
         # Decode with Skia
         image = skia.Image.MakeFromEncoded(data)
         if image:
+            # Convert to raster image for safe drawing
+            # (encoded images may crash on some operations)
+            image = image.makeRasterImage()
             cache.set(full_url, image)
             logger.debug(f"Loaded image: {full_url} ({image.width()}x{image.height()})")
 
@@ -162,6 +231,14 @@ def load_image_async(
             GLib.idle_add(lambda: on_complete(cached) or False)
         return -1  # No task needed
 
+    # Atomically check if failed/pending and mark as pending
+    # This prevents multiple concurrent loads of the same URL
+    if not cache.mark_pending(full_url):
+        logger.debug(f"Skipping image (cached/failed/pending): {full_url}")
+        if on_complete:
+            GLib.idle_add(lambda: on_complete(None) or False)
+        return -1
+
     def do_load_bytes():
         """Load raw bytes in background thread."""
         return _load_image_bytes(full_url)
@@ -169,6 +246,7 @@ def load_image_async(
     def on_bytes_loaded(data: Optional[bytes]):
         """Decode image on main thread and call user callback."""
         if data is None:
+            cache.mark_failed(full_url)
             if on_complete:
                 on_complete(None)
             return
@@ -177,22 +255,23 @@ def load_image_async(
             # Decode image on main thread (Skia thread safety)
             decoded = skia.Image.MakeFromEncoded(data)
             if decoded:
-                # Convert to raster image to ensure data is fully decoded
-                # This prevents potential lazy decoding issues during rendering
-                surface = skia.Surface(decoded.width(), decoded.height())
-                canvas = surface.getCanvas()
-                canvas.drawImage(decoded, 0, 0)
-                image = surface.makeImageSnapshot()
+                # Convert to raster image for safe drawing
+                # (encoded images may crash on some operations)
+                image = decoded.makeRasterImage()
 
                 cache.set(full_url, image)
                 logger.debug(f"Async loaded image: {full_url} ({image.width()}x{image.height()})")
                 if on_complete:
                     on_complete(image)
             else:
+                # Failed to decode (e.g., SVG or unsupported format)
+                logger.warning(f"Failed to decode image (unsupported format?): {full_url}")
+                cache.mark_failed(full_url)
                 if on_complete:
                     on_complete(None)
         except Exception as e:
             logger.error(f"Failed to decode image {full_url}: {e}")
+            cache.mark_failed(full_url)
             if on_complete:
                 on_complete(None)
             if on_complete:
diff --git a/src/parser/html.py b/src/parser/html.py
index 6008594..99eb831 100644
--- a/src/parser/html.py
+++ b/src/parser/html.py
@@ -49,6 +49,12 @@ def print_tree(node, indent=0):
 class _DOMBuilder(HTMLParser):
     """Tiny HTML parser that produces Element/Text nodes."""
 
+    # HTML5 void elements - elements that cannot have children
+    VOID_ELEMENTS = frozenset({
+        "area", "base", "br", "col", "embed", "hr", "img", "input",
+        "link", "meta", "param", "source", "track", "wbr"
+    })
+
     def __init__(self):
         super().__init__(convert_charrefs=False)
         self.root = Element("html")
@@ -134,7 +140,13 @@ class _DOMBuilder(HTMLParser):
         if tag == "p" and self.current.tag == "p":
             self._pop("p")
 
-        self._push(el)
+        # For void elements, add to tree but don't push onto stack
+        # (they can't have children and don't have closing tags)
+        if tag in self.VOID_ELEMENTS:
+            el.parent = self.current
+            self.current.children.append(el)
+        else:
+            self._push(el)
 
     def handle_endtag(self, tag):
         if tag in {"script"}:
diff --git a/src/render/pipeline.py b/src/render/pipeline.py
index 1d3afb5..7b7e0d3 100644
--- a/src/render/pipeline.py
+++ b/src/render/pipeline.py
@@ -39,6 +39,8 @@ class RenderPipeline:
 
         # Also set on ImageLayout class for global notification
         def on_image_loaded():
+            # Invalidate layout cache so positions are recalculated with actual image sizes
+            self.invalidate()
             if self._on_needs_redraw:
                 self._on_needs_redraw()
 
diff --git a/src/templates.py b/src/templates.py
index e23a349..229f5ad 100644
--- a/src/templates.py
+++ b/src/templates.py
@@ -99,12 +99,13 @@ def render_dom_graph_page(graph_path: str) -> str:
     Render the DOM graph visualization page.
 
     Args:
-        graph_path: Path to the SVG or DOT file
+        graph_path: Path to the PNG, SVG or DOT file
 
     Returns:
         Rendered HTML with embedded graph
     """
     from pathlib import Path
+    import base64
 
     logger = logging.getLogger("bowser.templates")
     graph_path_obj = Path(graph_path)
@@ -114,20 +115,31 @@ def render_dom_graph_page(graph_path: str) -> str:
         return render_template("dom_graph.html",
                              error="Graph file not found",
                              graph_content="",
-                             is_svg=False)
+                             is_svg=False,
+                             is_png=False)
 
     try:
         # Check file type
-        is_svg = graph_path_obj.suffix == '.svg'
+        suffix = graph_path_obj.suffix.lower()
+        is_svg = suffix == '.svg'
+        is_png = suffix == '.png'
 
-        # Read the file
-        with open(graph_path, 'r', encoding='utf-8') as f:
-            graph_content = f.read()
+        if is_png:
+            # Read PNG as binary and convert to base64 data URL
+            with open(graph_path, 'rb') as f:
+                png_data = f.read()
+            graph_content = base64.b64encode(png_data).decode('ascii')
+            logger.info(f"Rendering DOM graph (PNG) from {graph_path}")
+        else:
+            # Read text content for SVG or DOT
+            with open(graph_path, 'r', encoding='utf-8') as f:
+                graph_content = f.read()
+            logger.info(f"Rendering DOM graph from {graph_path}")
 
-        logger.info(f"Rendering DOM graph from {graph_path}")
         return render_template("dom_graph.html",
                              graph_content=graph_content,
                              is_svg=is_svg,
+                             is_png=is_png,
                              graph_path=str(graph_path),
                              error=None)
 
@@ -136,4 +148,5 @@ def render_dom_graph_page(graph_path: str) -> str:
         return render_template("dom_graph.html",
                              error=f"Failed to load graph: {e}",
                              graph_content="",
-                             is_svg=False)
+                             is_svg=False,
+                             is_png=False)