From ce5dbff562db1659dd046568679d8fe1ba28fb27 Mon Sep 17 00:00:00 2001 From: Benedikt Willi Date: Tue, 13 Jan 2026 14:23:45 +0100 Subject: [PATCH] Enhance DOM graph visualization and image handling - Implement PNG rendering for DOM graphs, with fallback to DOT format. - Add support for max-width constraints in image layout based on CSS styles. - Introduce caching mechanisms for image loading, including tracking failed and pending loads. - Update HTML parser to handle void elements correctly. - Modify template rendering to support PNG graph files. --- src/browser/chrome.py | 14 +++---- src/debug/dom_graph.py | 41 +++++++++++++++++++ src/layout/document.py | 38 +++++++++++++++++- src/layout/embed.py | 44 +++++++++++++++----- src/network/images.py | 91 +++++++++++++++++++++++++++++++++++++++--- src/parser/html.py | 14 ++++++- src/render/pipeline.py | 2 + src/templates.py | 29 ++++++++++---- 8 files changed, 238 insertions(+), 35 deletions(-) diff --git a/src/browser/chrome.py b/src/browser/chrome.py index d977b95..15a622d 100644 --- a/src/browser/chrome.py +++ b/src/browser/chrome.py @@ -942,7 +942,7 @@ class Chrome: def _show_dom_graph(self): """Generate and display DOM graph for current tab.""" - from ..debug.dom_graph import render_dom_graph_to_svg, save_dom_graph, print_dom_tree + from ..debug.dom_graph import render_dom_graph_to_png, save_dom_graph, print_dom_tree if not self.browser.active_tab: self.logger.warning("No active tab to visualize") @@ -957,8 +957,8 @@ class Chrome: output_dir = Path.home() / ".cache" / "bowser" output_dir.mkdir(parents=True, exist_ok=True) - # Try SVG first, fallback to DOT - svg_path = output_dir / "dom_graph.svg" + # Try PNG first, fallback to DOT + png_path = output_dir / "dom_graph.png" dot_path = output_dir / "dom_graph.dot" self.logger.info("Generating DOM graph...") @@ -971,11 +971,11 @@ class Chrome: print(tree_text) print("="*60 + "\n") - # Try to render as SVG - if render_dom_graph_to_svg(frame.document, str(svg_path)): + # Try to render as PNG + if render_dom_graph_to_png(frame.document, str(png_path)): # Open in new browser tab - self.logger.info(f"Opening DOM graph in new tab: {svg_path}") - self.browser.new_tab(f"about:dom-graph?path={svg_path}") + self.logger.info(f"Opening DOM graph in new tab: {png_path}") + self.browser.new_tab(f"about:dom-graph?path={png_path}") else: # Fallback to DOT file if save_dom_graph(frame.document, str(dot_path)): diff --git a/src/debug/dom_graph.py b/src/debug/dom_graph.py index f147a89..7760fd2 100644 --- a/src/debug/dom_graph.py +++ b/src/debug/dom_graph.py @@ -149,6 +149,47 @@ def render_dom_graph_to_svg(document: Optional[Element], output_path: str) -> bo return False +def render_dom_graph_to_png(document: Optional[Element], output_path: str) -> bool: + """ + Render DOM tree as a PNG image using Graphviz (if available). + + Args: + document: Root element of the DOM tree + output_path: Path where to save the .png file + + Returns: + True if successful, False otherwise + """ + logger = logging.getLogger("bowser.debug") + + try: + import subprocess + + dot_content = generate_dot_graph(document) + + # Try to render with graphviz + result = subprocess.run( + ['dot', '-Tpng', '-o', output_path], + input=dot_content.encode('utf-8'), + capture_output=True, + timeout=10 + ) + + if result.returncode == 0: + logger.info(f"DOM graph rendered to {output_path}") + return True + else: + logger.warning(f"Graphviz rendering failed: {result.stderr.decode()}") + return False + + except FileNotFoundError: + logger.warning("Graphviz 'dot' command not found. Install graphviz for PNG output.") + return False + except Exception as e: + logger.error(f"Failed to render DOM graph: {e}") + return False + + def print_dom_tree(node, indent: int = 0, max_depth: int = 10) -> str: """ Generate a text representation of the DOM tree. diff --git a/src/layout/document.py b/src/layout/document.py index 682e98e..6c16c41 100644 --- a/src/layout/document.py +++ b/src/layout/document.py @@ -240,7 +240,24 @@ class DocumentLayout: if tag == "img": image_layout = ImageLayout(child) image_layout.load(self.base_url, async_load=self.async_images) - image_layout.layout(max_width=self.width - 40 if self.width > 40 else 800) + + # Get computed style for max-width constraint + style = getattr(child, "computed_style", None) + max_width_css = None + if style: + max_width_val = style.get("max-width", "") + if max_width_val == "100%": + # 100% means constrain to container width + max_width_css = self.width - 40 if self.width > 40 else 800 + elif max_width_val.endswith("px"): + try: + max_width_css = float(max_width_val[:-2]) + except ValueError: + pass + + # Use CSS max-width or default container width + effective_max_width = max_width_css if max_width_css else (self.width - 40 if self.width > 40 else 800) + image_layout.layout(max_width=effective_max_width) # Get computed style for margins style = getattr(child, "computed_style", None) @@ -439,7 +456,24 @@ class DocumentLayout: if child.tag.lower() == "img": image_layout = ImageLayout(child) image_layout.load(self.base_url, async_load=self.async_images) - image_layout.layout(max_width=self.width - 40 if self.width > 40 else 800) + + # Get computed style for max-width constraint + style = getattr(child, "computed_style", None) + max_width_css = None + if style: + max_width_val = style.get("max-width", "") + if max_width_val == "100%": + # 100% means constrain to container width + max_width_css = self.width - 40 if self.width > 40 else 800 + elif max_width_val.endswith("px"): + try: + max_width_css = float(max_width_val[:-2]) + except ValueError: + pass + + # Use CSS max-width or default container width + effective_max_width = max_width_css if max_width_css else (self.width - 40 if self.width > 40 else 800) + image_layout.layout(max_width=effective_max_width) style = getattr(child, "computed_style", None) if style: diff --git a/src/layout/embed.py b/src/layout/embed.py index 4c88ca5..d040c12 100644 --- a/src/layout/embed.py +++ b/src/layout/embed.py @@ -4,7 +4,7 @@ import logging from typing import Optional, Callable import skia -from ..network.images import load_image, load_image_async +from ..network.images import load_image, load_image_async, get_cached_image, is_data_url, has_image_failed logger = logging.getLogger("bowser.layout.embed") @@ -36,6 +36,7 @@ class ImageLayout: self._load_task_id: Optional[int] = None self._src = "" self._base_url: Optional[str] = None + self._max_width: Optional[float] = None # Store max_width for async re-layout def load(self, base_url: Optional[str] = None, async_load: bool = False): """ @@ -58,10 +59,26 @@ class ImageLayout: self._src = src self._base_url = base_url + # Check cache first (fast, non-blocking) + cached = get_cached_image(src, base_url) + if cached: + self.image = cached + return + + # Skip images that previously failed to load (e.g., SVG) + if has_image_failed(src, base_url): + return + + # Data URLs should be loaded synchronously (they're inline, no network) + if is_data_url(src): + self.image = load_image(src, base_url) + return + if async_load: + # Load in background thread self._load_async(src, base_url) else: - # Synchronous load (for tests or cached images) + # Synchronous load (blocks UI - use sparingly) self.image = load_image(src, base_url) def _load_async(self, src: str, base_url: Optional[str]): @@ -99,7 +116,6 @@ class ImageLayout: # Calculate dimensions based on attributes or intrinsic size if width_attr and height_attr: - # Both specified - use them try: self.width = float(width_attr) self.height = float(height_attr) @@ -107,7 +123,6 @@ class ImageLayout: self.width = intrinsic_width self.height = intrinsic_height elif width_attr: - # Only width specified - maintain aspect ratio try: self.width = float(width_attr) if intrinsic_width > 0: @@ -119,7 +134,6 @@ class ImageLayout: self.width = intrinsic_width self.height = intrinsic_height elif height_attr: - # Only height specified - maintain aspect ratio try: self.height = float(height_attr) if intrinsic_height > 0: @@ -131,10 +145,15 @@ class ImageLayout: self.width = intrinsic_width self.height = intrinsic_height else: - # No explicit dimensions - use intrinsic size self.width = intrinsic_width self.height = intrinsic_height + # Apply max_width constraint if set + if self._max_width and self.width > self._max_width: + aspect_ratio = intrinsic_height / intrinsic_width if intrinsic_width > 0 else 1 + self.width = self._max_width + self.height = self.width * aspect_ratio + @property def is_loading(self) -> bool: """True if image is currently being loaded.""" @@ -155,6 +174,9 @@ class ImageLayout: Returns: Width of the image (for inline layout) """ + # Store max_width for async image load re-layout + self._max_width = max_width + if not self.image: # If image failed to load, use alt text dimensions # For now, just use a placeholder size @@ -208,11 +230,11 @@ class ImageLayout: self.width = intrinsic_width self.height = intrinsic_height - # Constrain to max_width if specified - if max_width and self.width > max_width: - aspect_ratio = intrinsic_height / intrinsic_width if intrinsic_width > 0 else 1 - self.width = max_width - self.height = self.width * aspect_ratio + # Always constrain to max_width if specified (applies to all cases) + if max_width and self.width > max_width: + aspect_ratio = intrinsic_height / intrinsic_width if intrinsic_width > 0 else 1 + self.width = max_width + self.height = self.width * aspect_ratio return self.width diff --git a/src/network/images.py b/src/network/images.py index f6ccb7b..d7795ea 100644 --- a/src/network/images.py +++ b/src/network/images.py @@ -27,6 +27,8 @@ class ImageCache: if cls._instance is None: cls._instance = super().__new__(cls) cls._instance._cache = {} + cls._instance._failed = set() # URLs that failed to load + cls._instance._pending = set() # URLs currently being loaded cls._instance._cache_lock = threading.Lock() return cls._instance @@ -39,16 +41,43 @@ class ImageCache: """Cache an image by URL.""" with self._cache_lock: self._cache[url] = image + self._pending.discard(url) # No longer pending def has(self, url: str) -> bool: """Check if URL is cached.""" with self._cache_lock: return url in self._cache + def mark_pending(self, url: str) -> bool: + """Mark a URL as pending load. Returns False if already pending/cached/failed.""" + with self._cache_lock: + if url in self._cache or url in self._failed or url in self._pending: + return False + self._pending.add(url) + return True + + def mark_failed(self, url: str): + """Mark a URL as failed to load (to prevent retries).""" + with self._cache_lock: + self._failed.add(url) + self._pending.discard(url) # No longer pending + + def has_failed(self, url: str) -> bool: + """Check if URL previously failed to load.""" + with self._cache_lock: + return url in self._failed + + def is_pending(self, url: str) -> bool: + """Check if URL is currently being loaded.""" + with self._cache_lock: + return url in self._pending + def clear(self): """Clear all cached images.""" with self._cache_lock: self._cache.clear() + self._failed.clear() + self._pending.clear() # Callbacks for image load completion @@ -57,6 +86,43 @@ ImageCallback = Callable[[Optional[skia.Image]], None] BytesCallback = Callable[[Optional[bytes], str], None] +def get_cached_image(url: str, base_url: Optional[str] = None) -> Optional[skia.Image]: + """ + Get an image from cache if available (no loading). + + Args: + url: Image URL or file path + base_url: Base URL for resolving relative URLs + + Returns: + Cached Skia Image, or None if not in cache + """ + full_url = _resolve_url(url, base_url) + cache = ImageCache() + return cache.get(full_url) + + +def has_image_failed(url: str, base_url: Optional[str] = None) -> bool: + """ + Check if an image URL previously failed to load. + + Args: + url: Image URL or file path + base_url: Base URL for resolving relative URLs + + Returns: + True if the URL failed to load previously + """ + full_url = _resolve_url(url, base_url) + cache = ImageCache() + return cache.has_failed(full_url) + + +def is_data_url(url: str) -> bool: + """Check if URL is a data: URL.""" + return url.startswith('data:') + + def load_image(url: str, base_url: Optional[str] = None) -> Optional[skia.Image]: """ Load an image from a URL or file path (synchronous). @@ -89,6 +155,9 @@ def load_image(url: str, base_url: Optional[str] = None) -> Optional[skia.Image] # Decode with Skia image = skia.Image.MakeFromEncoded(data) if image: + # Convert to raster image for safe drawing + # (encoded images may crash on some operations) + image = image.makeRasterImage() cache.set(full_url, image) logger.debug(f"Loaded image: {full_url} ({image.width()}x{image.height()})") @@ -162,6 +231,14 @@ def load_image_async( GLib.idle_add(lambda: on_complete(cached) or False) return -1 # No task needed + # Atomically check if failed/pending and mark as pending + # This prevents multiple concurrent loads of the same URL + if not cache.mark_pending(full_url): + logger.debug(f"Skipping image (cached/failed/pending): {full_url}") + if on_complete: + GLib.idle_add(lambda: on_complete(None) or False) + return -1 + def do_load_bytes(): """Load raw bytes in background thread.""" return _load_image_bytes(full_url) @@ -169,6 +246,7 @@ def load_image_async( def on_bytes_loaded(data: Optional[bytes]): """Decode image on main thread and call user callback.""" if data is None: + cache.mark_failed(full_url) if on_complete: on_complete(None) return @@ -177,22 +255,23 @@ def load_image_async( # Decode image on main thread (Skia thread safety) decoded = skia.Image.MakeFromEncoded(data) if decoded: - # Convert to raster image to ensure data is fully decoded - # This prevents potential lazy decoding issues during rendering - surface = skia.Surface(decoded.width(), decoded.height()) - canvas = surface.getCanvas() - canvas.drawImage(decoded, 0, 0) - image = surface.makeImageSnapshot() + # Convert to raster image for safe drawing + # (encoded images may crash on some operations) + image = decoded.makeRasterImage() cache.set(full_url, image) logger.debug(f"Async loaded image: {full_url} ({image.width()}x{image.height()})") if on_complete: on_complete(image) else: + # Failed to decode (e.g., SVG or unsupported format) + logger.warning(f"Failed to decode image (unsupported format?): {full_url}") + cache.mark_failed(full_url) if on_complete: on_complete(None) except Exception as e: logger.error(f"Failed to decode image {full_url}: {e}") + cache.mark_failed(full_url) if on_complete: on_complete(None) if on_complete: diff --git a/src/parser/html.py b/src/parser/html.py index 6008594..99eb831 100644 --- a/src/parser/html.py +++ b/src/parser/html.py @@ -49,6 +49,12 @@ def print_tree(node, indent=0): class _DOMBuilder(HTMLParser): """Tiny HTML parser that produces Element/Text nodes.""" + # HTML5 void elements - elements that cannot have children + VOID_ELEMENTS = frozenset({ + "area", "base", "br", "col", "embed", "hr", "img", "input", + "link", "meta", "param", "source", "track", "wbr" + }) + def __init__(self): super().__init__(convert_charrefs=False) self.root = Element("html") @@ -134,7 +140,13 @@ class _DOMBuilder(HTMLParser): if tag == "p" and self.current.tag == "p": self._pop("p") - self._push(el) + # For void elements, add to tree but don't push onto stack + # (they can't have children and don't have closing tags) + if tag in self.VOID_ELEMENTS: + el.parent = self.current + self.current.children.append(el) + else: + self._push(el) def handle_endtag(self, tag): if tag in {"script"}: diff --git a/src/render/pipeline.py b/src/render/pipeline.py index 1d3afb5..7b7e0d3 100644 --- a/src/render/pipeline.py +++ b/src/render/pipeline.py @@ -39,6 +39,8 @@ class RenderPipeline: # Also set on ImageLayout class for global notification def on_image_loaded(): + # Invalidate layout cache so positions are recalculated with actual image sizes + self.invalidate() if self._on_needs_redraw: self._on_needs_redraw() diff --git a/src/templates.py b/src/templates.py index e23a349..229f5ad 100644 --- a/src/templates.py +++ b/src/templates.py @@ -99,12 +99,13 @@ def render_dom_graph_page(graph_path: str) -> str: Render the DOM graph visualization page. Args: - graph_path: Path to the SVG or DOT file + graph_path: Path to the PNG, SVG or DOT file Returns: Rendered HTML with embedded graph """ from pathlib import Path + import base64 logger = logging.getLogger("bowser.templates") graph_path_obj = Path(graph_path) @@ -114,20 +115,31 @@ def render_dom_graph_page(graph_path: str) -> str: return render_template("dom_graph.html", error="Graph file not found", graph_content="", - is_svg=False) + is_svg=False, + is_png=False) try: # Check file type - is_svg = graph_path_obj.suffix == '.svg' + suffix = graph_path_obj.suffix.lower() + is_svg = suffix == '.svg' + is_png = suffix == '.png' - # Read the file - with open(graph_path, 'r', encoding='utf-8') as f: - graph_content = f.read() + if is_png: + # Read PNG as binary and convert to base64 data URL + with open(graph_path, 'rb') as f: + png_data = f.read() + graph_content = base64.b64encode(png_data).decode('ascii') + logger.info(f"Rendering DOM graph (PNG) from {graph_path}") + else: + # Read text content for SVG or DOT + with open(graph_path, 'r', encoding='utf-8') as f: + graph_content = f.read() + logger.info(f"Rendering DOM graph from {graph_path}") - logger.info(f"Rendering DOM graph from {graph_path}") return render_template("dom_graph.html", graph_content=graph_content, is_svg=is_svg, + is_png=is_png, graph_path=str(graph_path), error=None) @@ -136,4 +148,5 @@ def render_dom_graph_page(graph_path: str) -> str: return render_template("dom_graph.html", error=f"Failed to load graph: {e}", graph_content="", - is_svg=False) + is_svg=False, + is_png=False)