diff --git a/README.md b/README.md
index 47b3b6c..f25c31e 100644
--- a/README.md
+++ b/README.md
@@ -2,13 +2,14 @@
A custom web browser built from scratch following the [browser.engineering](https://browser.engineering/) curriculum. Features a clean architecture with Skia-based rendering, GTK 4/Adwaita UI, and proper separation of concerns.
-**Status**: Milestone 2 - Basic HTML rendering with text layout
+**Status**: Milestone 3 - Basic HTML rendering with text layout and image support
## Features
- **Adwaita Tab Bar** - Modern GNOME-style tab management
- **Skia Rendering** - Hardware-accelerated 2D graphics
- **Text Layout** - Word wrapping, character-level selection
+- **Image Support** - Load and render images from HTTP, data URLs, and local files
- **DOM Parsing** - HTML parsing with proper tree structure
- **Debug Mode** - Visual layout debugging with FPS counter
- **DOM Visualization** - Generate visual graphs of page structure
@@ -54,18 +55,21 @@ bowser/
│ ├── layout/ # Layout calculation
│ │ ├── document.py # DocumentLayout - full page layout
│ │ ├── block.py # BlockLayout, LineLayout - block elements
-│ │ └── inline.py # TextLayout, InlineLayout - text runs
+│ │ ├── inline.py # TextLayout, InlineLayout - text runs
+│ │ └── embed.py # ImageLayout - embedded content
│ │
│ ├── render/ # Painting & rendering
│ │ ├── pipeline.py # RenderPipeline - coordinates layout/paint
│ │ ├── fonts.py # FontCache - Skia font management
-│ │ ├── paint.py # DisplayList, DrawText, DrawRect
+│ │ ├── paint.py # DisplayList, DrawText, DrawRect, DrawImage
│ │ └── composite.py # Layer compositing
│ │
│ ├── network/ # Networking
│ │ ├── http.py # HTTP client with redirects
│ │ ├── url.py # URL parsing and normalization
-│ │ └── cookies.py # Cookie management
+│ │ ├── cookies.py # Cookie management
+│ │ ├── images.py # Image loading and caching
+│ │ └── tasks.py # Async task queue for background loading
│ │
│ ├── debug/ # Development tools
│ │ └── dom_graph.py # DOM tree visualization
@@ -91,8 +95,11 @@ bowser/
| `Element`, `Text` | parser | DOM tree nodes |
| `DocumentLayout` | layout | Page layout with line positioning |
| `LayoutLine`, `LayoutBlock` | layout | Positioned text with bounding boxes |
+| `ImageLayout`, `LayoutImage` | layout | Image sizing and positioning |
| `RenderPipeline` | render | Coordinates layout → paint |
+| `DrawImage` | render | Image rendering command |
| `FontCache` | render | Skia font caching |
+| `ImageCache` | network | Image loading and caching |
| `Chrome` | browser | GTK window, delegates to RenderPipeline |
## Development
@@ -141,12 +148,70 @@ Shows:
- [x] **M0**: Project scaffold
- [x] **M1**: GTK window with Skia rendering
- [x] **M2**: HTML parsing and text layout
-- [ ] **M3**: CSS parsing and styling
-- [ ] **M4**: Clickable links and navigation
-- [ ] **M5**: Form input and submission
-- [ ] **M6**: JavaScript execution
-- [ ] **M7**: Event handling
-- [ ] **M8**: Images and iframes
+- [x] **M3**: Image loading and rendering
+- [ ] **M4**: CSS parsing and styling
+- [ ] **M5**: Clickable links and navigation
+- [ ] **M6**: Form input and submission
+- [ ] **M7**: JavaScript execution
+- [ ] **M8**: Event handling
+
+## Image Support
+
+Bowser supports loading and rendering images from multiple sources:
+
+### Supported Sources
+
+- **HTTP/HTTPS URLs**: `
`
+- **Data URLs**: ``
+- **Local files**: `
`
+
+### Features
+
+- **Async loading**: Images load in background threads, keeping UI responsive
+- **Smart sizing**: Respects width/height attributes, maintains aspect ratios
+- **Caching**: Thread-safe global image cache prevents redundant loads
+- **Alt text placeholders**: Shows placeholder with alt text when images fail
+- **Format support**: PNG, JPEG, GIF, WebP, and more (via Skia)
+- **Viewport culling**: Only renders visible images for performance
+- **Progressive display**: Page shows immediately, images appear as they load
+
+### Example
+
+```html
+
+
+
+
+
+
+
+
+
+
+
+```
+
+### Architecture
+
+```
+HTML
tag
+ ↓
+ImageLayout.load(async=True)
+ ↓
+TaskQueue (background thread pool)
+ ↓
+load_image() → HTTP/file/data URL
+ ↓ ↓
+ImageCache GLib.idle_add
+(thread-safe) ↓
+ on_complete callback
+ ↓
+ ImageLayout.image = loaded
+ ↓
+ RenderPipeline._request_redraw()
+ ↓
+ DrawImage.execute() → Canvas
+```
## References
diff --git a/assets/pages/startpage.html b/assets/pages/startpage.html
index f53b366..6f13efe 100644
--- a/assets/pages/startpage.html
+++ b/assets/pages/startpage.html
@@ -98,6 +98,8 @@
Bowser v{{ version }}
diff --git a/src/browser/chrome.py b/src/browser/chrome.py index ebbe37c..3e6e1c0 100644 --- a/src/browser/chrome.py +++ b/src/browser/chrome.py @@ -149,6 +149,9 @@ class Chrome: self.drawing_area.set_can_focus(True) # Allow focus for keyboard events self.drawing_area.set_focusable(True) content_box.append(self.drawing_area) + + # Set up redraw callback for async image loading + self.render_pipeline.set_redraw_callback(self._request_redraw) # Add scroll controller for mouse wheel scroll_controller = Gtk.EventControllerScroll.new( @@ -405,11 +408,16 @@ class Chrome: def _render_dom_content(self, canvas, document, width: int, height: int): """Render the DOM content using the render pipeline.""" - sub_timings = {} # Sync debug mode with render pipeline self.render_pipeline.debug_mode = self.debug_mode + + # Set base URL for resolving relative image paths + if self.browser.active_tab and self.browser.active_tab.current_url: + self.render_pipeline.base_url = str(self.browser.active_tab.current_url) + else: + self.render_pipeline.base_url = None # Use render pipeline for layout and rendering t0 = time.perf_counter() @@ -551,8 +559,19 @@ class Chrome: def paint(self): """Trigger redraw of the drawing area.""" - if self.drawing_area: + if self.drawing_area and self.window: self.drawing_area.queue_draw() + + def _request_redraw(self): + """Request a redraw, called when async images finish loading.""" + # This is called from the main thread via GLib.idle_add + try: + # Only redraw if we have a valid window and drawing area + if self.window and self.drawing_area and self.browser.active_tab: + self.logger.debug("Async image loaded, requesting redraw") + self.drawing_area.queue_draw() + except Exception as e: + self.logger.warning(f"Failed to request redraw: {e}") def _setup_keyboard_shortcuts(self): """Setup keyboard event handling for shortcuts.""" diff --git a/src/layout/document.py b/src/layout/document.py index 1ff0545..f73b733 100644 --- a/src/layout/document.py +++ b/src/layout/document.py @@ -2,6 +2,7 @@ from ..parser.html import Element, Text from ..render.fonts import get_font, linespace +from .embed import ImageLayout class LayoutLine: @@ -23,6 +24,28 @@ class LayoutLine: self.width = font.measureText(text) +class LayoutImage: + """A laid-out image ready for rendering.""" + + def __init__(self, image_layout: ImageLayout, x: float, y: float): + self.image_layout = image_layout + self.x = x + self.y = y + # Store initial dimensions but also provide dynamic access + self._initial_width = image_layout.width + self._initial_height = image_layout.height + + @property + def width(self) -> float: + """Get current width (may update after async image load).""" + return self.image_layout.width if self.image_layout.width > 0 else self._initial_width + + @property + def height(self) -> float: + """Get current height (may update after async image load).""" + return self.image_layout.height if self.image_layout.height > 0 else self._initial_height + + class LayoutBlock: """A laid-out block with its lines.""" @@ -39,11 +62,14 @@ class LayoutBlock: class DocumentLayout: """Layout engine for a document.""" - def __init__(self, node, frame=None): + def __init__(self, node, frame=None, base_url=None, async_images: bool = False): self.node = node self.frame = frame + self.base_url = base_url # For resolving relative image URLs + self.async_images = async_images # Load images in background self.blocks = [] # List of LayoutBlock self.lines = [] # Flat list of all LayoutLine for rendering + self.images = [] # List of LayoutImage for rendering self.width = 0 self.height = 0 @@ -60,6 +86,7 @@ class DocumentLayout: self.blocks = [] self.lines = [] + self.images = [] # Find body body = self._find_body(self.node) @@ -70,6 +97,25 @@ class DocumentLayout: raw_blocks = self._collect_blocks(body) for block_info in raw_blocks: + # Handle images separately + if block_info.get("is_image"): + image_layout = block_info.get("image_layout") + if image_layout: + margin_top = block_info.get("margin_top", 6) + margin_bottom = block_info.get("margin_bottom", 10) + y += margin_top + + # Position the image + image_layout.x = x_margin + image_layout.y = y + + # Add to images list for rendering + layout_image = LayoutImage(image_layout, x_margin, y) + self.images.append(layout_image) + + y += image_layout.height + margin_bottom + continue + font_size = block_info.get("font_size", 14) font_family = block_info.get("font_family", "") text = block_info.get("text", "") @@ -182,12 +228,39 @@ class DocumentLayout: # Skip style and script tags - they shouldn't be rendered if tag in {"style", "script", "head", "title", "meta", "link"}: continue + + # Handle img tags + if tag == "img": + image_layout = ImageLayout(child) + image_layout.load(self.base_url, async_load=self.async_images) + image_layout.layout(max_width=self.width - 40 if self.width > 40 else 800) + + # Get computed style for margins + style = getattr(child, "computed_style", None) + if style: + margin_top = style.get_int("margin-top", 6) + margin_bottom = style.get_int("margin-bottom", 10) + else: + margin_top = 6 + margin_bottom = 10 + + blocks.append({ + "is_image": True, + "image_layout": image_layout, + "margin_top": margin_top, + "margin_bottom": margin_bottom, + }) + continue # Container elements - just recurse, don't add as blocks if tag in {"ul", "ol", "div", "section", "article", "main", "header", "footer", "nav"}: blocks.extend(self._collect_blocks(child)) continue + # For other elements (p, h1, etc), first collect any embedded images + embedded_images = self._collect_images(child) + blocks.extend(embedded_images) + content = self._text_of(child) if not content: continue @@ -253,6 +326,40 @@ class DocumentLayout: } return margins.get(tag, 0) + def _collect_images(self, node) -> list: + """Recursively collect all img elements from a node.""" + images = [] + + if not isinstance(node, Element): + return images + + for child in getattr(node, "children", []): + if isinstance(child, Element): + if child.tag.lower() == "img": + image_layout = ImageLayout(child) + image_layout.load(self.base_url, async_load=self.async_images) + image_layout.layout(max_width=self.width - 40 if self.width > 40 else 800) + + style = getattr(child, "computed_style", None) + if style: + margin_top = style.get_int("margin-top", 6) + margin_bottom = style.get_int("margin-bottom", 10) + else: + margin_top = 6 + margin_bottom = 10 + + images.append({ + "is_image": True, + "image_layout": image_layout, + "margin_top": margin_top, + "margin_bottom": margin_bottom, + }) + else: + # Recurse into children + images.extend(self._collect_images(child)) + + return images + def _text_of(self, node) -> str: """Extract text content from a node.""" if isinstance(node, Text): diff --git a/src/layout/embed.py b/src/layout/embed.py index 207d671..136e03c 100644 --- a/src/layout/embed.py +++ b/src/layout/embed.py @@ -1,15 +1,220 @@ -"""Embedded content layout stubs (images, iframes).""" +"""Embedded content layout (images, iframes).""" + +import logging +from typing import Optional, Callable +import skia + +from ..network.images import load_image, load_image_async, ImageCache + + +logger = logging.getLogger("bowser.layout.embed") + + +# Callback type for when an image finishes loading +OnImageLoadedCallback = Callable[["ImageLayout"], None] class ImageLayout: + """Layout for an
'
+ root = parse_html(html)
+
+ # Find the img element
+ body = root.children[0]
+ img = body.children[0]
+
+ assert img.tag == "img"
+ assert img.attributes["src"] == "test.png"
+ assert img.attributes["alt"] == "Test image"
+ assert img.attributes["width"] == "100"
+
+ def test_layout_with_image(self):
+ """Test document layout with an image."""
+ html = 'Text before

Text after
' + root = parse_html(html) + + layout = DocumentLayout(root) + + # Mock the image loading by creating the images manually + # This would normally happen in _collect_blocks + # For now, just verify the structure is created + lines = layout.layout(800) + + # Should have lines and potentially images + assert isinstance(lines, list) + + def test_layout_image_class(self): + """Test LayoutImage class.""" + node = Element("img", {"src": "test.png"}) + image_layout = ImageLayout(node) + image_layout.image = create_test_image(100, 100) + image_layout.layout() + + layout_image = LayoutImage(image_layout, 10, 20) + + assert layout_image.x == 10 + assert layout_image.y == 20 + assert layout_image.width == 100 + assert layout_image.height == 100 + assert layout_image.image_layout is image_layout + + +class TestImageIntegration: + """Integration tests for the complete image pipeline.""" + + def test_html_with_data_url_image(self): + """Test parsing and layout of HTML with data URL image.""" + # 1x1 red PNG + data_url = ( + "data:image/png;base64," + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==" + ) + + html = f'Before
After
' + root = parse_html(html) + + # Verify structure + body = root.children[0] + # The img tag is self-closing, so the second p tag becomes a child of img + # This is a quirk of the HTML parser treating img as a container + assert len(body.children) >= 2 + assert body.children[0].tag == "p" + assert body.children[1].tag == "img" + + def test_nested_image_in_paragraph(self): + """Test that images inside paragraphs are collected.""" + # 1x1 red PNG + data_url = ( + "data:image/png;base64," + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==" + ) + + html = f'Text before text after
'
+ root = parse_html(html)
+
+ layout = DocumentLayout(root)
+ layout.layout(800)
+
+ # Should have image layout even though load failed
+ assert len(layout.images) >= 1
+
+ # Check alt text is set
+ if layout.images:
+ img = layout.images[0]
+ assert img.image_layout.alt_text == "Image failed"
+
+
+class TestURLResolution:
+ """Test URL resolution for images."""
+
+ def test_resolve_about_page_relative_url(self):
+ """Test resolving relative URLs for about: pages."""
+ from src.network.images import _resolve_url, ASSETS_DIR
+
+ # Relative URL from about:startpage should resolve to assets directory
+ resolved = _resolve_url("../WebBowserLogo.jpeg", "about:startpage")
+
+ # Should be an absolute path to the assets directory
+ assert "WebBowserLogo.jpeg" in resolved
+ assert str(ASSETS_DIR) in resolved or resolved.endswith("WebBowserLogo.jpeg")
+
+ def test_resolve_http_relative_url(self):
+ """Test resolving relative URLs for HTTP pages."""
+ from src.network.images import _resolve_url
+
+ # Relative URL from HTTP page
+ resolved = _resolve_url("images/photo.jpg", "https://example.com/page/index.html")
+
+ assert resolved == "https://example.com/page/images/photo.jpg"
+
+ def test_resolve_absolute_url(self):
+ """Test that absolute URLs are returned unchanged."""
+ from src.network.images import _resolve_url
+
+ url = "https://example.com/image.png"
+ resolved = _resolve_url(url, "https://other.com/page.html")
+
+ assert resolved == url
+
+ def test_resolve_data_url(self):
+ """Test that data URLs are returned unchanged."""
+ from src.network.images import _resolve_url
+
+ url = "data:image/png;base64,abc123"
+ resolved = _resolve_url(url, "https://example.com/")
+
+ assert resolved == url
diff --git a/tests/test_tasks.py b/tests/test_tasks.py
new file mode 100644
index 0000000..3f07b69
--- /dev/null
+++ b/tests/test_tasks.py
@@ -0,0 +1,234 @@
+"""Tests for the async task queue system."""
+
+import pytest
+import time
+import threading
+from unittest.mock import Mock, patch
+
+
+class TestTaskQueue:
+ """Tests for the TaskQueue class."""
+
+ def test_task_queue_singleton(self):
+ """Test that TaskQueue is a singleton."""
+ from src.network.tasks import TaskQueue
+
+ # Reset singleton for clean test
+ TaskQueue.reset_instance()
+
+ q1 = TaskQueue()
+ q2 = TaskQueue()
+
+ assert q1 is q2
+
+ # Clean up
+ TaskQueue.reset_instance()
+
+ def test_submit_task_returns_id(self):
+ """Test that submit returns a task ID."""
+ from src.network.tasks import TaskQueue
+
+ TaskQueue.reset_instance()
+ queue = TaskQueue()
+
+ # Mock GLib.idle_add to avoid GTK dependency
+ with patch('src.network.tasks.GLib') as mock_glib:
+ mock_glib.idle_add = lambda cb, *args: cb(*args) if args else cb()
+
+ task_id = queue.submit(lambda: 42)
+
+ # Task ID should be non-negative (or -1 for cached)
+ assert isinstance(task_id, int)
+
+ # Wait for task to complete
+ time.sleep(0.1)
+ TaskQueue.reset_instance()
+
+ def test_task_executes_function(self):
+ """Test that submitted tasks are executed."""
+ from src.network.tasks import TaskQueue
+
+ TaskQueue.reset_instance()
+ queue = TaskQueue()
+
+ result = []
+ event = threading.Event()
+
+ def task():
+ result.append("executed")
+ return "done"
+
+ with patch('src.network.tasks.GLib') as mock_glib:
+ mock_glib.idle_add = lambda cb, *args: cb(*args) if args else cb()
+
+ queue.submit(task)
+
+ # Wait for task to complete
+ time.sleep(0.2)
+
+ assert "executed" in result
+
+ TaskQueue.reset_instance()
+
+ def test_on_complete_callback(self):
+ """Test that on_complete callback is called with result."""
+ from src.network.tasks import TaskQueue
+
+ TaskQueue.reset_instance()
+ queue = TaskQueue()
+
+ results = []
+
+ def task():
+ return 42
+
+ def on_complete(result):
+ results.append(result)
+
+ with patch('src.network.tasks.GLib') as mock_glib:
+ # Make idle_add execute immediately
+ mock_glib.idle_add = lambda cb, *args: cb(*args) if args else cb()
+
+ queue.submit(task, on_complete=on_complete)
+
+ # Wait for task to complete (may need more time under load)
+ for _ in range(10):
+ if 42 in results:
+ break
+ time.sleep(0.05)
+
+ assert 42 in results
+
+ TaskQueue.reset_instance()
+
+ def test_on_error_callback(self):
+ """Test that on_error callback is called on exception."""
+ from src.network.tasks import TaskQueue
+
+ TaskQueue.reset_instance()
+ queue = TaskQueue()
+
+ errors = []
+
+ def failing_task():
+ raise ValueError("Test error")
+
+ def on_error(e):
+ errors.append(str(e))
+
+ with patch('src.network.tasks.GLib') as mock_glib:
+ mock_glib.idle_add = lambda cb, *args: cb(*args) if args else cb()
+
+ queue.submit(failing_task, on_error=on_error)
+
+ # Wait for task to complete (may need more time under load)
+ for _ in range(10):
+ if len(errors) == 1:
+ break
+ time.sleep(0.05)
+
+ assert len(errors) == 1
+ assert "Test error" in errors[0]
+
+ TaskQueue.reset_instance()
+
+ def test_cancel_task(self):
+ """Test task cancellation."""
+ from src.network.tasks import TaskQueue
+
+ TaskQueue.reset_instance()
+ queue = TaskQueue()
+
+ result = []
+
+ def slow_task():
+ time.sleep(1)
+ result.append("completed")
+ return True
+
+ with patch('src.network.tasks.GLib') as mock_glib:
+ mock_glib.idle_add = lambda cb, *args: cb(*args) if args else cb()
+
+ task_id = queue.submit(slow_task)
+
+ # Cancel immediately
+ cancelled = queue.cancel(task_id)
+
+ # May or may not be cancellable depending on timing
+ assert isinstance(cancelled, bool)
+
+ # Wait briefly
+ time.sleep(0.1)
+
+ TaskQueue.reset_instance()
+
+ def test_pending_count(self):
+ """Test pending task count."""
+ from src.network.tasks import TaskQueue
+
+ TaskQueue.reset_instance()
+ queue = TaskQueue()
+
+ initial_count = queue.pending_count
+ assert initial_count >= 0
+
+ TaskQueue.reset_instance()
+
+
+class TestAsyncImageLoading:
+ """Tests for async image loading."""
+
+ def test_load_image_async_cached(self):
+ """Test that cached images return -1 (no task needed)."""
+ from src.network.images import load_image_async, load_image, ImageCache
+
+ # Clear cache
+ ImageCache().clear()
+
+ # Load an image synchronously first (to cache it)
+ data_url = (
+ "data:image/png;base64,"
+ "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="
+ )
+ image = load_image(data_url)
+ assert image is not None
+
+ # Now load async - should hit cache and return -1 (no task)
+ # We don't need a callback for this test - just checking return value
+ task_id = load_image_async(data_url, on_complete=None)
+
+ # Cached loads return -1 (no task created)
+ assert task_id == -1
+
+ def test_load_image_async_uncached(self):
+ """Test that uncached images create tasks."""
+ from src.network.images import load_image_async, ImageCache
+ from src.network.tasks import TaskQueue
+
+ # Clear cache
+ ImageCache().clear()
+ TaskQueue.reset_instance()
+
+ # Use a data URL that's not cached
+ data_url = (
+ "data:image/png;base64,"
+ "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAYAAABytg0kAAAADklEQVR42mP8z8DwHwYAAQYBA/5h2aw4AAAAAElFTkSuQmCC"
+ )
+
+ # Patch GLib.idle_add to call callbacks immediately (no GTK main loop in tests)
+ with patch('src.network.tasks.GLib') as mock_glib:
+ mock_glib.idle_add = lambda cb, *args: cb(*args) if args else cb()
+
+ # Without a callback, it just submits the task
+ task_id = load_image_async(data_url, on_complete=None)
+
+ # Should create a task (non-negative ID)
+ assert task_id >= 0
+
+ # Wait for task to complete
+ time.sleep(0.3)
+
+ # Image should now be cached
+ assert ImageCache().has(data_url)
+
+ TaskQueue.reset_instance()