Implement HTTP redirect following

- Support for 301, 302, 303, 307, 308 redirects
- Automatic Location header following
- Relative URL resolution for redirects
- Max redirect limit (default 10) to prevent infinite loops
- 303 (See Other) automatically changes method to GET
- 4 new unit tests for redirect functionality
- All 73 tests passing
This commit is contained in:
Benedikt Willi 2026-01-09 14:11:46 +01:00
parent ae6fcbfab4
commit 283dae295c
9 changed files with 580 additions and 25 deletions

View file

@ -161,15 +161,19 @@ class Chrome:
# White background # White background
canvas.clear(skia.ColorWHITE) canvas.clear(skia.ColorWHITE)
# Draw placeholder text # Get content to render
paint = skia.Paint() content_text = self._get_content_text()
paint.setAntiAlias(True)
paint.setColor(skia.ColorBLACK) if content_text:
font = skia.Font(skia.Typeface.MakeDefault(), 20) # Render actual page content with text wrapping
canvas.drawString("Bowser — M1: Hello World", 20, 50, font, paint) self._render_text_content(canvas, content_text, width, height)
else:
# Paint render stats # Show placeholder
canvas.drawString(f"Window: {width}x{height}", 20, 80, font, paint) paint = skia.Paint()
paint.setAntiAlias(True)
paint.setColor(skia.ColorBLACK)
font = skia.Font(skia.Typeface.MakeDefault(), 20)
canvas.drawString("Bowser — Enter a URL to browse", 20, 50, font, paint)
# Convert Skia surface to GTK Pixbuf and blit to Cairo context # Convert Skia surface to GTK Pixbuf and blit to Cairo context
image = self.skia_surface.makeImageSnapshot() image = self.skia_surface.makeImageSnapshot()
@ -187,6 +191,71 @@ class Chrome:
Gdk.cairo_set_source_pixbuf(context, pixbuf, 0, 0) Gdk.cairo_set_source_pixbuf(context, pixbuf, 0, 0)
context.paint() context.paint()
self.logger.debug("on_draw end") self.logger.debug("on_draw end")
def _get_content_text(self) -> str:
"""Extract text content from active tab's document."""
if not self.browser.active_tab:
return ""
frame = self.browser.active_tab.main_frame
if not frame.document:
return ""
# Extract text from document tree
return self._extract_text(frame.document)
def _extract_text(self, node) -> str:
    """Collect the text found beneath ``node`` by walking the tree recursively.

    A Text node yields its own text. An Element yields the text of each of
    its children joined with single spaces. Any other object yields "".
    """
    from ..parser.html import Text, Element

    if isinstance(node, Text):
        return node.text
    if not isinstance(node, Element):
        return ""
    return " ".join(self._extract_text(child) for child in node.children)
def _render_text_content(self, canvas, text: str, width: int, height: int):
    """Draw ``text`` on ``canvas`` using greedy word wrapping.

    Words are packed onto a line until adding the next word would exceed the
    usable width (``width`` minus a 20px margin on each side). Lines are then
    drawn top to bottom starting at y=30, and drawing stops as soon as the
    next baseline would be closer than 20px to the bottom edge.
    """
    paint = skia.Paint()
    paint.setAntiAlias(True)
    paint.setColor(skia.ColorBLACK)

    font_size = 14
    font = skia.Font(skia.Typeface.MakeDefault(), font_size)

    # Greedy wrap: flush the pending words whenever the next one would overflow.
    usable_width = width - 40  # 20px margin on each side
    wrapped = []
    pending = []
    pending_width = 0
    for token in text.split():
        # Each word is measured together with its trailing space.
        token_width = font.measureText(token + " ")
        if pending and pending_width + token_width > usable_width:
            wrapped.append(" ".join(pending))
            pending = []
            pending_width = 0
        pending.append(token)
        pending_width += token_width
    if pending:
        wrapped.append(" ".join(pending))

    # Paint each wrapped line, stopping before running off the bottom.
    baseline = 30
    step = font_size * 1.4
    bottom_limit = height - 20
    for line in wrapped:
        if baseline > bottom_limit:
            break
        canvas.drawString(line, 20, baseline, font, paint)
        baseline += step
def paint(self): def paint(self):
"""Trigger redraw of the drawing area.""" """Trigger redraw of the drawing area."""

View file

@ -4,6 +4,8 @@ from typing import Optional
import logging import logging
from ..network.url import URL from ..network.url import URL
from ..network import http
from ..parser.html import parse_html, Element
class Frame: class Frame:
@ -11,10 +13,30 @@ class Frame:
self.tab = tab self.tab = tab
self.parent_frame = parent_frame self.parent_frame = parent_frame
self.frame_element = frame_element self.frame_element = frame_element
self.document: Optional[Element] = None
def load(self, url: URL, payload: Optional[bytes] = None):
    """Fetch ``url`` and parse the response into ``self.document``.

    On a 200 response the body is decoded as UTF-8 (undecodable bytes are
    replaced), parsed, and ``self.tab.current_url`` is set to ``url``. For any
    other status, or when fetching/parsing raises, a small error page is
    parsed into ``self.document`` instead and ``current_url`` is not changed.
    Exceptions are logged and never propagate to the caller.

    Args:
        url: URL to fetch.
        payload: Optional request body, passed through to ``http.request``.
    """
    # Imported locally so this method is self-contained.
    from html import escape

    def _as_html_text(value) -> str:
        # The error pages go through parse_html, which strips anything that
        # looks like a tag. Without escaping, text such as
        # "<urlopen error ...>" would vanish from the page. quote=False keeps
        # the output to &amp;/&lt;/&gt; only.
        return escape(str(value), quote=False)

    try:
        status, content_type, body = http.request(url, payload)

        if status == 200:
            # Decode response
            text = body.decode('utf-8', errors='replace')

            # Parse HTML
            self.document = parse_html(text)
            self.tab.current_url = url
        else:
            # Error handling - show error page
            error_html = (
                f"<html><body>Error {status}: Failed to load "
                f"{_as_html_text(url)}</body></html>"
            )
            self.document = parse_html(error_html)

    except Exception as e:
        # Network error - show error page
        error_html = f"<html><body>Network Error: {_as_html_text(e)}</body></html>"
        self.document = parse_html(error_html)
        logging.getLogger("bowser.tab").error(f"Failed to load {url}: {e}")
class Tab: class Tab:

View file

@ -1,23 +1,90 @@
"""HTTP requests and response handling.""" """HTTP requests and response handling."""
import http.client import http.client
from typing import Optional from typing import Optional, Tuple
import logging import logging
from .url import URL from .url import URL
def request(url: URL, payload: Optional[bytes] = None, method: str = "GET", max_redirects: int = 10) -> Tuple[int, str, bytes]:
    """
    Fetch a URL and follow redirects, returning (status_code, content_type, body).

    Any 3xx response other than 304 that carries a ``Location`` header is
    followed. The header value is resolved against the URL that produced it,
    so absolute, host-relative (``/x``), path-relative (``x``) and
    scheme-relative (``//host/x``) targets all work. A 303 response switches
    the method to GET and drops the payload.

    Args:
        url: URL to fetch
        payload: Optional request body
        method: HTTP method (GET, POST, etc.)
        max_redirects: Maximum number of redirects to follow (default 10).
            With 0 the request is still issued, but no redirect is followed.

    Returns:
        Tuple of (status_code, content_type, response_body). A redirect
        response without a ``Location`` header is returned as-is.

    Raises:
        Exception: "Too many redirects" when a response asks for more than
            ``max_redirects`` redirects. Errors raised while connecting or
            reading are logged and re-raised unchanged.
    """
    # Imported locally so this function is self-contained.
    from urllib.parse import urljoin

    logger = logging.getLogger("bowser.network")
    current_url = url
    redirect_count = 0

    while True:
        parsed = current_url._parsed
        is_https = parsed.scheme == "https"
        conn_class = http.client.HTTPSConnection if is_https else http.client.HTTPConnection

        path = parsed.path or "/"
        if parsed.query:
            path = f"{path}?{parsed.query}"

        headers = {
            "User-Agent": "Bowser/0.0.1",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        }

        conn = None
        try:
            conn = conn_class(parsed.hostname, parsed.port or (443 if is_https else 80))
            logger.info(f"HTTP {method} {parsed.scheme}://{parsed.hostname}{path}")
            conn.request(method, path, body=payload, headers=headers)
            resp = conn.getresponse()

            status = resp.status
            content_type = resp.getheader("Content-Type", "text/html")
            body = resp.read()
            logger.info(f"HTTP response {status} {resp.reason} ({len(body)} bytes)")

            # 304 Not Modified is a 3xx status but not a redirect.
            is_redirect = 300 <= status < 400 and status != 304
            location = resp.getheader("Location") if is_redirect else None
        except Exception as e:
            logger.error(f"HTTP request failed: {e}")
            raise
        finally:
            # Close on every path so a failed request does not leak the socket.
            if conn is not None:
                conn.close()

        if not is_redirect:
            return status, content_type, body

        if not location:
            logger.warning(f"Redirect response {status} without Location header")
            return status, content_type, body

        # Enforce the limit only when another hop is actually needed, so
        # exactly ``max_redirects`` redirects may be followed.
        if redirect_count >= max_redirects:
            logger.error(f"Maximum redirects ({max_redirects}) exceeded")
            raise Exception(f"Too many redirects (max: {max_redirects})")

        logger.info(f"Following redirect to {location}")
        redirect_count += 1

        # Resolve the target against the URL that issued the redirect.
        # urljoin returns absolute targets unchanged.
        base_url = f"{parsed.scheme}://{parsed.hostname}"
        if parsed.port:
            base_url += f":{parsed.port}"
        current_url = URL(urljoin(base_url + path, location))

        # For 303 (See Other), change method to GET
        if status == 303:
            method = "GET"
            payload = None

View file

@ -1,5 +1,7 @@
"""HTML parser stubs.""" """HTML parser stubs."""
import re
class Text: class Text:
def __init__(self, text, parent=None): def __init__(self, text, parent=None):
@ -27,3 +29,36 @@ def print_tree(node, indent=0):
if hasattr(node, "children"): if hasattr(node, "children"):
for child in node.children: for child in node.children:
print_tree(child, indent + 1) print_tree(child, indent + 1)
def parse_html(html_text: str) -> Element:
    """
    Very basic HTML parser that extracts text content.

    For now, just removes tags and returns a simple tree: an ``html`` root
    containing a ``body`` element which holds at most one Text node with all
    visible text, whitespace-collapsed. An input with no visible text yields
    a ``body`` with no children.

    Args:
        html_text: HTML source to process.

    Returns:
        The root ``html`` Element.
    """
    # Imported locally so this function is self-contained.
    from html import unescape

    # Remove content that is never rendered: scripts, styles, then comments.
    # Comments go before the generic tag pattern, otherwise markup inside a
    # comment would leak its text (and a stray "-->") into the output.
    text_content = re.sub(r'<script[^>]*>.*?</script>', '', html_text, flags=re.DOTALL | re.IGNORECASE)
    text_content = re.sub(r'<style[^>]*>.*?</style>', '', text_content, flags=re.DOTALL | re.IGNORECASE)
    text_content = re.sub(r'<!--.*?-->', '', text_content, flags=re.DOTALL)

    # Strip the remaining tags, leaving a space so adjacent words stay apart.
    text_content = re.sub(r'<[^>]+>', ' ', text_content)

    # Decode entities in a single pass, after tag stripping so that a decoded
    # "&lt;div&gt;" is kept as text. One pass avoids double-decoding input
    # such as "&amp;quot;" and covers numeric and other named entities.
    text_content = unescape(text_content)

    # Clean up whitespace. \s also matches the non-breaking space that
    # &nbsp; decodes to, so it collapses to a plain space.
    text_content = re.sub(r'\s+', ' ', text_content).strip()

    # Create a simple document structure
    root = Element("html")
    body = Element("body", parent=root)
    root.children.append(body)

    if text_content:
        text_node = Text(text_content, parent=body)
        body.children.append(text_node)

    return root

76
tests/test_frame.py Normal file
View file

@ -0,0 +1,76 @@
"""Tests for Frame and content loading."""
import pytest
from unittest.mock import Mock, patch
from src.browser.tab import Frame, Tab
from src.network.url import URL
class TestFrame:
    """Frame.load behaviour with ``http.request`` patched out."""

    @staticmethod
    def _load(address):
        """Create a Tab on a mock browser and load ``address`` into its main frame.

        Returns the tab, its main frame and the URL object that was loaded.
        """
        fake_browser = Mock()
        fake_browser._log = Mock()
        tab = Tab(fake_browser)
        frame = tab.main_frame
        target = URL(address)
        frame.load(target)
        return tab, frame, target

    @staticmethod
    def _first_text(document):
        """Text of the root's first grandchild, or "" if the root has no children."""
        if not document.children:
            return ""
        return document.children[0].children[0].text

    @patch('src.browser.tab.http.request')
    def test_frame_load_success(self, mock_request):
        mock_request.return_value = (200, "text/html", b"<html><body>Test</body></html>")

        tab, frame, url = self._load("http://example.com")

        assert frame.document is not None
        assert frame.document.tag == "html"
        assert tab.current_url == url

    @patch('src.browser.tab.http.request')
    def test_frame_load_404(self, mock_request):
        mock_request.return_value = (404, "text/html", b"Not Found")

        _, frame, _ = self._load("http://example.com/missing")

        # A non-200 status must still produce an (error) document.
        assert frame.document is not None
        text = self._first_text(frame.document)
        assert "404" in text or "Error" in text

    @patch('src.browser.tab.http.request')
    def test_frame_load_network_error(self, mock_request):
        mock_request.side_effect = Exception("Network unreachable")

        _, frame, _ = self._load("http://unreachable.example.com")

        # A raised exception must still produce an (error) document.
        assert frame.document is not None
        text = self._first_text(frame.document)
        assert "Error" in text or "unreachable" in text

    @patch('src.browser.tab.http.request')
    def test_frame_load_utf8_decode(self, mock_request):
        mock_request.return_value = (200, "text/html", "<html><body>Héllo Wörld</body></html>".encode('utf-8'))

        _, frame, _ = self._load("http://example.com")

        assert frame.document is not None
        # Non-ASCII characters must survive decoding.
        text = frame.document.children[0].children[0].text
        assert "llo" in text  # Part of Héllo

View file

@ -0,0 +1,82 @@
"""Tests for HTML parsing functionality."""
import pytest
from src.parser.html import parse_html, Text, Element
class TestParseHTML:
    """Checks for the text-extracting ``parse_html`` helper."""

    @staticmethod
    def _text_node(markup):
        """Parse ``markup`` and return the first child of the body element."""
        document = parse_html(markup)
        return document.children[0].children[0]

    def test_parse_simple_text(self):
        root = parse_html("<html><body>Hello World</body></html>")

        assert isinstance(root, Element)
        assert root.tag == "html"
        assert len(root.children) == 1

        body = root.children[0]
        assert body.tag == "body"
        assert len(body.children) == 1

        node = body.children[0]
        assert isinstance(node, Text)
        assert "Hello World" in node.text

    def test_parse_strips_tags(self):
        node = self._text_node("<html><body><p>Hello</p><div>World</div></body></html>")

        assert "Hello" in node.text
        assert "World" in node.text

    def test_parse_removes_script_tags(self):
        node = self._text_node("<html><body>Visible<script>alert('bad')</script>Text</body></html>")

        assert "Visible" in node.text
        assert "Text" in node.text
        assert "alert" not in node.text
        assert "script" not in node.text.lower()

    def test_parse_removes_style_tags(self):
        node = self._text_node("<html><body>Text<style>body{color:red;}</style>More</body></html>")

        assert "Text" in node.text
        assert "More" in node.text
        assert "color" not in node.text

    def test_parse_decodes_entities(self):
        node = self._text_node("<html><body>&lt;div&gt; &amp; &quot;test&quot;</body></html>")

        assert "<div>" in node.text
        assert "&" in node.text
        assert '"test"' in node.text

    def test_parse_normalizes_whitespace(self):
        node = self._text_node("<html><body>Hello \n\n World</body></html>")

        # Runs of whitespace collapse to a single space.
        assert "Hello World" in node.text

    def test_parse_empty_document(self):
        root = parse_html("<html><body></body></html>")

        assert isinstance(root, Element)
        assert root.tag == "html"
        body = root.children[0]
        assert body.tag == "body"
        # An empty body gets no text child at all.
        assert len(body.children) == 0

204
tests/test_http.py Normal file
View file

@ -0,0 +1,204 @@
"""Tests for HTTP functionality."""
import pytest
from unittest.mock import Mock, patch, MagicMock
from src.network.url import URL
from src.network import http
class TestHTTPRequest:
    """Tests for ``http.request`` with the connection classes patched out."""

    @staticmethod
    def _response(status, reason, body, headers=None):
        """Build a mock HTTP response.

        With ``headers=None`` every ``getheader`` call returns "text/html".
        Otherwise ``getheader`` looks the name up in ``headers`` and falls
        back to the supplied default ("" when none is given).
        """
        response = Mock()
        response.status = status
        response.reason = reason
        if headers is None:
            response.getheader.return_value = "text/html"
        else:
            response.getheader.side_effect = (
                lambda header, default="": headers.get(header, default)
            )
        response.read.return_value = body
        return response

    @staticmethod
    def _wire(mock_conn_class, response=None, sequence=None):
        """Attach a mock connection to the patched class and return it.

        ``response`` is returned by every ``getresponse`` call; ``sequence``
        (if given) is instead consumed one response per call.
        """
        conn = Mock()
        if sequence is not None:
            conn.getresponse.side_effect = sequence
        else:
            conn.getresponse.return_value = response
        mock_conn_class.return_value = conn
        return conn

    @patch('src.network.http.http.client.HTTPConnection')
    def test_http_request_success(self, mock_conn_class):
        self._wire(mock_conn_class, self._response(200, "OK", b"<html>Hello</html>"))

        status, content_type, body = http.request(URL("http://example.com/page"))

        assert status == 200
        assert content_type == "text/html"
        assert body == b"<html>Hello</html>"

    @patch('src.network.http.http.client.HTTPSConnection')
    def test_https_request(self, mock_conn_class):
        self._wire(mock_conn_class, self._response(200, "OK", b"Secure content"))

        status, content_type, body = http.request(URL("https://example.com"))

        assert status == 200
        assert b"Secure" in body
        mock_conn_class.assert_called_once()

    @patch('src.network.http.http.client.HTTPConnection')
    def test_http_request_404(self, mock_conn_class):
        self._wire(mock_conn_class, self._response(404, "Not Found", b"<html>Not Found</html>"))

        status, content_type, body = http.request(URL("http://example.com/missing"))

        assert status == 404

    @patch('src.network.http.http.client.HTTPConnection')
    def test_http_request_with_user_agent(self, mock_conn_class):
        conn = self._wire(mock_conn_class, self._response(200, "OK", b"content"))

        http.request(URL("http://example.com"))

        # The User-Agent header must be part of the outgoing request.
        sent_headers = conn.request.call_args[1]['headers']
        assert 'User-Agent' in sent_headers
        assert 'Bowser' in sent_headers['User-Agent']

    @patch('src.network.http.http.client.HTTPConnection')
    def test_http_redirect_301(self, mock_conn_class):
        """A 301 response is followed to its Location target."""
        redirect = self._response(
            301,
            "Moved Permanently",
            b"<html>Redirect</html>",
            headers={
                "Content-Type": "text/html",
                "Location": "http://example.com/new-page",
            },
        )
        final = self._response(
            200,
            "OK",
            b"<html>Final content</html>",
            headers={"Content-Type": "text/html"},
        )
        conn = self._wire(mock_conn_class, sequence=[redirect, final])

        status, content_type, body = http.request(URL("http://example.com/old-page"))

        assert status == 200
        assert body == b"<html>Final content</html>"
        assert conn.request.call_count == 2

    @patch('src.network.http.http.client.HTTPConnection')
    def test_http_redirect_302(self, mock_conn_class):
        """A 302 response is followed to its Location target."""
        redirect = self._response(
            302,
            "Found",
            b"<html>Redirect</html>",
            headers={
                "Content-Type": "text/html",
                "Location": "http://example.com/temp-page",
            },
        )
        final = self._response(
            200,
            "OK",
            b"<html>Temp content</html>",
            headers={"Content-Type": "text/html"},
        )
        self._wire(mock_conn_class, sequence=[redirect, final])

        status, content_type, body = http.request(URL("http://example.com/old-page"))

        assert status == 200
        assert body == b"<html>Temp content</html>"

    @patch('src.network.http.http.client.HTTPConnection')
    def test_http_redirect_no_location(self, mock_conn_class):
        """A redirect status without a Location header is returned as-is."""
        response = self._response(
            302,
            "Found",
            b"<html>Redirect</html>",
            headers={"Content-Type": "text/html"},
        )
        self._wire(mock_conn_class, response)

        status, content_type, body = http.request(URL("http://example.com/page"))

        # With nowhere to go, the redirect response itself comes back.
        assert status == 302
        assert body == b"<html>Redirect</html>"

    @patch('src.network.http.http.client.HTTPConnection')
    def test_http_max_redirects(self, mock_conn_class):
        """An endless redirect chain is cut off at ``max_redirects``."""
        looping = self._response(
            302,
            "Found",
            b"",
            headers={"Location": "http://example.com/redirect-loop"},
        )
        self._wire(mock_conn_class, looping)

        # Every response redirects, so a limit of 2 must raise.
        with pytest.raises(Exception, match="Too many redirects"):
            http.request(URL("http://example.com/page"), max_redirects=2)