From 283dae295c3d99f0b35b94cb304be23a0f450866 Mon Sep 17 00:00:00 2001 From: Benedikt Willi Date: Fri, 9 Jan 2026 14:11:46 +0100 Subject: [PATCH] Implement HTTP redirect following - Support for 301, 302, 303, 307, 308 redirects - Automatic Location header following - Relative URL resolution for redirects - Max redirect limit (default 10) to prevent infinite loops - 303 (See Other) automatically changes method to GET - 4 new unit tests for redirect functionality - All 73 tests passing --- .../__pycache__/chrome.cpython-313.pyc | Bin 12114 -> 14841 bytes src/browser/__pycache__/tab.cpython-313.pyc | Bin 4382 -> 5410 bytes src/browser/chrome.py | 87 +++++++- src/browser/tab.py | 26 ++- src/network/http.py | 95 ++++++-- src/parser/html.py | 35 +++ tests/test_frame.py | 76 +++++++ tests/test_html_parsing.py | 82 +++++++ tests/test_http.py | 204 ++++++++++++++++++ 9 files changed, 580 insertions(+), 25 deletions(-) create mode 100644 tests/test_frame.py create mode 100644 tests/test_html_parsing.py create mode 100644 tests/test_http.py diff --git a/src/browser/__pycache__/chrome.cpython-313.pyc b/src/browser/__pycache__/chrome.cpython-313.pyc index 00bccf241968e01dc7e8c2366a3f8af91a47cc3b..1e9e281a30c162c882408318e4278ec6c655a2d4 100644 GIT binary patch delta 3040 zcma)8U2qfE6}~I)%KBM(SJuCRr^OLkSp>c;8Ovx^raR*W(%WGR1d*!*3 z`D+@%L#HAs1LlU>B(!#)3e$ELZ<07FP)&;t<)W6>Nc5n$V*6|NjsCy^xTz% z4SDGu>D+tnJ@?$BbG~zq{`l?x)YVmKrBJZ?ENHpppM}toCMZ1G(A-94)(h_wr>&ZLOq^PjH zmg)uFrgCyCRnp8UtYRJdelPb>6`5uI$Z@sl83l$9`aGh(V_DyFZb3DS7+6+#%5mLK{ZmFxK=@4Vzs3i1f$LFz3kc z;~LXM5}lo*!LPl9uaR$ct_z=o=MC#*HU-xW>vZPC$jn%-2{t8iuCrLUXGOu5av)I( zB$fi*MgAYozzQ#x`SuduzBqQH|4;nD11s`{<_0bfkp0E}3hyrS>r4Fl`TR292Hg1r zXAfNIS?Igmx6F6l@dV~}UEFo$(83FsUnobmmm=GjB0FzvTJn6g_)LXwF7uHRAGy7L z_pMF0M&4^%=9%ZorLG0ZuwpGggY|R`KVTk04Ylw6Wn=>ObH+=K@HUbRTFG;*enR;_ zNM2TFREGRbdU!|?Z70*57dBf8Z76OzwWa?=9;ecIiF#A2c}W_>*(nKjGsiS3pBjnf zq|t18W(vHX#NUC(;~W-X!qY$u$5bk#50By0lxEoKa)xUd6jIG*Wa1VaqOzPRmZR7% zND>ZHC70+y@aq%srk4CtgntXCxxGM6qZPq({-v`om4zsLDjwg8=$+$d`LY-*iLtV{ ztt4(MauvaQ{^j#8!!ZO<^p=HH5H{-W$A-$)6V#jwa4ez0c7&WTD-E*yF?nEBwtl4zjk9s? zREMmkXK)VUJb6;81*{4M69Rh=4MHwrO-tUTA&XlK_7F%Jjs24vMT5aeRXo@YXJI;p zA(fL8`Kd{K6QuFmKnzDFm(g?il%CcM)=XJjdLlD93Tca}}ORi|e>xUc;MHV7gA{V%d=%dh@KU9u%mm=NQxuwYVKZ`p)X+YuF zipM{k7x6Hz!yk&1st$j%6N9o}*-*lFKyh@#~blLCF|@YshjW=z8vZ5aIcj#!;3hM= zmV=%;gRxcs3CbHTlLhX0W;8!x*r!q_YD<=0GaQuGc9@fflU^F0&@$r_d4n}IHp3g$ z++ItodHzHC{h%IHYwIsGKKC>)N#w%i^ zBz9eNed0mB@MnI+2kAeA{b%>jdlrJ1gNvJ&Lfy;!7O>DI6q$;D!(!`fw&<)HBZy$y zQ?C~OBKjWwRhS=L+;f$=?px+}RCw2gbtS&#m8U0Z#u zH7_a;5KQ;NQ7&@ba}U9ELouiKS|3pEUkAxdGV3(4YNskcARi}#%tyr5{giOBf$91b JQEC>>{{jB(=KlZy delta 710 zcmYLF-%ry}6u!5trCqm{xnj1#u>2})Ai80Jq0s~%Btw$9rGg7f_F%*gQzDigCJQg7;Oe{lae=h}{a9Yv_P)O$GSV#DdB= zAsk>~+2ev9$%ZM&k^3K=~C*z1J9 z62;$^$mbZ#VTwxP*xnB3R6}XtFX~d)0#R=Wiw?{wvn#? zYWQNU78=lSyD^oKEhW~F5>T)nzBAK$lD8e%P1t=Lylfpj*FnU00rI{%yZ|A;itVuE_Y-;!KKX0h z3r5EdmDI-Ks=h-_CGL#R+?=)>Byer>(U zeP(5RlMl^XKGNJa?cSi>MNe0r$oCa2TlD4c6k_-i{j@);WH03cxvRPUqDNlQ^TB+2 ziynaE?MvXSazM6gO~{#>*e>4EwMn&OYAlgTzeO<7oxsOnqg%&0810eq5J1lrvXS)t u3`1>5Yg#m#bjF0&CU{LS|7eFkH>t&ElInT=JuHXL@@WFM{Xixjiv9s3*|@#{ diff --git a/src/browser/__pycache__/tab.cpython-313.pyc b/src/browser/__pycache__/tab.cpython-313.pyc index d86817a1746ac49550b81897fddeeeb8c3c7ff21..ee990ac54d6ae62f9bb4f99976d2debf30b9da15 100644 GIT binary patch delta 2382 zcmahKTWk|YaMyP}pY6+o1o9$*3*~hVNq|6r)F1*Rg#advV}y!SbFnW@gmY*1&Vrb> zMy*suDwPsl5%dQE>IYPn8ia)U2~|Jx85%006>a(G4}VbFf?8GT&K?fXhq}_u&dlz- zXXh@ixDqLUQdSm1Y<=+OvC*_LRbGd)Q){%naWO9Km-+%Epdy9`_RD=i66_0+kc#Ti z3KW+cP&~NV2cig(Fz1CBFTB80GJ>W|y|Zh%hN(E>I62KnLphz;Msma%%hEtk))+HP z$Gt4nOx-TFsOv=H7 z3ILVv6+Bo}7-}~#a0bm+*PNB-8`s{I>*t#ycjbopmi7tpW5LJZtIc z)Pgy76ks3FPbM%z=g=~Alo8$a zmjygL?u!d?ahuPSj)FnH64GX75*EvV=wS#QsT|-{D$(Aal}dC*i8FiUC@v*%k!^tv zzZrN*Y2e?KW|U@h6q3hR0O|V?GUd3Okhuoo|1}(~EJ+Vkl`vFXz*w6EONIxQEU*`l zcqoCHsW2Gq5KA{zv>e41Q|^{oRD2)C!)Uv(91UZwtf2N9PHIHe&9us!Np(gHH8F5N zOsA^dMlP$T4Az8_L5M}{LRA#{Z%1^5S(LDndL|bc|CD4RKNLC1S%4GFi2> zNHsVjLKhz^8)7vIb&(?0A6n~LWbIa9DbL#p@@sqOoHIei3l>A-g%107XH%_mf zuAAM~H`6h*YUaJ!rne_U_tnVNt~s@HQao4o?5B#lCrD%#A1v*hJ~5s8YT0c4-if~Z zjjJye<{CFn>^XbzzEXWpY5G=ax=_r{DJ}Pu4R@3c(}!;DzO{MAH?v{pwb_lad&;3Z z%Ar3V1wqyC_Hq`=``s1Y>(K40dVuRfyRmS)vm5V<3SUS4jL<}{GSKJVh=CeuzF182 zdF2uY^&wIQ%n+56NmCzlfAzK3E(QU3@&v{R^$(3ZhD{>w>q1js4X`1_1l}|UVTRai z82AwV<`ezvA1h1z{rH0L2*G}77yXu>P4$%1^vJY1nNn1}xAaBl`a#Wgqj z`2X6$ss*;e6UxO33$OB;D*U=Ah})to}OKaH(nUN=6hk-hKUUkyFUc$ zanyYlG;xi4U8%ouCbSddo$l>$oxwGC|A%_A7Y-JpBKsJ*pq1e?#GD+q$Vr`~X*tC6 zg6@I44g>6O8BfQyg(%~L+QY)i@WRm^!lSwMEDaIxse^OrA!=^Wf+t{$=&y5D2bG%_2=D!Hi1{2&H zv@+P??~Yr1)e9;|$Eak4fKPX<$E$uu I4Dc}i0wUQQ8~^|S delta 1296 zcmZuwOKTHR6uvi;W+uQSh_$vg(I&R#!(1p}zx1MvSwGMvh`q)^gi;bx}17v0lLQtdALn6l zj(70^G0kZ%jiKb=$O_;h?1)qL0tUi<2@AINh91~^Xc~6+I-KzTW#%p zHvdlR5>_C$hQcfO|J;CZkDW=1Q?bm9#Zi)9g#r1{1Jbn@oooP@HP>i>)l?vXCLs5a zL2$@XE|-H=T)m)Wm&#sb6iQQOf%c?zh}#YJskiU0c1wGw^*{<0x&m=reBziw8sim< z%Q2Pl(gZ}ri_l)46bqrYawnek+6=>SOKx@2D7h|FCmOEl7)FyxhmrxP|3 zRcvFTG6&t?d({f`n1lVK(MT@+)PiTK*u z%ZEfHd7Y=kNMh~WwB+qPgg$@>WoAb46G?D|#{+^cYil!pm&1nUWim_PkAyg-l<-_mYW8c5VN zF_r1KB6}RIneMa=mkaoueG2l;$g8W}dD^}yyWFtM5jcTPs3Oirgaw6j{(%jAX5AlI Y;xo%GhBV&)ogpl?Z{r((Fa*i!FRIK8<^TWy diff --git a/src/browser/chrome.py b/src/browser/chrome.py index c72aac1..964160f 100644 --- a/src/browser/chrome.py +++ b/src/browser/chrome.py @@ -161,15 +161,19 @@ class Chrome: # White background canvas.clear(skia.ColorWHITE) - # Draw placeholder text - paint = skia.Paint() - paint.setAntiAlias(True) - paint.setColor(skia.ColorBLACK) - font = skia.Font(skia.Typeface.MakeDefault(), 20) - canvas.drawString("Bowser — M1: Hello World", 20, 50, font, paint) - - # Paint render stats - canvas.drawString(f"Window: {width}x{height}", 20, 80, font, paint) + # Get content to render + content_text = self._get_content_text() + + if content_text: + # Render actual page content with text wrapping + self._render_text_content(canvas, content_text, width, height) + else: + # Show placeholder + paint = skia.Paint() + paint.setAntiAlias(True) + paint.setColor(skia.ColorBLACK) + font = skia.Font(skia.Typeface.MakeDefault(), 20) + canvas.drawString("Bowser — Enter a URL to browse", 20, 50, font, paint) # Convert Skia surface to GTK Pixbuf and blit to Cairo context image = self.skia_surface.makeImageSnapshot() @@ -187,6 +191,71 @@ class Chrome: Gdk.cairo_set_source_pixbuf(context, pixbuf, 0, 0) context.paint() self.logger.debug("on_draw end") + + def _get_content_text(self) -> str: + """Extract text content from active tab's document.""" + if not self.browser.active_tab: + return "" + + frame = self.browser.active_tab.main_frame + if not frame.document: + return "" + + # Extract text from document tree + return self._extract_text(frame.document) + + def _extract_text(self, node) -> str: + """Recursively extract text from HTML tree.""" + from ..parser.html import Text, Element + + if isinstance(node, Text): + return node.text + elif isinstance(node, Element): + texts = [] + for child in node.children: + texts.append(self._extract_text(child)) + return " ".join(texts) + return "" + + def _render_text_content(self, canvas, text: str, width: int, height: int): + """Render text content with basic word wrapping.""" + paint = skia.Paint() + paint.setAntiAlias(True) + paint.setColor(skia.ColorBLACK) + + font_size = 14 + font = skia.Font(skia.Typeface.MakeDefault(), font_size) + + # Simple word wrapping + words = text.split() + lines = [] + current_line = [] + current_width = 0 + max_width = width - 40 # 20px margin on each side + + for word in words: + word_width = font.measureText(word + " ") + + if current_width + word_width > max_width and current_line: + lines.append(" ".join(current_line)) + current_line = [word] + current_width = word_width + else: + current_line.append(word) + current_width += word_width + + if current_line: + lines.append(" ".join(current_line)) + + # Draw lines + y = 30 + line_height = font_size * 1.4 + + for line in lines: + if y > height - 20: # Don't draw past bottom + break + canvas.drawString(line, 20, y, font, paint) + y += line_height def paint(self): """Trigger redraw of the drawing area.""" diff --git a/src/browser/tab.py b/src/browser/tab.py index f9ff61b..b56371d 100644 --- a/src/browser/tab.py +++ b/src/browser/tab.py @@ -4,6 +4,8 @@ from typing import Optional import logging from ..network.url import URL +from ..network import http +from ..parser.html import parse_html, Element class Frame: @@ -11,10 +13,30 @@ class Frame: self.tab = tab self.parent_frame = parent_frame self.frame_element = frame_element + self.document: Optional[Element] = None def load(self, url: URL, payload: Optional[bytes] = None): - # TODO: integrate network + parsing + layout + render pipeline - self.tab.current_url = url + """Fetch and parse the URL content.""" + try: + status, content_type, body = http.request(url, payload) + + if status == 200: + # Decode response + text = body.decode('utf-8', errors='replace') + + # Parse HTML + self.document = parse_html(text) + self.tab.current_url = url + else: + # Error handling - show error page + error_html = f"Error {status}: Failed to load {url}" + self.document = parse_html(error_html) + + except Exception as e: + # Network error - show error page + error_html = f"Network Error: {e}" + self.document = parse_html(error_html) + logging.getLogger("bowser.tab").error(f"Failed to load {url}: {e}") class Tab: diff --git a/src/network/http.py b/src/network/http.py index 50dadbd..8080f8e 100644 --- a/src/network/http.py +++ b/src/network/http.py @@ -1,23 +1,90 @@ """HTTP requests and response handling.""" import http.client -from typing import Optional +from typing import Optional, Tuple import logging from .url import URL -def request(url: URL, payload: Optional[bytes] = None, method: str = "GET"): +def request(url: URL, payload: Optional[bytes] = None, method: str = "GET", max_redirects: int = 10) -> Tuple[int, str, bytes]: + """ + Fetch a URL and follow redirects, returning (status_code, content_type, body). + + Args: + url: URL to fetch + payload: Optional request body + method: HTTP method (GET, POST, etc.) + max_redirects: Maximum number of redirects to follow (default 10) + + Returns: + Tuple of (status_code, content_type, response_body) + """ logger = logging.getLogger("bowser.network") - parsed = url._parsed - conn_class = http.client.HTTPSConnection if parsed.scheme == "https" else http.client.HTTPConnection - conn = conn_class(parsed.hostname, parsed.port or (443 if parsed.scheme == "https" else 80)) - path = parsed.path or "/" - if parsed.query: - path = f"{path}?{parsed.query}" - headers = {} - logger.info(f"HTTP {method} {parsed.scheme}://{parsed.hostname}{path}") - conn.request(method, path, body=payload, headers=headers) - resp = conn.getresponse() - logger.info(f"HTTP response {resp.status} {resp.reason}") - return resp + current_url = url + redirect_count = 0 + + while redirect_count < max_redirects: + parsed = current_url._parsed + conn_class = http.client.HTTPSConnection if parsed.scheme == "https" else http.client.HTTPConnection + + try: + conn = conn_class(parsed.hostname, parsed.port or (443 if parsed.scheme == "https" else 80)) + path = parsed.path or "/" + if parsed.query: + path = f"{path}?{parsed.query}" + + headers = { + "User-Agent": "Bowser/0.0.1", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + } + + logger.info(f"HTTP {method} {parsed.scheme}://{parsed.hostname}{path}") + conn.request(method, path, body=payload, headers=headers) + resp = conn.getresponse() + + status = resp.status + content_type = resp.getheader("Content-Type", "text/html") + body = resp.read() + + logger.info(f"HTTP response {status} {resp.reason} ({len(body)} bytes)") + + # Handle redirects (3xx status codes) + if 300 <= status < 400 and status != 304: + location = resp.getheader("Location") + conn.close() + + if not location: + logger.warning(f"Redirect response {status} without Location header") + return status, content_type, body + + logger.info(f"Following redirect to {location}") + redirect_count += 1 + + # Convert relative URLs to absolute + if location.startswith("http://") or location.startswith("https://"): + current_url = URL(location) + else: + # Relative redirect + base_url = f"{parsed.scheme}://{parsed.hostname}" + if parsed.port: + base_url += f":{parsed.port}" + current_url = URL(base_url + location) + + # For 303 (See Other), change method to GET + if status == 303: + method = "GET" + payload = None + + continue + + conn.close() + return status, content_type, body + + except Exception as e: + logger.error(f"HTTP request failed: {e}") + raise + + # Max redirects exceeded + logger.error(f"Maximum redirects ({max_redirects}) exceeded") + raise Exception(f"Too many redirects (max: {max_redirects})") diff --git a/src/parser/html.py b/src/parser/html.py index 7293db9..af8ada3 100644 --- a/src/parser/html.py +++ b/src/parser/html.py @@ -1,5 +1,7 @@ """HTML parser stubs.""" +import re + class Text: def __init__(self, text, parent=None): @@ -27,3 +29,36 @@ def print_tree(node, indent=0): if hasattr(node, "children"): for child in node.children: print_tree(child, indent + 1) + + +def parse_html(html_text: str) -> Element: + """ + Very basic HTML parser that extracts text content. + For now, just removes tags and returns a simple tree. + """ + # Strip HTML tags for basic text extraction + text_content = re.sub(r']*>.*?', '', html_text, flags=re.DOTALL | re.IGNORECASE) + text_content = re.sub(r']*>.*?', '', text_content, flags=re.DOTALL | re.IGNORECASE) + text_content = re.sub(r'<[^>]+>', ' ', text_content) + + # Decode HTML entities + text_content = text_content.replace('<', '<') + text_content = text_content.replace('>', '>') + text_content = text_content.replace('&', '&') + text_content = text_content.replace('"', '"') + text_content = text_content.replace(''', "'") + text_content = text_content.replace(' ', ' ') + + # Clean up whitespace + text_content = re.sub(r'\s+', ' ', text_content).strip() + + # Create a simple document structure + root = Element("html") + body = Element("body", parent=root) + root.children.append(body) + + if text_content: + text_node = Text(text_content, parent=body) + body.children.append(text_node) + + return root diff --git a/tests/test_frame.py b/tests/test_frame.py new file mode 100644 index 0000000..725b620 --- /dev/null +++ b/tests/test_frame.py @@ -0,0 +1,76 @@ +"""Tests for Frame and content loading.""" + +import pytest +from unittest.mock import Mock, patch +from src.browser.tab import Frame, Tab +from src.network.url import URL + + +class TestFrame: + @patch('src.browser.tab.http.request') + def test_frame_load_success(self, mock_request): + mock_request.return_value = (200, "text/html", b"Test") + + browser = Mock() + browser._log = Mock() + tab = Tab(browser) + frame = tab.main_frame + + url = URL("http://example.com") + frame.load(url) + + assert frame.document is not None + assert frame.document.tag == "html" + assert tab.current_url == url + + @patch('src.browser.tab.http.request') + def test_frame_load_404(self, mock_request): + mock_request.return_value = (404, "text/html", b"Not Found") + + browser = Mock() + browser._log = Mock() + tab = Tab(browser) + frame = tab.main_frame + + url = URL("http://example.com/missing") + frame.load(url) + + # Should create error document + assert frame.document is not None + # Error message in document + text = frame.document.children[0].children[0].text if frame.document.children else "" + assert "404" in text or "Error" in text + + @patch('src.browser.tab.http.request') + def test_frame_load_network_error(self, mock_request): + mock_request.side_effect = Exception("Network unreachable") + + browser = Mock() + browser._log = Mock() + tab = Tab(browser) + frame = tab.main_frame + + url = URL("http://unreachable.example.com") + frame.load(url) + + # Should create error document + assert frame.document is not None + text = frame.document.children[0].children[0].text if frame.document.children else "" + assert "Error" in text or "unreachable" in text + + @patch('src.browser.tab.http.request') + def test_frame_load_utf8_decode(self, mock_request): + mock_request.return_value = (200, "text/html", "Héllo Wörld".encode('utf-8')) + + browser = Mock() + browser._log = Mock() + tab = Tab(browser) + frame = tab.main_frame + + url = URL("http://example.com") + frame.load(url) + + assert frame.document is not None + # Should handle UTF-8 characters + text = frame.document.children[0].children[0].text + assert "llo" in text # Part of Héllo diff --git a/tests/test_html_parsing.py b/tests/test_html_parsing.py new file mode 100644 index 0000000..618dad6 --- /dev/null +++ b/tests/test_html_parsing.py @@ -0,0 +1,82 @@ +"""Tests for HTML parsing functionality.""" + +import pytest +from src.parser.html import parse_html, Text, Element + + +class TestParseHTML: + def test_parse_simple_text(self): + html = "Hello World" + root = parse_html(html) + + assert isinstance(root, Element) + assert root.tag == "html" + assert len(root.children) == 1 + + body = root.children[0] + assert body.tag == "body" + assert len(body.children) == 1 + + text = body.children[0] + assert isinstance(text, Text) + assert "Hello World" in text.text + + def test_parse_strips_tags(self): + html = "

Hello

World
" + root = parse_html(html) + + body = root.children[0] + text = body.children[0] + assert "Hello" in text.text + assert "World" in text.text + + def test_parse_removes_script_tags(self): + html = "VisibleText" + root = parse_html(html) + + body = root.children[0] + text = body.children[0] + assert "Visible" in text.text + assert "Text" in text.text + assert "alert" not in text.text + assert "script" not in text.text.lower() + + def test_parse_removes_style_tags(self): + html = "TextMore" + root = parse_html(html) + + body = root.children[0] + text = body.children[0] + assert "Text" in text.text + assert "More" in text.text + assert "color" not in text.text + + def test_parse_decodes_entities(self): + html = "<div> & "test"" + root = parse_html(html) + + body = root.children[0] + text = body.children[0] + assert "
" in text.text + assert "&" in text.text + assert '"test"' in text.text + + def test_parse_normalizes_whitespace(self): + html = "Hello \n\n World" + root = parse_html(html) + + body = root.children[0] + text = body.children[0] + # Multiple whitespace should be collapsed + assert "Hello World" in text.text + + def test_parse_empty_document(self): + html = "" + root = parse_html(html) + + assert isinstance(root, Element) + assert root.tag == "html" + body = root.children[0] + assert body.tag == "body" + # Empty body should have no text children + assert len(body.children) == 0 diff --git a/tests/test_http.py b/tests/test_http.py new file mode 100644 index 0000000..388fc3a --- /dev/null +++ b/tests/test_http.py @@ -0,0 +1,204 @@ +"""Tests for HTTP functionality.""" + +import pytest +from unittest.mock import Mock, patch, MagicMock +from src.network.url import URL +from src.network import http + + +class TestHTTPRequest: + @patch('src.network.http.http.client.HTTPConnection') + def test_http_request_success(self, mock_conn_class): + # Setup mock + mock_conn = Mock() + mock_response = Mock() + mock_response.status = 200 + mock_response.reason = "OK" + mock_response.getheader.return_value = "text/html" + mock_response.read.return_value = b"Hello" + + mock_conn.getresponse.return_value = mock_response + mock_conn_class.return_value = mock_conn + + # Test + url = URL("http://example.com/page") + status, content_type, body = http.request(url) + + assert status == 200 + assert content_type == "text/html" + assert body == b"Hello" + + @patch('src.network.http.http.client.HTTPSConnection') + def test_https_request(self, mock_conn_class): + # Setup mock + mock_conn = Mock() + mock_response = Mock() + mock_response.status = 200 + mock_response.reason = "OK" + mock_response.getheader.return_value = "text/html" + mock_response.read.return_value = b"Secure content" + + mock_conn.getresponse.return_value = mock_response + mock_conn_class.return_value = mock_conn + + # Test + url = URL("https://example.com") + status, content_type, body = http.request(url) + + assert status == 200 + assert b"Secure" in body + mock_conn_class.assert_called_once() + + @patch('src.network.http.http.client.HTTPConnection') + def test_http_request_404(self, mock_conn_class): + # Setup mock + mock_conn = Mock() + mock_response = Mock() + mock_response.status = 404 + mock_response.reason = "Not Found" + mock_response.getheader.return_value = "text/html" + mock_response.read.return_value = b"Not Found" + + mock_conn.getresponse.return_value = mock_response + mock_conn_class.return_value = mock_conn + + # Test + url = URL("http://example.com/missing") + status, content_type, body = http.request(url) + + assert status == 404 + + @patch('src.network.http.http.client.HTTPConnection') + def test_http_request_with_user_agent(self, mock_conn_class): + # Setup mock + mock_conn = Mock() + mock_response = Mock() + mock_response.status = 200 + mock_response.reason = "OK" + mock_response.getheader.return_value = "text/html" + mock_response.read.return_value = b"content" + + mock_conn.getresponse.return_value = mock_response + mock_conn_class.return_value = mock_conn + + # Test + url = URL("http://example.com") + http.request(url) + + # Verify User-Agent header was sent + call_args = mock_conn.request.call_args + headers = call_args[1]['headers'] + assert 'User-Agent' in headers + assert 'Bowser' in headers['User-Agent'] + + @patch('src.network.http.http.client.HTTPConnection') + def test_http_redirect_301(self, mock_conn_class): + """Test following 301 permanent redirect.""" + # Setup mock for first request (redirect) + mock_conn = Mock() + mock_response_redirect = Mock() + mock_response_redirect.status = 301 + mock_response_redirect.reason = "Moved Permanently" + mock_response_redirect.getheader.side_effect = lambda header, default="": { + "Content-Type": "text/html", + "Location": "http://example.com/new-page" + }.get(header, default) + mock_response_redirect.read.return_value = b"Redirect" + + # Setup mock for second request (final response) + mock_response_final = Mock() + mock_response_final.status = 200 + mock_response_final.reason = "OK" + mock_response_final.getheader.side_effect = lambda header, default="": { + "Content-Type": "text/html", + }.get(header, default) + mock_response_final.read.return_value = b"Final content" + + mock_conn.getresponse.side_effect = [mock_response_redirect, mock_response_final] + mock_conn_class.return_value = mock_conn + + # Test + url = URL("http://example.com/old-page") + status, content_type, body = http.request(url) + + assert status == 200 + assert body == b"Final content" + assert mock_conn.request.call_count == 2 + + @patch('src.network.http.http.client.HTTPConnection') + def test_http_redirect_302(self, mock_conn_class): + """Test following 302 temporary redirect.""" + # Setup mock for first request (redirect) + mock_conn = Mock() + mock_response_redirect = Mock() + mock_response_redirect.status = 302 + mock_response_redirect.reason = "Found" + mock_response_redirect.getheader.side_effect = lambda header, default="": { + "Content-Type": "text/html", + "Location": "http://example.com/temp-page" + }.get(header, default) + mock_response_redirect.read.return_value = b"Redirect" + + # Setup mock for second request (final response) + mock_response_final = Mock() + mock_response_final.status = 200 + mock_response_final.reason = "OK" + mock_response_final.getheader.side_effect = lambda header, default="": { + "Content-Type": "text/html", + }.get(header, default) + mock_response_final.read.return_value = b"Temp content" + + mock_conn.getresponse.side_effect = [mock_response_redirect, mock_response_final] + mock_conn_class.return_value = mock_conn + + # Test + url = URL("http://example.com/old-page") + status, content_type, body = http.request(url) + + assert status == 200 + assert body == b"Temp content" + + @patch('src.network.http.http.client.HTTPConnection') + def test_http_redirect_no_location(self, mock_conn_class): + """Test handling of redirect without Location header.""" + # Setup mock + mock_conn = Mock() + mock_response = Mock() + mock_response.status = 302 + mock_response.reason = "Found" + mock_response.getheader.side_effect = lambda header, default="": { + "Content-Type": "text/html", + }.get(header, default) + mock_response.read.return_value = b"Redirect" + + mock_conn.getresponse.return_value = mock_response + mock_conn_class.return_value = mock_conn + + # Test + url = URL("http://example.com/page") + status, content_type, body = http.request(url) + + # Should return the redirect response if no Location header + assert status == 302 + assert body == b"Redirect" + + @patch('src.network.http.http.client.HTTPConnection') + def test_http_max_redirects(self, mock_conn_class): + """Test that max redirects limit is enforced.""" + # Setup mock that always returns a redirect + mock_conn = Mock() + mock_response = Mock() + mock_response.status = 302 + mock_response.reason = "Found" + mock_response.getheader.side_effect = lambda header, default="": { + "Location": "http://example.com/redirect-loop" + }.get(header, default) + mock_response.read.return_value = b"" + + mock_conn.getresponse.return_value = mock_response + mock_conn_class.return_value = mock_conn + + # Test with max_redirects=2 + url = URL("http://example.com/page") + with pytest.raises(Exception, match="Too many redirects"): + http.request(url, max_redirects=2)