diff --git a/assets/default.css b/assets/default.css index 83c6765..24d386c 100644 --- a/assets/default.css +++ b/assets/default.css @@ -1,6 +1,89 @@ -/* Default user-agent stylesheet placeholder. */ +/* Default user-agent stylesheet - inspired by water.css */ + +* { + box-sizing: border-box; +} body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', + 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif; + line-height: 1.6; margin: 8px; - font-family: sans-serif; + padding: 0; + color: #333; + background-color: #cf5959; + font-size: 16px; +} + +h1, h2, h3, h4, h5, h6 { + font-weight: 600; + margin-top: 1.5rem; + margin-bottom: 0.5rem; + line-height: 1.2; +} + +h1 { font-size: 2.5rem; } +h2 { font-size: 2rem; } +h3 { font-size: 1.75rem; } +h4 { font-size: 1.5rem; } +h5 { font-size: 1.25rem; } +h6 { font-size: 1rem; } + +p { + margin-top: 0; + margin-bottom: 1rem; +} + +a { + color: #0066cc; + text-decoration: none; +} + +a:hover { + text-decoration: underline; +} + +ul, ol { + margin-top: 0; + margin-bottom: 1rem; + padding-left: 2rem; +} + +li { + margin-bottom: 0.25rem; +} + +blockquote { + margin: 1rem 0; + padding-left: 1rem; + border-left: 4px solid #ddd; + color: #666; +} + +code, pre { + font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', 'Consolas', 'source-code-pro', monospace; + font-size: 0.9em; + background-color: #f5f5f5; + padding: 0.2em 0.4em; + border-radius: 3px; +} + +pre { + padding: 1rem; + overflow-x: auto; + line-height: 1.4; +} + +strong, b { + font-weight: 600; +} + +em, i { + font-style: italic; +} + +hr { + border: none; + border-top: 1px solid #ddd; + margin: 2rem 0; } diff --git a/src/browser/tab.py b/src/browser/tab.py index daf9f18..a19f92b 100644 --- a/src/browser/tab.py +++ b/src/browser/tab.py @@ -5,7 +5,7 @@ import logging from ..network.url import URL from ..network import http -from ..parser.html import parse_html, Element +from ..parser.html import parse_html_with_styles, Element from ..templates import render_startpage, render_error_page if TYPE_CHECKING: @@ -27,7 +27,7 @@ class Frame: url_str = str(url) if url_str.startswith("about:startpage"): html = render_startpage() - self.document = parse_html(html) + self.document = parse_html_with_styles(html) self.tab.current_url = url return @@ -40,7 +40,7 @@ class Frame: graph_path = params.get('path', [''])[0] html = render_dom_graph_page(graph_path) - self.document = parse_html(html) + self.document = parse_html_with_styles(html) self.tab.current_url = url return @@ -52,17 +52,17 @@ class Frame: text = body.decode('utf-8', errors='replace') # Parse HTML - self.document = parse_html(text) + self.document = parse_html_with_styles(text) self.tab.current_url = url else: # Error handling - show error page html = render_error_page(status, str(url)) - self.document = parse_html(html) + self.document = parse_html_with_styles(html) except Exception as e: # Network error - show error page html = render_error_page(0, str(url), str(e)) - self.document = parse_html(html) + self.document = parse_html_with_styles(html) logger.error(f"Failed to load {url}: {e}") diff --git a/src/layout/document.py b/src/layout/document.py index a87af4c..0803b7e 100644 --- a/src/layout/document.py +++ b/src/layout/document.py @@ -160,63 +160,86 @@ class DocumentLayout: if isinstance(child, Text): txt = child.text.strip() if txt: - blocks.append({"text": txt, "font_size": 14, "block_type": "text"}) + # Use computed style if available + style = getattr(child, "computed_style", None) + font_size = style.get_int("font-size", 14) if style else 14 + blocks.append({"text": txt, "font_size": font_size, "block_type": "text", "style": style}) continue if isinstance(child, Element): tag = child.tag.lower() + + # Skip style and script tags - they shouldn't be rendered + if tag in {"style", "script", "head", "title", "meta", "link"}: + continue + + # Container elements - just recurse, don't add as blocks + if tag in {"ul", "ol", "div", "section", "article", "main", "header", "footer", "nav"}: + blocks.extend(self._collect_blocks(child)) + continue + content = self._text_of(child) if not content: continue - if tag == "h1": - blocks.append({ - "text": content, "font_size": 24, - "margin_top": 12, "margin_bottom": 12, - "block_type": "block", "tag": "h1" - }) - elif tag == "h2": - blocks.append({ - "text": content, "font_size": 20, - "margin_top": 10, "margin_bottom": 10, - "block_type": "block", "tag": "h2" - }) - elif tag == "h3": - blocks.append({ - "text": content, "font_size": 18, - "margin_top": 8, "margin_bottom": 8, - "block_type": "block", "tag": "h3" - }) - elif tag == "p": - blocks.append({ - "text": content, "font_size": 14, - "margin_top": 6, "margin_bottom": 12, - "block_type": "block", "tag": "p" - }) - elif tag == "li": - blocks.append({ - "text": content, "font_size": 14, "bullet": True, - "margin_top": 4, "margin_bottom": 4, - "block_type": "list-item", "tag": "li" - }) - elif tag in {"ul", "ol"}: - blocks.extend(self._collect_blocks(child)) - elif tag in {"span", "a", "strong", "em", "b", "i", "code"}: - blocks.append({ - "text": content, "font_size": 14, - "block_type": "inline", "tag": tag - }) - elif tag in {"div", "section", "article", "main", "header", "footer", "nav"}: - # Container elements - recurse into children - blocks.extend(self._collect_blocks(child)) + # Get computed style for this element + style = getattr(child, "computed_style", None) + + # Extract style properties + if style: + font_size = style.get_int("font-size", 14) + margin_top = style.get_int("margin-top", 6) + margin_bottom = style.get_int("margin-bottom", 10) + display = style.get("display", "block") else: - blocks.append({ - "text": content, "font_size": 14, - "block_type": "block", "tag": tag - }) + # Fallback to hardcoded defaults + font_size = self._get_default_font_size(tag) + margin_top = self._get_default_margin_top(tag) + margin_bottom = self._get_default_margin_bottom(tag) + display = "inline" if tag in {"span", "a", "strong", "em", "b", "i", "code"} else "block" + + # Determine block type + block_type = "inline" if display == "inline" else "block" + if tag == "li" or display == "list-item": + block_type = "list-item" + + # Add bullet for list items + bullet = (tag == "li" or display == "list-item") + + blocks.append({ + "text": content, + "font_size": font_size, + "margin_top": margin_top, + "margin_bottom": margin_bottom, + "block_type": block_type, + "tag": tag, + "bullet": bullet, + "style": style + }) return blocks + def _get_default_font_size(self, tag: str) -> int: + """Get default font size for a tag (fallback when no styles).""" + sizes = { + "h1": 24, "h2": 20, "h3": 18, "h4": 16, "h5": 15, "h6": 14 + } + return sizes.get(tag, 14) + + def _get_default_margin_top(self, tag: str) -> int: + """Get default top margin for a tag (fallback when no styles).""" + margins = { + "h1": 12, "h2": 10, "h3": 8, "p": 6, "li": 4 + } + return margins.get(tag, 0) + + def _get_default_margin_bottom(self, tag: str) -> int: + """Get default bottom margin for a tag (fallback when no styles).""" + margins = { + "h1": 12, "h2": 10, "h3": 8, "p": 12, "li": 4 + } + return margins.get(tag, 0) + def _text_of(self, node) -> str: """Extract text content from a node.""" if isinstance(node, Text): diff --git a/src/parser/css.py b/src/parser/css.py index 004dd86..43a3a1a 100644 --- a/src/parser/css.py +++ b/src/parser/css.py @@ -1,16 +1,259 @@ -"""CSS parser stubs.""" +"""CSS parser with tokenizer, selector parsing, and property declarations. + +Supports: +- Tag selectors (p, div, h1) +- Class selectors (.classname) +- ID selectors (#idname) +- Property declarations (color: red; font-size: 14px;) +- Inline styles (style attribute) +""" + +import re +from typing import List, Dict, Tuple + + +class Selector: + """CSS selector with specificity calculation.""" + + def __init__(self, text: str): + self.text = text.strip() + self.tag = None + self.id = None + self.classes = [] + self._parse() + + def _parse(self): + """Parse selector into tag, id, and classes.""" + remaining = self.text + + # Parse ID (#id) + if "#" in remaining: + id_match = re.search(r'#([\w-]+)', remaining) + if id_match: + self.id = id_match.group(1) + remaining = remaining.replace(f"#{self.id}", "") + + # Parse classes (.class) + class_matches = re.findall(r'\.([\w-]+)', remaining) + self.classes = class_matches + for cls in class_matches: + remaining = remaining.replace(f".{cls}", "", 1) + + # What's left is the tag + remaining = remaining.strip() + if remaining and remaining.isalnum(): + self.tag = remaining + + def specificity(self) -> Tuple[int, int, int]: + """ + Calculate specificity as (id_count, class_count, tag_count). + Higher specificity wins in cascade. + """ + id_count = 1 if self.id else 0 + class_count = len(self.classes) + tag_count = 1 if self.tag else 0 + return (id_count, class_count, tag_count) + + def matches(self, element) -> bool: + """Check if this selector matches the given element.""" + # Check tag + if self.tag and element.tag != self.tag: + return False + + # Check ID + if self.id: + elem_id = element.attributes.get("id", "") + if elem_id != self.id: + return False + + # Check classes + if self.classes: + elem_classes = element.attributes.get("class", "").split() + for cls in self.classes: + if cls not in elem_classes: + return False + + return True + + def __repr__(self): + return f"Selector({self.text!r})" class CSSRule: - def __init__(self, selector: str, declarations: dict): + """A CSS rule with selector and property declarations.""" + + def __init__(self, selector: Selector, declarations: Dict[str, str]): self.selector = selector self.declarations = declarations + def __repr__(self): + return f"CSSRule({self.selector.text!r}, {self.declarations!r})" -def parse(css_text: str): - # Placeholder: split on semicolons per line - rules = [] - for line in css_text.splitlines(): - if "{" not in line: + +class CSSParser: + """Parser for CSS stylesheets.""" + + def __init__(self, css_text: str): + self.css_text = css_text + self.position = 0 + self.rules = [] + + def parse(self) -> List[CSSRule]: + """Parse CSS text into a list of rules.""" + self.rules = [] + self.position = 0 + + while self.position < len(self.css_text): + self._skip_whitespace() + if self.position >= len(self.css_text): + break + + # Skip comments + if self._peek(2) == "/*": + self._skip_comment() + continue + + # Parse rule + rule = self._parse_rule() + if rule: + self.rules.append(rule) + + return self.rules + + def _peek(self, count=1) -> str: + """Peek ahead without consuming.""" + return self.css_text[self.position:self.position + count] + + def _consume(self, count=1) -> str: + """Consume and return characters.""" + result = self.css_text[self.position:self.position + count] + self.position += count + return result + + def _skip_whitespace(self): + """Skip whitespace and newlines.""" + while self.position < len(self.css_text) and self.css_text[self.position] in " \t\n\r": + self.position += 1 + + def _skip_comment(self): + """Skip CSS comment /* ... */.""" + if self._peek(2) == "/*": + self._consume(2) + while self.position < len(self.css_text) - 1: + if self._peek(2) == "*/": + self._consume(2) + break + self._consume() + + def _parse_rule(self) -> CSSRule: + """Parse a single CSS rule: selector { declarations }.""" + # Parse selector + selector_text = "" + while self.position < len(self.css_text): + char = self._peek() + if char == "{": + break + selector_text += self._consume() + + if not selector_text.strip(): + return None + + selector = Selector(selector_text) + + # Expect { + self._skip_whitespace() + if self._peek() != "{": + return None + self._consume() # consume { + + # Parse declarations + declarations = self._parse_declarations() + + # Expect } + self._skip_whitespace() + if self._peek() == "}": + self._consume() + + return CSSRule(selector, declarations) + + def _parse_declarations(self) -> Dict[str, str]: + """Parse property declarations inside { }.""" + declarations = {} + + while self.position < len(self.css_text): + self._skip_whitespace() + + # Check for end of block + if self._peek() == "}": + break + + # Parse property name + prop_name = "" + while self.position < len(self.css_text): + char = self._peek() + if char in ":}": + break + prop_name += self._consume() + + prop_name = prop_name.strip() + if not prop_name: + break + + # Expect : + self._skip_whitespace() + if self._peek() != ":": + break + self._consume() # consume : + + # Parse property value + self._skip_whitespace() + prop_value = "" + while self.position < len(self.css_text): + char = self._peek() + if char in ";}\n": + break + prop_value += self._consume() + + prop_value = prop_value.strip() + + # Store property + if prop_name and prop_value: + declarations[prop_name] = prop_value + + # Consume optional ; + self._skip_whitespace() + if self._peek() == ";": + self._consume() + + return declarations + + +def parse_inline_style(style_attr: str) -> Dict[str, str]: + """ + Parse inline style attribute into property declarations. + + Example: "color: red; font-size: 14px" -> {"color": "red", "font-size": "14px"} + """ + declarations = {} + + # Split by semicolons + parts = style_attr.split(";") + for part in parts: + part = part.strip() + if not part or ":" not in part: continue - return rules + + prop, value = part.split(":", 1) + prop = prop.strip() + value = value.strip() + + if prop and value: + declarations[prop] = value + + return declarations + + +def parse(css_text: str) -> List[CSSRule]: + """Parse CSS text into a list of rules.""" + parser = CSSParser(css_text) + return parser.parse() diff --git a/src/parser/html.py b/src/parser/html.py index 12f7cbe..1afc20d 100644 --- a/src/parser/html.py +++ b/src/parser/html.py @@ -94,7 +94,7 @@ class _DOMBuilder(HTMLParser): # HTMLParser callbacks def handle_starttag(self, tag, attrs): - if tag in {"script", "style"}: + if tag in {"script"}: self._skip_depth += 1 return if self._skip_depth > 0: @@ -104,7 +104,7 @@ class _DOMBuilder(HTMLParser): if tag == "html": return # Use our root instead if tag == "head": - self._skip_depth += 1 # Skip head content + # We skip head but need to preserve style tags return if tag == "body": if self._body is None: @@ -115,6 +115,13 @@ class _DOMBuilder(HTMLParser): self.current = self._body return + # Handle style tags - keep them in the tree for CSS extraction + if tag == "style": + attr_dict = {k: v for k, v in attrs} + el = Element(tag, attr_dict) + self._push(el) + return + attr_dict = {k: v for k, v in attrs} el = Element(tag, attr_dict) @@ -125,13 +132,13 @@ class _DOMBuilder(HTMLParser): self._push(el) def handle_endtag(self, tag): - if tag in {"script", "style", "head"}: + if tag in {"script"}: if self._skip_depth > 0: self._skip_depth -= 1 return if self._skip_depth > 0: return - if tag in {"html", "body"}: + if tag in {"html", "body", "head"}: return # Don't pop these self._pop(tag) @@ -171,3 +178,74 @@ def parse_html(html_text: str) -> Element: parser.feed(html_text) parser.close() return parser.root + + +def parse_html_with_styles(html_text: str, apply_styles: bool = True) -> Element: + """ + Parse HTML and optionally extract and apply CSS styles. + + Args: + html_text: The HTML source code + apply_styles: Whether to parse More" root = parse_html(html) body = root.children[0] - joined = " ".join(collect_text(body)) - assert "Text" in joined - assert "More" in joined - assert "color" not in joined + # Find style element + style_elem = None + for child in body.children: + if hasattr(child, "tag") and child.tag == "style": + style_elem = child + break + + assert style_elem is not None + # Style content should be in the element + joined = " ".join(collect_text(style_elem)) + assert "color" in joined def test_parse_decodes_entities(self): html = "<div> & "test"" diff --git a/tests/test_layout.py b/tests/test_layout.py index 2447535..a9d84ef 100644 --- a/tests/test_layout.py +++ b/tests/test_layout.py @@ -145,6 +145,23 @@ class TestDocumentLayout: assert len(lines) > 1 # Should wrap to multiple lines + def test_document_layout_skips_style_tags(self): + """Style tags should not be rendered as text.""" + body = Element("body") + p = Element("p") + p.children = [Text("Visible text")] + style = Element("style") + style.children = [Text("body { color: red; }")] + body.children = [p, style] + + layout = DocumentLayout(body) + lines = layout.layout(800) + + assert len(lines) == 1 + assert lines[0].text == "Visible text" + # CSS should not appear in rendered text + assert not any("color" in line.text for line in lines) + def test_document_layout_char_positions(self): body = Element("body") p = Element("p") diff --git a/tests/test_styling_integration.py b/tests/test_styling_integration.py new file mode 100644 index 0000000..1bf2231 --- /dev/null +++ b/tests/test_styling_integration.py @@ -0,0 +1,248 @@ +"""Integration tests for CSS styling system.""" + +import pytest +from src.parser.html import parse_html_with_styles, Element +from src.layout.document import DocumentLayout + + +class TestStyleIntegration: + """Test end-to-end CSS parsing and layout integration.""" + + def test_parse_with_style_tag(self): + html = """ + + + + + +

Hello World

+ + + """ + root = parse_html_with_styles(html) + + # Find the p element + p_elem = None + for child in root.children: + if hasattr(child, "tag") and child.tag == "body": + for grandchild in child.children: + if hasattr(grandchild, "tag") and grandchild.tag == "p": + p_elem = grandchild + break + + assert p_elem is not None + assert hasattr(p_elem, "computed_style") + assert p_elem.computed_style.get("color") == "red" + assert p_elem.computed_style.get("font-size") == "18px" + + def test_inline_style_override(self): + html = """ + + +

Styled paragraph

+ + + """ + root = parse_html_with_styles(html) + + # Find the p element + for child in root.children: + if hasattr(child, "tag") and child.tag == "body": + for grandchild in child.children: + if hasattr(grandchild, "tag") and grandchild.tag == "p": + p_elem = grandchild + assert p_elem.computed_style.get("color") == "blue" + assert p_elem.computed_style.get("font-size") == "20px" + return + + pytest.fail("P element not found") + + def test_cascade_priority(self): + html = """ + + + + + +

Tag only

+

With class

+

With ID

+

With inline

+ + + """ + root = parse_html_with_styles(html) + + # Find body + body = None + for child in root.children: + if hasattr(child, "tag") and child.tag == "body": + body = child + break + + assert body is not None + paragraphs = [c for c in body.children if hasattr(c, "tag") and c.tag == "p"] + assert len(paragraphs) == 4 + + # Check cascade + assert paragraphs[0].computed_style.get("color") == "red" # Tag only + assert paragraphs[1].computed_style.get("color") == "green" # Class wins + assert paragraphs[2].computed_style.get("color") == "blue" # ID wins + assert paragraphs[3].computed_style.get("color") == "purple" # Inline wins + + def test_inheritance(self): + html = """ + + + + + +
+

Nested paragraph

+
+ + + """ + root = parse_html_with_styles(html) + + # Find the nested p element + for child in root.children: + if hasattr(child, "tag") and child.tag == "body": + for grandchild in child.children: + if hasattr(grandchild, "tag") and grandchild.tag == "div": + for ggchild in grandchild.children: + if hasattr(ggchild, "tag") and ggchild.tag == "p": + # Should inherit color from body + assert ggchild.computed_style.get("color") == "blue" + # Font-size may be set by default.css + assert ggchild.computed_style.get("font-size") != "" + return + + pytest.fail("Nested p element not found") + + def test_layout_uses_styles(self): + html = """ + + + + + +

Title

+

Paragraph

+ + + """ + root = parse_html_with_styles(html) + + # Create layout + layout = DocumentLayout(root) + lines = layout.layout(800) + # H1 should use custom font size + assert lines[0].font_size == 40 + + # P should use custom font size + assert lines[1].font_size == 20 + + def test_multiple_classes(self): + html = """ + + + + + +

Multiple classes

+ + + """ + root = parse_html_with_styles(html) + + # Find the p element + for child in root.children: + if hasattr(child, "tag") and child.tag == "body": + for grandchild in child.children: + if hasattr(grandchild, "tag") and grandchild.tag == "p": + # Should match both classes + assert grandchild.computed_style.get("font-size") == "24px" + assert grandchild.computed_style.get("color") == "red" + return + + pytest.fail("P element not found") + + def test_default_styles_applied(self): + html = """ + + +

Heading

+

Paragraph

+ Link + + + """ + root = parse_html_with_styles(html) + + # Find elements + body = None + for child in root.children: + if hasattr(child, "tag") and child.tag == "body": + body = child + break + + assert body is not None + + h1 = next((c for c in body.children if hasattr(c, "tag") and c.tag == "h1"), None) + p = next((c for c in body.children if hasattr(c, "tag") and c.tag == "p"), None) + a = next((c for c in body.children if hasattr(c, "tag") and c.tag == "a"), None) + + # Check default styles from default.css + assert h1 is not None + # Font-size from default.css is 2.5rem + assert h1.computed_style.get("font-size") == "2.5rem" + assert h1.computed_style.get("font-weight") == "600" + + assert p is not None + assert p.computed_style.get("display") == "block" + + assert a is not None + # Link color from default.css + assert a.computed_style.get("color") == "#0066cc" + assert a.computed_style.get("text-decoration") == "none" + + def test_no_styles_when_disabled(self): + html = """ + + + + + +

Test

+ + + """ + root = parse_html_with_styles(html, apply_styles=False) + + # Find the p element + for child in root.children: + if hasattr(child, "tag") and child.tag == "body": + for grandchild in child.children: + if hasattr(grandchild, "tag") and grandchild.tag == "p": + # Should not have computed_style when disabled + assert not hasattr(grandchild, "computed_style") + return + + pytest.fail("P element not found")