diff --git a/linkcheck/HtmlParser/__init__.py b/linkcheck/HtmlParser/__init__.py
index 3fca9f95..3fc837a8 100644
--- a/linkcheck/HtmlParser/__init__.py
+++ b/linkcheck/HtmlParser/__init__.py
@@ -33,13 +33,6 @@ Used callbacks of a handler are:
@param attrs: tag attributes
@type attrs: ListDict
-- Start-end tag:
- def start_end_element(tag, attrs):
- @param tag: tag name
- @type tag: Unicode string
- @param attrs: tag attributes
- @type attrs: ListDict
-
Additionally, there are error and warning callbacks:
- Parser warning.
diff --git a/linkcheck/HtmlParser/htmlsax.py b/linkcheck/HtmlParser/htmlsax.py
index 6f524683..d6f8cc4f 100644
--- a/linkcheck/HtmlParser/htmlsax.py
+++ b/linkcheck/HtmlParser/htmlsax.py
@@ -42,20 +42,14 @@ class Parser(object):
def parse_contents(self, contents):
for content in contents:
if isinstance(content, Tag):
- tag_column = None if content.sourcepos is None \
+ self.handler.start_element(
+ content.name, content.attrs, content.text.strip(),
+ content.sourceline,
+ None if content.sourcepos is None
else content.sourcepos + 1
- if content.is_empty_element:
- self.handler.start_end_element(
- content.name, content.attrs, content.text.strip(),
- content.sourceline, tag_column
- )
- else:
- self.handler.start_element(
- content.name, content.attrs, content.text.strip(),
- content.sourceline, tag_column
- )
- if hasattr(content, 'contents'): # recursion
- self.parse_contents(content.contents)
+ )
+ if hasattr(content, 'contents'): # recursion
+ self.parse_contents(content.contents)
def parser(handler=None):
diff --git a/linkcheck/htmlutil/linkparse.py b/linkcheck/htmlutil/linkparse.py
index b2ed61e6..c455cffa 100644
--- a/linkcheck/htmlutil/linkparse.py
+++ b/linkcheck/htmlutil/linkparse.py
@@ -109,11 +109,6 @@ class TagFinder (object):
"""Does nothing, override in a subclass."""
pass
- def start_end_element (self, tag, attrs, element_text, lineno, column):
- """Delegate a combined start/end element (eg. <br/>) to
- the start_element method. Ignore the end element part."""
- self.start_element(tag, attrs, element_text, lineno, column)
-
class MetaRobotsFinder (TagFinder):
"""Class for finding robots.txt meta values in HTML."""
diff --git a/tests/htmllib.py b/tests/htmllib.py
index 06cebbc4..08318704 100644
--- a/tests/htmllib.py
+++ b/tests/htmllib.py
@@ -49,42 +49,16 @@ class HtmlPrettyPrinter:
@type attrs: dict
@return: None
"""
- self._start_element(tag, attrs, ">", element_text)
- self.fd.write("</%s>" % tag)
-
- def start_end_element (self, tag, attrs, element_text, lineno, column):
- """
- Print HTML start-end element.
-
- @param tag: tag name
- @type tag: string
- @param attrs: tag attributes
- @type attrs: dict
- @return: None
- """
- self._start_element(tag, attrs, "/>", element_text)
-
- def _start_element (self, tag, attrs, end, element_text):
- """
- Print HTML element with end string.
-
- @param tag: tag name
- @type tag: string
- @param attrs: tag attributes
- @type attrs: dict
- @param end: either > or />
- @type end: string
- @return: None
- """
self.fd.write("<%s" % tag.replace("/", ""))
for key, val in sorted(attrs.items()):
if val is None:
self.fd.write(" %s" % key)
else:
self.fd.write(' %s="%s"' % (key, quote_attrval(val)))
- self.fd.write(end)
if element_text:
- self.fd.write(element_text)
+ self.fd.write(">%s</%s>" % (element_text, tag))
+ else:
+ self.fd.write("/>")
def quote_attrval (s):
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 7e087082..206f8ce4 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -31,42 +31,42 @@ from .htmllib import HtmlPrettyPrinter
# (, )
parsetests = [
# start tags
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
("""<>""", """"""),
("""< >""", """"""),
- ("""""", u""""""),
- ("""""", u""""""),
- ("""""", u""""""),
+ ("""""", u""""""),
+ ("""""", u""""""),
+ ("""""", u""""""),
# multiple attribute names should be ignored...
- ("""""", """"""),
+ ("""""", """"""),
# ... but which one wins - in our implementation the last one
- ("""""", """"""),
+ ("""""", """"""),
# reduce test
("""<""", """<"""),
# numbers in tag
("""bla
""", """bla
"""),
# more start tags
- ("""""", """"""),
- ("""
""", """
"""),
+ ("""""", """"""),
+ ("""
""", """
"""),
("""
""", """
"""),
- ("""
""", """
"""),
+ ("""
""", """
"""),
# start and end tag (HTML doctype assumed)
- ("""""", """"""),
+ ("""""", """"""),
("""""", """"""),
("""""", """"""),
# line continuation (Dr. Fun webpage)
@@ -74,30 +74,30 @@ parsetests = [
("""
""", """
"""),
("""
""", """
"""),
# href with $
- ("""""", """"""),
+ ("""""", """"""),
# quoting
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
("""""",
""""""),
+ """'/images/nav.gif',1);move(this);"/>"""),
("""""",
- """"""),
+ """"""),
# entity resolving
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
- ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
+ ("""""", """"""),
# note that \u8156 is not valid encoding and therefore gets removed
- ("""""", """"""),
+ ("""""", """"""),
# mailto link
("""1""",
"""1"""),
@@ -116,19 +116,20 @@ parsetests = [
("""""", """"""),
("""""", """"""),
# missing beginning quote
- ("""""", """ | | """),
+ ("""""", """ | | """),
# stray < before start tag
- ("""<0.""", """ | | """),
+ ("""<0.""", """ | | """),
# HTML5 tags
- ("""