mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-16 22:10:26 +00:00
Remove support for start_end_element() callback
The LinkFinder handler start_end_element() callback does nothing apart from calling start_element().
This commit is contained in:
parent
c9f17e92b9
commit
eb3cf28baa
5 changed files with 61 additions and 104 deletions
|
|
@ -33,13 +33,6 @@ Used callbacks of a handler are:
|
|||
@param attrs: tag attributes
|
||||
@type attrs: ListDict
|
||||
|
||||
- Start-end tag: <tag {attr1:value1, attr2:value2, ..}/>
|
||||
def start_end_element(tag, attrs):
|
||||
@param tag: tag name
|
||||
@type tag: Unicode string
|
||||
@param attrs: tag attributes
|
||||
@type attrs: ListDict
|
||||
|
||||
Additionally, there are error and warning callbacks:
|
||||
|
||||
- Parser warning.
|
||||
|
|
|
|||
|
|
@ -42,20 +42,14 @@ class Parser(object):
|
|||
def parse_contents(self, contents):
|
||||
for content in contents:
|
||||
if isinstance(content, Tag):
|
||||
tag_column = None if content.sourcepos is None \
|
||||
self.handler.start_element(
|
||||
content.name, content.attrs, content.text.strip(),
|
||||
content.sourceline,
|
||||
None if content.sourcepos is None
|
||||
else content.sourcepos + 1
|
||||
if content.is_empty_element:
|
||||
self.handler.start_end_element(
|
||||
content.name, content.attrs, content.text.strip(),
|
||||
content.sourceline, tag_column
|
||||
)
|
||||
else:
|
||||
self.handler.start_element(
|
||||
content.name, content.attrs, content.text.strip(),
|
||||
content.sourceline, tag_column
|
||||
)
|
||||
if hasattr(content, 'contents'): # recursion
|
||||
self.parse_contents(content.contents)
|
||||
)
|
||||
if hasattr(content, 'contents'): # recursion
|
||||
self.parse_contents(content.contents)
|
||||
|
||||
|
||||
def parser(handler=None):
|
||||
|
|
|
|||
|
|
@ -109,11 +109,6 @@ class TagFinder (object):
|
|||
"""Does nothing, override in a subclass."""
|
||||
pass
|
||||
|
||||
def start_end_element (self, tag, attrs, element_text, lineno, column):
|
||||
"""Delegate a combined start/end element (eg. <br/>) to
|
||||
the start_element method. Ignore the end element part."""
|
||||
self.start_element(tag, attrs, element_text, lineno, column)
|
||||
|
||||
|
||||
class MetaRobotsFinder (TagFinder):
|
||||
"""Class for finding robots.txt meta values in HTML."""
|
||||
|
|
|
|||
|
|
@ -49,42 +49,16 @@ class HtmlPrettyPrinter:
|
|||
@type attrs: dict
|
||||
@return: None
|
||||
"""
|
||||
self._start_element(tag, attrs, ">", element_text)
|
||||
self.fd.write("</%s>" % tag)
|
||||
|
||||
def start_end_element (self, tag, attrs, element_text, lineno, column):
|
||||
"""
|
||||
Print HTML start-end element.
|
||||
|
||||
@param tag: tag name
|
||||
@type tag: string
|
||||
@param attrs: tag attributes
|
||||
@type attrs: dict
|
||||
@return: None
|
||||
"""
|
||||
self._start_element(tag, attrs, "/>", element_text)
|
||||
|
||||
def _start_element (self, tag, attrs, end, element_text):
|
||||
"""
|
||||
Print HTML element with end string.
|
||||
|
||||
@param tag: tag name
|
||||
@type tag: string
|
||||
@param attrs: tag attributes
|
||||
@type attrs: dict
|
||||
@param end: either > or />
|
||||
@type end: string
|
||||
@return: None
|
||||
"""
|
||||
self.fd.write("<%s" % tag.replace("/", ""))
|
||||
for key, val in sorted(attrs.items()):
|
||||
if val is None:
|
||||
self.fd.write(" %s" % key)
|
||||
else:
|
||||
self.fd.write(' %s="%s"' % (key, quote_attrval(val)))
|
||||
self.fd.write(end)
|
||||
if element_text:
|
||||
self.fd.write(element_text)
|
||||
self.fd.write(">%s</%s>" % (element_text, tag))
|
||||
else:
|
||||
self.fd.write("/>")
|
||||
|
||||
|
||||
def quote_attrval (s):
|
||||
|
|
|
|||
|
|
@ -31,42 +31,42 @@ from .htmllib import HtmlPrettyPrinter
|
|||
# (<test pattern>, <expected parse output>)
|
||||
parsetests = [
|
||||
# start tags
|
||||
("""<a b="c" >""", """<a b="c"></a>"""),
|
||||
("""<a b='c' >""", """<a b="c"></a>"""),
|
||||
("""<a b=c" >""", """<a b="c""></a>"""),
|
||||
("""<a b=c' >""", """<a b="c'"></a>"""),
|
||||
("""<a b="" >""", """<a b=""></a>"""),
|
||||
("""<a b='' >""", """<a b=""></a>"""),
|
||||
("""<a b=>""", """<a b=""></a>"""),
|
||||
("""<a b= >""", """<a b=""></a>"""),
|
||||
("""<a =c>""", """<a =c=""></a>"""),
|
||||
("""<a =c >""", """<a =c=""></a>"""),
|
||||
("""<a =>""", """<a ==""></a>"""),
|
||||
("""<a = >""", """<a ==""></a>"""),
|
||||
("""<a b= "c" >""", """<a b="c"></a>"""),
|
||||
("""<a b ="c" >""", """<a b="c"></a>"""),
|
||||
("""<a b = "c" >""", """<a b="c"></a>"""),
|
||||
("""<a >""", """<a></a>"""),
|
||||
("""<a b="c" >""", """<a b="c"/>"""),
|
||||
("""<a b='c' >""", """<a b="c"/>"""),
|
||||
("""<a b=c" >""", """<a b="c""/>"""),
|
||||
("""<a b=c' >""", """<a b="c'"/>"""),
|
||||
("""<a b="" >""", """<a b=""/>"""),
|
||||
("""<a b='' >""", """<a b=""/>"""),
|
||||
("""<a b=>""", """<a b=""/>"""),
|
||||
("""<a b= >""", """<a b=""/>"""),
|
||||
("""<a =c>""", """<a =c=""/>"""),
|
||||
("""<a =c >""", """<a =c=""/>"""),
|
||||
("""<a =>""", """<a ==""/>"""),
|
||||
("""<a = >""", """<a ==""/>"""),
|
||||
("""<a b= "c" >""", """<a b="c"/>"""),
|
||||
("""<a b ="c" >""", """<a b="c"/>"""),
|
||||
("""<a b = "c" >""", """<a b="c"/>"""),
|
||||
("""<a >""", """<a/>"""),
|
||||
("""<>""", """"""),
|
||||
("""< >""", """"""),
|
||||
("""<aä>""", u"""<aä></aä>"""),
|
||||
("""<a aä="b">""", u"""<a aä="b"></a>"""),
|
||||
("""<a a="bä">""", u"""<a a="bä"></a>"""),
|
||||
("""<aä>""", u"""<aä/>"""),
|
||||
("""<a aä="b">""", u"""<a aä="b"/>"""),
|
||||
("""<a a="bä">""", u"""<a a="bä"/>"""),
|
||||
# multiple attribute names should be ignored...
|
||||
("""<a b="c" b="c" >""", """<a b="c"></a>"""),
|
||||
("""<a b="c" b="c" >""", """<a b="c"/>"""),
|
||||
# ... but which one wins - in our implementation the last one
|
||||
("""<a b="c" b="d" >""", """<a b="d"></a>"""),
|
||||
("""<a b="c" b="d" >""", """<a b="d"/>"""),
|
||||
# reduce test
|
||||
("""<a b="c"><""", """<a b="c"><</a>"""),
|
||||
# numbers in tag
|
||||
("""<h1>bla</h1>""", """<h1>bla</h1>"""),
|
||||
# more start tags
|
||||
("""<a b=c"><a b="c">""", """<a b="c""><a b="c"></a></a>"""),
|
||||
("""<a b=/c/></a><br>""", """<a b="/c/"></a><br/>"""),
|
||||
("""<a b=c"><a b="c">""", """<a b="c""/><a b="c"/>"""),
|
||||
("""<a b=/c/></a><br>""", """<a b="/c/"/><br/>"""),
|
||||
("""<br/>""", """<br/>"""),
|
||||
("""<a b="50%"><br>""", """<a b="50%"><br/></a>"""),
|
||||
("""<a b="50%"><br>""", """<a b="50%"/><br/>"""),
|
||||
# start and end tag (HTML doctype assumed)
|
||||
("""<a/>""", """<a></a>"""),
|
||||
("""<a/>""", """<a/>"""),
|
||||
("""<meta/>""", """<meta/>"""),
|
||||
("""<MetA/>""", """<meta/>"""),
|
||||
# line continuation (Dr. Fun webpage)
|
||||
|
|
@ -74,30 +74,30 @@ parsetests = [
|
|||
("""<img align="mid\\\ndle">""", """<img align="mid\\\ndle"/>"""),
|
||||
("""<img align='mid\\\ndle'>""", """<img align="mid\\\ndle"/>"""),
|
||||
# href with $
|
||||
("""<a href="123$456">""", """<a href="123$456"></a>"""),
|
||||
("""<a href="123$456">""", """<a href="123$456"/>"""),
|
||||
# quoting
|
||||
("""<a href=/ >""", """<a href="/"></a>"""),
|
||||
("""<a href= />""", """<a href="/"></a>"""),
|
||||
("""<a href= >""", """<a href=""></a>"""),
|
||||
("""<a href="'" >""", """<a href="'"></a>"""),
|
||||
("""<a href='"' >""", """<a href="""></a>"""),
|
||||
("""<a href="bla" %]" >""", """<a %]"="" href="bla"></a>"""),
|
||||
("""<a href=bla" >""", """<a href="bla""></a>"""),
|
||||
("""<a href=/ >""", """<a href="/"/>"""),
|
||||
("""<a href= />""", """<a href="/"/>"""),
|
||||
("""<a href= >""", """<a href=""/>"""),
|
||||
("""<a href="'" >""", """<a href="'"/>"""),
|
||||
("""<a href='"' >""", """<a href="""/>"""),
|
||||
("""<a href="bla" %]" >""", """<a %]"="" href="bla"/>"""),
|
||||
("""<a href=bla" >""", """<a href="bla""/>"""),
|
||||
("""<a onmouseover=blubb('nav1','',"""\
|
||||
"""'/images/nav.gif',1);move(this); b="c">""",
|
||||
"""<a b="c" onmouseover="blubb('nav1','',"""\
|
||||
"""'/images/nav.gif',1);move(this);"></a>"""),
|
||||
"""'/images/nav.gif',1);move(this);"/>"""),
|
||||
("""<a onClick=location.href('/index.htm') b="c">""",
|
||||
"""<a b="c" onclick="location.href('/index.htm')"></a>"""),
|
||||
"""<a b="c" onclick="location.href('/index.htm')"/>"""),
|
||||
# entity resolving
|
||||
("""<a href="D;ailto:" >""", """<a href="D;ailto:"></a>"""),
|
||||
("""<a href="&ailto:" >""", """<a href="&ailto:"></a>"""),
|
||||
("""<a href="&amp;ailto:" >""", """<a href="&amp;ailto:"></a>"""),
|
||||
("""<a href="&hulla;ailto:" >""", """<a href="&hulla;ailto:"></a>"""),
|
||||
("""<a href="mailto:" >""", """<a href="mailto:"></a>"""),
|
||||
("""<a href="mailto:" >""", """<a href="mailto:"></a>"""),
|
||||
("""<a href="D;ailto:" >""", """<a href="D;ailto:"/>"""),
|
||||
("""<a href="&ailto:" >""", """<a href="&ailto:"/>"""),
|
||||
("""<a href="&amp;ailto:" >""", """<a href="&amp;ailto:"/>"""),
|
||||
("""<a href="&hulla;ailto:" >""", """<a href="&hulla;ailto:"/>"""),
|
||||
("""<a href="mailto:" >""", """<a href="mailto:"/>"""),
|
||||
("""<a href="mailto:" >""", """<a href="mailto:"/>"""),
|
||||
# note that \u8156 is not valid encoding and therefore gets removed
|
||||
("""<a href="῜ailto:" >""", """<a href="῜ailto:"></a>"""),
|
||||
("""<a href="῜ailto:" >""", """<a href="῜ailto:"/>"""),
|
||||
# mailto link
|
||||
("""<a href=mailto:calvin@LocalHost?subject=Hallo&to=michi>1</a>""",
|
||||
"""<a href="mailto:calvin@LocalHost?subject=Hallo&to=michi">1</a>"""),
|
||||
|
|
@ -116,19 +116,20 @@ parsetests = [
|
|||
("""</td <td a="b" >""", """"""),
|
||||
("""</td<td a="b" >""", """"""),
|
||||
# missing beginning quote
|
||||
("""<td a=b">""", """<td a="b""></td>"""),
|
||||
("""<td a=b">""", """<td a="b""/>"""),
|
||||
# stray < before start tag
|
||||
("""<0.<td a="b" >""", """<td a="b"></td>"""),
|
||||
("""<0.<td a="b" >""", """<td a="b"/>"""),
|
||||
# HTML5 tags
|
||||
("""<audio src=bla>""", """<audio src="bla"></audio>"""),
|
||||
("""<button formaction=bla>""", """<button formaction="bla"></button>"""),
|
||||
("""<html manifest=bla>""", """<html manifest="bla"></html>"""),
|
||||
("""<audio src=bla>""", """<audio src="bla"/>"""),
|
||||
("""<button formaction=bla>""", """<button formaction="bla"/>"""),
|
||||
("""<html manifest=bla>""", """<html manifest="bla"/>"""),
|
||||
("""<source src=bla>""", """<source src="bla"/>"""),
|
||||
("""<track src=bla>""", """<track src="bla"/>"""),
|
||||
("""<video src=bla>""", """<video src="bla"></video>"""),
|
||||
("""<video src=bla>""", """<video src="bla"/>"""),
|
||||
# Test inserted tag s
|
||||
("""<b><a></a></b>""", """<b><a></a></b>"""),
|
||||
("""<a></a><b></b>""", """<a></a><b></b>"""),
|
||||
("""<a></a><b></b>""", """<a/><b/>"""),
|
||||
# This is not correct result for an HTML parser, but it is for us
|
||||
("""<b><a></a></b>""", """<b/><a/>"""),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue