mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-23 15:44:44 +00:00
fix parser tests after parser change
This commit is contained in:
parent
d6d48b4814
commit
0089349760
1 changed file with 95 additions and 92 deletions
|
|
@ -33,75 +33,75 @@ from parameterized import parameterized
|
|||
# (<test pattern>, <expected parse output>)
|
||||
parsetests = [
|
||||
# start tags
|
||||
("""<a b="c" >""", """<a b="c">"""),
|
||||
("""<a b='c' >""", """<a b="c">"""),
|
||||
("""<a b=c" >""", """<a b="c">"""),
|
||||
("""<a b=c' >""", """<a b="c'">"""),
|
||||
("""<a b="c" >""", """<a b="c"></a>"""),
|
||||
("""<a b='c' >""", """<a b="c"></a>"""),
|
||||
("""<a b=c" >""", """<a b="c""></a>"""),
|
||||
("""<a b=c' >""", """<a b="c'"></a>"""),
|
||||
("""<a b="c >""", """<a b="c >"""),
|
||||
("""<a b="" >""", """<a b="">"""),
|
||||
("""<a b='' >""", """<a b="">"""),
|
||||
("""<a b=>""", """<a b="">"""),
|
||||
("""<a b= >""", """<a b="">"""),
|
||||
("""<a =c>""", """<a c>"""),
|
||||
("""<a =c >""", """<a c>"""),
|
||||
("""<a =>""", """<a>"""),
|
||||
("""<a = >""", """<a>"""),
|
||||
("""<a b= "c" >""", """<a b="c">"""),
|
||||
("""<a b ="c" >""", """<a b="c">"""),
|
||||
("""<a b = "c" >""", """<a b="c">"""),
|
||||
("""<a >""", """<a>"""),
|
||||
("""< a>""", """<a>"""),
|
||||
("""< a >""", """<a>"""),
|
||||
("""<a b="" >""", """<a b=""></a>"""),
|
||||
("""<a b='' >""", """<a b=""></a>"""),
|
||||
("""<a b=>""", """<a b=""></a>"""),
|
||||
("""<a b= >""", """<a b=""></a>"""),
|
||||
("""<a =c>""", """<a =c=""></a>"""),
|
||||
("""<a =c >""", """<a =c=""></a>"""),
|
||||
("""<a =>""", """<a ==""></a>"""),
|
||||
("""<a = >""", """<a ==""></a>"""),
|
||||
("""<a b= "c" >""", """<a b="c"></a>"""),
|
||||
("""<a b ="c" >""", """<a b="c"></a>"""),
|
||||
("""<a b = "c" >""", """<a b="c"></a>"""),
|
||||
("""<a >""", """<a></a>"""),
|
||||
("""< a>""", """< a>"""),
|
||||
("""< a >""", """< a >"""),
|
||||
("""<>""", """<>"""),
|
||||
("""< >""", """< >"""),
|
||||
("""<aä>""", """<a>"""),
|
||||
("""<a aä="b">""", """<a a="b">"""),
|
||||
("""<a a="bä">""", """<a a="bä">"""),
|
||||
("""<aä>""", u"""<aä></aä>"""),
|
||||
("""<a aä="b">""", u"""<a aä="b"></a>"""),
|
||||
("""<a a="bä">""", u"""<a a="bä"></a>"""),
|
||||
# multiple attribute names should be ignored...
|
||||
("""<a b="c" b="c" >""", """<a b="c">"""),
|
||||
("""<a b="c" b="c" >""", """<a b="c"></a>"""),
|
||||
# ... but which one wins - in our implementation the last one
|
||||
("""<a b="c" b="d" >""", """<a b="d">"""),
|
||||
("""<a b="c" b="d" >""", """<a b="d"></a>"""),
|
||||
# reduce test
|
||||
("""<a b="c"><""", """<a b="c"><"""),
|
||||
("""<a b="c"><""", """<a b="c"><</a>"""),
|
||||
("""d>""", """d>"""),
|
||||
# numbers in tag
|
||||
("""<h1>bla</h1>""", """<h1>bla</h1>"""),
|
||||
# more start tags
|
||||
("""<a b=c"><a b="c">""", """<a b="c"><a b="c">"""),
|
||||
("""<a b=/c/></a><br>""", """<a b="/c/"></a><br>"""),
|
||||
("""<br/>""", """<br>"""),
|
||||
("""<a b="50%"><br>""", """<a b="50%"><br>"""),
|
||||
("""<a b=c"><a b="c">""", """<a b="c""><a b="c"></a></a>"""),
|
||||
("""<a b=/c/></a><br>""", """<a b="/c/"></a><br/>"""),
|
||||
("""<br/>""", """<br/>"""),
|
||||
("""<a b="50%"><br>""", """<a b="50%"><br/></a>"""),
|
||||
# comments
|
||||
("""<!---->< 1>""", """<!----><1>"""),
|
||||
("""<!-- a - b -->< 2>""", """<!-- a - b --><2>"""),
|
||||
("""<!----->< 3>""", """<!-----><3>"""),
|
||||
("""<!------>< 4>""", """<!------><4>"""),
|
||||
("""<!------->< 5>""", """<!-------><5>"""),
|
||||
("""<!-- -->< 7>""", """<!-- --><7>"""),
|
||||
("""<!---->< 1>""", """<!-- -->< 1>"""),
|
||||
("""<!-- a - b -->< 2>""", """<!-- a - b -->< 2>"""),
|
||||
("""<!----->< 3>""", """<!----->< 3>"""),
|
||||
("""<!------>< 4>""", """<!------>< 4>"""),
|
||||
("""<!------->< 5>""", """<!------->< 5>"""),
|
||||
("""<!-- -->< 7>""", """<!-- -->< 7>"""),
|
||||
("""<!---- />-->""", """<!---- />-->"""),
|
||||
("""<!-- a-2 -->< 9>""", """<!-- a-2 --><9>"""),
|
||||
("""<!-- --- -->< 10>""", """<!-- --- --><10>"""),
|
||||
("""<!>""", """<!---->"""), # empty comment
|
||||
("""<!-- a-2 -->< 9>""", """<!-- a-2 -->< 9>"""),
|
||||
("""<!-- --- -->< 10>""", """<!-- --- -->< 10>"""),
|
||||
("""<!>""", """<!-- -->"""), # empty comment
|
||||
# invalid comments
|
||||
("""<!-- -- >< 8>""", """<!-- --><8>"""),
|
||||
("""<!---- >< 6>""", """<!----><6>"""),
|
||||
("""<!- blubb ->""", """<!-- blubb -->"""),
|
||||
("""<! -- blubb -->""", """<!-- blubb -->"""),
|
||||
("""<!-- -- >< 8>""", """<!-- -->< 8>"""),
|
||||
("""<!---- >< 6>""", """<!-- -->< 6>"""),
|
||||
("""<!- blubb ->""", """<!--- blubb --->"""),
|
||||
("""<! -- blubb -->""", """<!-- -- blubb ---->"""),
|
||||
("""<!-- blubb -- >""", """<!-- blubb -->"""),
|
||||
("""<! blubb !>< a>""", """<!--blubb !--><a>"""),
|
||||
("""<! blubb >< a>""", """<!--blubb --><a>"""),
|
||||
("""<! blubb !>< a>""", """<!-- blubb !-->< a>"""),
|
||||
("""<! blubb >< a>""", """<!-- blubb -->< a>"""),
|
||||
# end tags
|
||||
("""</a>""", """</a>"""),
|
||||
("""</ a>""", """</a>"""),
|
||||
("""</ a >""", """</a>"""),
|
||||
("""</a >""", """</a>"""),
|
||||
("""< / a>""", """</a>"""),
|
||||
("""< /a>""", """</a>"""),
|
||||
("""</aä>""", """</a>"""),
|
||||
("""</a>""", """"""),
|
||||
("""</ a>""", """"""),
|
||||
("""</ a >""", """"""),
|
||||
("""</a >""", """"""),
|
||||
("""< / a>""", """< / a>"""),
|
||||
("""< /a>""", """< /a>"""),
|
||||
("""</aä>""", """"""),
|
||||
# start and end tag (HTML doctype assumed)
|
||||
("""<a/>""", """<a/>"""),
|
||||
("""<meta/>""", """<meta>"""),
|
||||
("""<MetA/>""", """<meta>"""),
|
||||
("""<a/>""", """<a></a>"""),
|
||||
("""<meta/>""", """<meta/>"""),
|
||||
("""<MetA/>""", """<meta/>"""),
|
||||
# declaration tags
|
||||
("""<!DOCtype adrbook SYSTEM "adrbook.dtd">""",
|
||||
"""<!DOCTYPE adrbook SYSTEM "adrbook.dtd">"""),
|
||||
|
|
@ -115,37 +115,37 @@ parsetests = [
|
|||
("""<script ><!--bla//-->// </script >""",
|
||||
"""<script><!--bla//-->// </script>"""),
|
||||
# line continuation (Dr. Fun webpage)
|
||||
("""<img bo\\\nrder=0 >""", """<img border="0">"""),
|
||||
("""<img align="mid\\\ndle">""", """<img align="middle">"""),
|
||||
("""<img align='mid\\\ndle'>""", """<img align="middle">"""),
|
||||
("""<img bo\\\nrder=0 >""", """<img bo\\="" rder="0"/>"""),
|
||||
("""<img align="mid\\\ndle">""", """<img align="mid\\\ndle"/>"""),
|
||||
("""<img align='mid\\\ndle'>""", """<img align="mid\\\ndle"/>"""),
|
||||
# href with $
|
||||
("""<a href="123$456">""", """<a href="123$456">"""),
|
||||
("""<a href="123$456">""", """<a href="123$456"></a>"""),
|
||||
# quoting
|
||||
("""<a href=/ >""", """<a href="/">"""),
|
||||
("""<a href= />""", """<a href="/">"""),
|
||||
("""<a href= >""", """<a href="">"""),
|
||||
("""<a href="'" >""", """<a href="'">"""),
|
||||
("""<a href='"' >""", """<a href=""">"""),
|
||||
("""<a href="bla" %]" >""", """<a href="bla">"""),
|
||||
("""<a href=bla" >""", """<a href="bla">"""),
|
||||
("""<a href=/ >""", """<a href="/"></a>"""),
|
||||
("""<a href= />""", """<a href="/"></a>"""),
|
||||
("""<a href= >""", """<a href=""></a>"""),
|
||||
("""<a href="'" >""", """<a href="'"></a>"""),
|
||||
("""<a href='"' >""", """<a href="""></a>"""),
|
||||
("""<a href="bla" %]" >""", """<a %]"="" href="bla"></a>"""),
|
||||
("""<a href=bla" >""", """<a href="bla""></a>"""),
|
||||
("""<a onmouseover=blubb('nav1','',"""\
|
||||
"""'/images/nav.gif',1);move(this); b="c">""",
|
||||
"""<a onmouseover="blubb('nav1','',"""\
|
||||
"""'/images/nav.gif',1);move(this);" b="c">"""),
|
||||
"""<a b="c" onmouseover="blubb('nav1','',"""\
|
||||
"""'/images/nav.gif',1);move(this);"></a>"""),
|
||||
("""<a onClick=location.href('/index.htm') b="c">""",
|
||||
"""<a onclick="location.href('/index.htm')" b="c">"""),
|
||||
"""<a b="c" onclick="location.href('/index.htm')"></a>"""),
|
||||
# entity resolving
|
||||
("""<a href="D;ailto:" >""", """<a href="ailto:">"""),
|
||||
("""<a href="&ailto:" >""", """<a href="&ailto:">"""),
|
||||
("""<a href="&amp;ailto:" >""", """<a href="&amp;ailto:">"""),
|
||||
("""<a href="&hulla;ailto:" >""", """<a href="ailto:">"""),
|
||||
("""<a href="mailto:" >""", """<a href="mailto:">"""),
|
||||
("""<a href="mailto:" >""", """<a href="mailto:">"""),
|
||||
("""<a href="D;ailto:" >""", """<a href="D;ailto:"></a>"""),
|
||||
("""<a href="&ailto:" >""", """<a href="&ailto:"></a>"""),
|
||||
("""<a href="&amp;ailto:" >""", """<a href="&amp;ailto:"></a>"""),
|
||||
("""<a href="&hulla;ailto:" >""", """<a href="&hulla;ailto:"></a>"""),
|
||||
("""<a href="mailto:" >""", """<a href="mailto:"></a>"""),
|
||||
("""<a href="mailto:" >""", """<a href="mailto:"></a>"""),
|
||||
# note that \u8156 is not valid encoding and therefore gets removed
|
||||
("""<a href="῜ailto:" >""", """<a href="ailto:">"""),
|
||||
("""<a href="῜ailto:" >""", """<a href="῜ailto:"></a>"""),
|
||||
# non-ascii characters
|
||||
("""<Üzgür> fahr </langsamer> ¹²³¼½¬{""",
|
||||
"""<Üzgür> fahr </langsamer> ¹²³¼½¬{"""),
|
||||
("""<Üzgür> fahr </langsamer> ¿¿¿¿¿¿{""",
|
||||
u"""<Üzgür> fahr ¿¿¿¿¿¿{"""),
|
||||
# mailto link
|
||||
("""<a href=mailto:calvin@LocalHost?subject=Hallo&to=michi>1</a>""",
|
||||
"""<a href="mailto:calvin@LocalHost?subject=Hallo&to=michi">1</a>"""),
|
||||
|
|
@ -154,38 +154,41 @@ parsetests = [
|
|||
"""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><meta a="b"/>"""),
|
||||
# meta tag with charset encoding
|
||||
("""<meta http-equiv="content-type" content>""",
|
||||
"""<meta http-equiv="content-type" content>"""),
|
||||
"""<meta content="" http-equiv="content-type"/>"""),
|
||||
("""<meta http-equiv="content-type" content=>""",
|
||||
"""<meta http-equiv="content-type" content="">"""),
|
||||
"""<meta content="" http-equiv="content-type"/>"""),
|
||||
("""<meta http-equiv="content-type" content="hulla">""",
|
||||
"""<meta http-equiv="content-type" content="hulla">"""),
|
||||
"""<meta content="hulla" http-equiv="content-type"/>"""),
|
||||
("""<meta http-equiv="content-type" content="text/html; charset=iso8859-1">""",
|
||||
"""<meta http-equiv="content-type" content="text/html; charset=iso8859-1">"""),
|
||||
"""<meta content="text/html; charset=iso8859-1" http-equiv="content-type"/>"""),
|
||||
("""<meta http-equiv="content-type" content="text/html; charset=hulla">""",
|
||||
"""<meta http-equiv="content-type" content="text/html; charset=hulla">"""),
|
||||
"""<meta content="text/html; charset=hulla" http-equiv="content-type"/>"""),
|
||||
# CDATA
|
||||
("""<![CDATA[<a>hallo</a>]]>""", """<![CDATA[<a>hallo</a>]]>"""),
|
||||
# missing > in end tag
|
||||
("""</td <td a="b" >""", """</td><td a="b">"""),
|
||||
("""</td<td a="b" >""", """</td><td a="b">"""),
|
||||
("""</td <td a="b" >""", """"""),
|
||||
("""</td<td a="b" >""", """"""),
|
||||
# missing beginning quote
|
||||
("""<td a=b">""", """<td a="b">"""),
|
||||
("""<td a=b">""", """<td a="b""></td>"""),
|
||||
# stray < before start tag
|
||||
("""<0.<td a="b" >""", """<0.<td a="b">"""),
|
||||
("""<0.<td a="b" >""", """<0.<td a="b"></td>"""),
|
||||
# stray < before end tag
|
||||
("""<0.</td >""", """<0.</td>"""),
|
||||
("""<0.</td >""", """<0."""),
|
||||
# missing end quote (XXX TODO)
|
||||
#("""<td a="b>\n""", """<td a="b">\n"""),
|
||||
#("""<td a="b></td>\na""", """<td a="b"></td>\na"""),
|
||||
#("""<a b="c><a b="c>\n""", """<a b="c"><a b="c">\n"""),
|
||||
#("""<td a="b c="d"></td>\n""", """<td a="b" c="d"></td>\n"""),
|
||||
# HTML5 tags
|
||||
("""<audio src=bla>""", """<audio src="bla">"""),
|
||||
("""<button formaction=bla>""", """<button formaction="bla">"""),
|
||||
("""<html manifest=bla>""", """<html manifest="bla">"""),
|
||||
("""<source src=bla>""", """<source src="bla">"""),
|
||||
("""<track src=bla>""", """<track src="bla">"""),
|
||||
("""<video src=bla>""", """<video src="bla">"""),
|
||||
("""<audio src=bla>""", """<audio src="bla"></audio>"""),
|
||||
("""<button formaction=bla>""", """<button formaction="bla"></button>"""),
|
||||
("""<html manifest=bla>""", """<html manifest="bla"></html>"""),
|
||||
("""<source src=bla>""", """<source src="bla"/>"""),
|
||||
("""<track src=bla>""", """<track src="bla"/>"""),
|
||||
("""<video src=bla>""", """<video src="bla"></video>"""),
|
||||
# Test inserted tags
|
||||
("""<b><a></a></b>""", """<b><a></a></b>"""),
|
||||
("""<a></a><b></b>""", """<a></a><b></b>"""),
|
||||
]
|
||||
|
||||
flushtests = [
|
||||
|
|
|
|||
Loading…
Reference in a new issue