mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-10 07:34:44 +00:00
more checks
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2928 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
c049837cdd
commit
e29efc31fa
1 changed file with 101 additions and 104 deletions
|
|
@ -27,129 +27,131 @@ from linkcheck.tests import MsgTestCase
|
||||||
|
|
||||||
|
|
||||||
# list of tuples
|
# list of tuples
|
||||||
# (<test pattern>, <expected parse output>, <no. of expected errors>)
|
# (<test pattern>, <expected parse output>)
|
||||||
parsetests = [
|
parsetests = [
|
||||||
# start tags
|
# start tags
|
||||||
("""<a b="c" >""", """<a b="c">""", 0),
|
("""<a b="c" >""", """<a b="c">"""),
|
||||||
("""<a b='c' >""", """<a b="c">""", 0),
|
("""<a b='c' >""", """<a b="c">"""),
|
||||||
("""<a b=c" >""", """<a b="c">""", 1),
|
("""<a b=c" >""", """<a b="c">"""),
|
||||||
("""<a b=c' >""", """<a b="c'">""", 0),
|
("""<a b=c' >""", """<a b="c'">"""),
|
||||||
("""<a b="c >""", """<a b="c >""", 0),
|
("""<a b="c >""", """<a b="c >"""),
|
||||||
("""<a b="" >""", """<a b="">""", 0),
|
("""<a b="" >""", """<a b="">"""),
|
||||||
("""<a b='' >""", """<a b="">""", 0),
|
("""<a b='' >""", """<a b="">"""),
|
||||||
("""<a b=>""", """<a b="">""", 0),
|
("""<a b=>""", """<a b="">"""),
|
||||||
("""<a b= >""", """<a b="">""", 0),
|
("""<a b= >""", """<a b="">"""),
|
||||||
("""<a =c>""", """<a c>""", 0),
|
("""<a =c>""", """<a c>"""),
|
||||||
("""<a =c >""", """<a c>""", 0),
|
("""<a =c >""", """<a c>"""),
|
||||||
("""<a =>""", """<a>""", 0),
|
("""<a =>""", """<a>"""),
|
||||||
("""<a = >""", """<a>""", 0),
|
("""<a = >""", """<a>"""),
|
||||||
("""<a b= "c" >""", """<a b="c">""", 0),
|
("""<a b= "c" >""", """<a b="c">"""),
|
||||||
("""<a b ="c" >""", """<a b="c">""", 0),
|
("""<a b ="c" >""", """<a b="c">"""),
|
||||||
("""<a b = "c" >""", """<a b="c">""", 0),
|
("""<a b = "c" >""", """<a b="c">"""),
|
||||||
("""<a >""", """<a>""", 0),
|
("""<a >""", """<a>"""),
|
||||||
("""< a>""", """<a>""", 0),
|
("""< a>""", """<a>"""),
|
||||||
("""< a >""", """<a>""", 0),
|
("""< a >""", """<a>"""),
|
||||||
("""<>""", """<>""", 0),
|
("""<>""", """<>"""),
|
||||||
("""< >""", """< >""", 0),
|
("""< >""", """< >"""),
|
||||||
("""<aä>""", """<a>""", 0),
|
("""<aä>""", """<a>"""),
|
||||||
("""<a aä="b">""", """<a a="b">""", 0),
|
("""<a aä="b">""", """<a a="b">"""),
|
||||||
("""<a a="bä">""", """<a a="bä">""", 0),
|
("""<a a="bä">""", """<a a="bä">"""),
|
||||||
# reduce test
|
# reduce test
|
||||||
("""<a b="c"><""", """<a b="c"><""", 0),
|
("""<a b="c"><""", """<a b="c"><"""),
|
||||||
("""d>""", """d>""", 0),
|
("""d>""", """d>"""),
|
||||||
# numbers in tag
|
# numbers in tag
|
||||||
("""<h1>bla</h1>""", """<h1>bla</h1>""", 0),
|
("""<h1>bla</h1>""", """<h1>bla</h1>"""),
|
||||||
# more start tags
|
# more start tags
|
||||||
("""<a b=c"><a b="c">""", """<a b="c"><a b="c">""", 1),
|
("""<a b=c"><a b="c">""", """<a b="c"><a b="c">"""),
|
||||||
("""<a b=/c/></a><br>""", """<a b="/c/"></a><br>""", 0),
|
("""<a b=/c/></a><br>""", """<a b="/c/"></a><br>"""),
|
||||||
("""<br/>""", """<br>""", 0),
|
("""<br/>""", """<br>"""),
|
||||||
("""<a b="50%"><br>""", """<a b="50%"><br>""", 0),
|
("""<a b="50%"><br>""", """<a b="50%"><br>"""),
|
||||||
# comments
|
# comments
|
||||||
("""<!---->< 1>""", """<!----><1>""", 0),
|
("""<!---->< 1>""", """<!----><1>"""),
|
||||||
("""<!-- a - b -->< 2>""", """<!-- a - b --><2>""", 0),
|
("""<!-- a - b -->< 2>""", """<!-- a - b --><2>"""),
|
||||||
("""<!----->< 3>""", """<!-----><3>""", 0),
|
("""<!----->< 3>""", """<!-----><3>"""),
|
||||||
("""<!------>< 4>""", """<!------><4>""", 0),
|
("""<!------>< 4>""", """<!------><4>"""),
|
||||||
("""<!------->< 5>""", """<!-------><5>""", 0),
|
("""<!------->< 5>""", """<!-------><5>"""),
|
||||||
("""<!-- -->< 7>""", """<!-- --><7>""", 0),
|
("""<!-- -->< 7>""", """<!-- --><7>"""),
|
||||||
("""<!---- />-->""", """<!---- />-->""", 0),
|
("""<!---- />-->""", """<!---- />-->"""),
|
||||||
("""<!-- a-2 -->< 9>""", """<!-- a-2 --><9>""", 0),
|
("""<!-- a-2 -->< 9>""", """<!-- a-2 --><9>"""),
|
||||||
("""<!-- --- -->< 10>""", """<!-- --- --><10>""", 0),
|
("""<!-- --- -->< 10>""", """<!-- --- --><10>"""),
|
||||||
# invalid comments
|
# invalid comments
|
||||||
("""<!-- -- >< 8>""", """<!-- --><8>""", 1),
|
("""<!-- -- >< 8>""", """<!-- --><8>"""),
|
||||||
("""<!---- >< 6>""", """<!----><6>""", 1),
|
("""<!---- >< 6>""", """<!----><6>"""),
|
||||||
("""<!- blubb -->""", """<!-- blubb -->""", 1),
|
("""<!- blubb -->""", """<!-- blubb -->"""),
|
||||||
("""<!-- blubb ->""", """<!-- blubb -->""", 1),
|
("""<!-- blubb ->""", """<!-- blubb -->"""),
|
||||||
("""<!- blubb ->""", """<!-- blubb -->""", 2),
|
("""<!- blubb ->""", """<!-- blubb -->"""),
|
||||||
("""<! -- blubb -->""", """<!-- blubb -->""", 1),
|
("""<! -- blubb -->""", """<!-- blubb -->"""),
|
||||||
("""<!-- blubb -- >""", """<!-- blubb -->""", 1),
|
("""<!-- blubb -- >""", """<!-- blubb -->"""),
|
||||||
# end tags
|
# end tags
|
||||||
("""</a>""", """</a>""", 0),
|
("""</a>""", """</a>"""),
|
||||||
("""</ a>""", """</a>""", 0),
|
("""</ a>""", """</a>"""),
|
||||||
("""</ a >""", """</a>""", 0),
|
("""</ a >""", """</a>"""),
|
||||||
("""</a >""", """</a>""", 0),
|
("""</a >""", """</a>"""),
|
||||||
("""< / a>""", """</a>""", 0),
|
("""< / a>""", """</a>"""),
|
||||||
("""< /a>""", """</a>""", 0),
|
("""< /a>""", """</a>"""),
|
||||||
("""</aä>""", """</a>""", 0),
|
("""</aä>""", """</a>"""),
|
||||||
# start and end tag (HTML doctype assumed)
|
# start and end tag (HTML doctype assumed)
|
||||||
("""<a/>""", """<a/>""", 0),
|
("""<a/>""", """<a/>"""),
|
||||||
("""<meta/>""", """<meta>""", 0),
|
("""<meta/>""", """<meta>"""),
|
||||||
("""<MetA/>""", """<meta>""", 0),
|
("""<MetA/>""", """<meta>"""),
|
||||||
# declaration tags
|
# declaration tags
|
||||||
("""<!DOCtype adrbook SYSTEM "adrbook.dtd">""",
|
("""<!DOCtype adrbook SYSTEM "adrbook.dtd">""",
|
||||||
"""<!DOCTYPE adrbook SYSTEM "adrbook.dtd">""", 0),
|
"""<!DOCTYPE adrbook SYSTEM "adrbook.dtd">"""),
|
||||||
# misc
|
# misc
|
||||||
("""<?xmL version="1.0" encoding="latin1"?>""",
|
("""<?xmL version="1.0" encoding="latin1"?>""",
|
||||||
"""<?xmL version="1.0" encoding="latin1"?>""", 0),
|
"""<?xmL version="1.0" encoding="latin1"?>"""),
|
||||||
# javascript
|
# javascript
|
||||||
("""<script >\n</script>""", """<script>\n</script>""", 0),
|
("""<script >\n</script>""", """<script>\n</script>"""),
|
||||||
("""<sCrIpt lang="a">bla </a> fasel</scripT>""",
|
("""<sCrIpt lang="a">bla </a> fasel</scripT>""",
|
||||||
"""<script lang="a">bla </a> fasel</script>""", 0),
|
"""<script lang="a">bla </a> fasel</script>"""),
|
||||||
# line continuation (Dr. Fun webpage)
|
# line continuation (Dr. Fun webpage)
|
||||||
("<img bo\\\nrder=0 >", """<img border="0">""", 1),
|
("""<img bo\\\nrder=0 >""", """<img border="0">"""),
|
||||||
|
("""<img align="mid\\\ndle">""", """<img align="middle">"""),
|
||||||
|
("""<img align='mid\\\ndle'>""", """<img align="middle">"""),
|
||||||
# href with $
|
# href with $
|
||||||
("""<a href="123$456">""", """<a href="123$456">""", 0),
|
("""<a href="123$456">""", """<a href="123$456">"""),
|
||||||
# quoting
|
# quoting
|
||||||
("""<a href=/ >""", """<a href="/">""", 0),
|
("""<a href=/ >""", """<a href="/">"""),
|
||||||
("""<a href= />""", """<a href="/">""", 0),
|
("""<a href= />""", """<a href="/">"""),
|
||||||
("""<a href= >""", """<a href="">""", 0),
|
("""<a href= >""", """<a href="">"""),
|
||||||
("""<a href="'" >""", """<a href="'">""", 0),
|
("""<a href="'" >""", """<a href="'">"""),
|
||||||
("""<a href='"' >""", """<a href=""">""", 0),
|
("""<a href='"' >""", """<a href=""">"""),
|
||||||
("""<a href="bla" %]" >""", """<a href="bla">""", 0),
|
("""<a href="bla" %]" >""", """<a href="bla">"""),
|
||||||
("""<a href=bla" >""", """<a href="bla">""", 1),
|
("""<a href=bla" >""", """<a href="bla">"""),
|
||||||
("""<a onmouseover=MM_swapImage('nav1','',"""\
|
("""<a onmouseover=MM_swapImage('nav1','',"""\
|
||||||
"""'/images/dwnavpoint_over.gif',1);movein(this); b="c">""",
|
"""'/images/dwnavpoint_over.gif',1);movein(this); b="c">""",
|
||||||
"""<a onmouseover="MM_swapImage('nav1','',"""\
|
"""<a onmouseover="MM_swapImage('nav1','',"""\
|
||||||
"""'/images/dwnavpoint_over.gif',1);movein(this);" b="c">""", 0),
|
"""'/images/dwnavpoint_over.gif',1);movein(this);" b="c">"""),
|
||||||
("""<a onClick=location.href('/index.htm') b="c">""",
|
("""<a onClick=location.href('/index.htm') b="c">""",
|
||||||
"""<a onclick="location.href('/index.htm')" b="c">""", 0),
|
"""<a onclick="location.href('/index.htm')" b="c">"""),
|
||||||
# entity resolving
|
# entity resolving
|
||||||
("""<a href="mailto:" >""", """<a href="mailto:">""", 0),
|
("""<a href="mailto:" >""", """<a href="mailto:">"""),
|
||||||
# non-ascii characters
|
# non-ascii characters
|
||||||
("""<Üzgür> fahr </langsamer> ¹²³¼½¬{""",
|
("""<Üzgür> fahr </langsamer> ¹²³¼½¬{""",
|
||||||
"""<Üzgür> fahr </langsamer> ¹²³¼½¬{""", 0),
|
"""<Üzgür> fahr </langsamer> ¹²³¼½¬{"""),
|
||||||
# mailto link
|
# mailto link
|
||||||
("""<a href=mailto:calvin@LocalHost?subject=Hallo&to=michi>1</a>""",
|
("""<a href=mailto:calvin@LocalHost?subject=Hallo&to=michi>1</a>""",
|
||||||
"""<a href="mailto:calvin@LocalHost?subject=Hallo&to=michi">1</a>""", 0),
|
"""<a href="mailto:calvin@LocalHost?subject=Hallo&to=michi">1</a>"""),
|
||||||
# doctype XHTML
|
# doctype XHTML
|
||||||
("""<!DOCTYPe html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><MeTa a="b"/>""",
|
("""<!DOCTYPe html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><MeTa a="b"/>""",
|
||||||
"""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><meta a="b"/>""", 0),
|
"""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><meta a="b"/>"""),
|
||||||
# missing > in end tag
|
# missing > in end tag
|
||||||
("""</td <td a="b" >""", """</td><td a="b">""", 1),
|
("""</td <td a="b" >""", """</td><td a="b">"""),
|
||||||
("""</td<td a="b" >""", """</td><td a="b">""", 1),
|
("""</td<td a="b" >""", """</td><td a="b">"""),
|
||||||
# missing beginning quote
|
# missing beginning quote
|
||||||
("""<td a=b">""", """<td a="b">""", 1),
|
("""<td a=b">""", """<td a="b">"""),
|
||||||
# missing end quote (TODO)
|
# missing end quote (XXX TODO)
|
||||||
#("""<td a="b>""", """<td a="b">""", 1),
|
#("""<td a="b>\n""", """<td a="b">\n"""),
|
||||||
#("""<td a="b></td>""", """<td a="b"></td>""", 1),
|
#("""<td a="b></td>\na""", """<td a="b"></td>\na"""),
|
||||||
#("""<td a="b c="d"></td>""", """<td a="b" c="d"></td>""", 1),
|
#("""<a b="c><a b="c>\n""", """<a b="c"><a b="c">\n"""),
|
||||||
#("""<a b="c><a b="c">""", """<a b="c><a b=" c>""", 1),
|
#("""<td a="b c="d"></td>\n""", """<td a="b" c="d"></td>\n"""),
|
||||||
]
|
]
|
||||||
|
|
||||||
flushtests = [
|
flushtests = [
|
||||||
("<", "<", 0),
|
("<", "<"),
|
||||||
("<a", "<a", 0),
|
("<a", "<a"),
|
||||||
("<!a", "<!a", 0),
|
("<!a", "<!a"),
|
||||||
("<?a", "<?a", 0),
|
("<?a", "<?a"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -169,14 +171,14 @@ class TestParser (MsgTestCase):
|
||||||
"""
|
"""
|
||||||
Parse all test patterns in one go.
|
Parse all test patterns in one go.
|
||||||
"""
|
"""
|
||||||
for _in, _out, _errs in parsetests:
|
for _in, _out in parsetests:
|
||||||
out = StringIO.StringIO()
|
out = StringIO.StringIO()
|
||||||
handler = linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out)
|
handler = linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out)
|
||||||
self.htmlparser.handler = handler
|
self.htmlparser.handler = handler
|
||||||
self.htmlparser.feed(_in)
|
self.htmlparser.feed(_in)
|
||||||
self.check_results(self.htmlparser, _in, _out, _errs, out)
|
self.check_results(self.htmlparser, _in, _out, out)
|
||||||
|
|
||||||
def check_results (self, htmlparser, _in, _out, _errs, out):
|
def check_results (self, htmlparser, _in, _out, out):
|
||||||
"""
|
"""
|
||||||
Check parse results.
|
Check parse results.
|
||||||
"""
|
"""
|
||||||
|
|
@ -185,30 +187,25 @@ class TestParser (MsgTestCase):
|
||||||
msg = "Test error; in: %r, out: %r, expect: %r" % \
|
msg = "Test error; in: %r, out: %r, expect: %r" % \
|
||||||
(_in, res, _out)
|
(_in, res, _out)
|
||||||
self.assertEqual(res, _out, msg=msg)
|
self.assertEqual(res, _out, msg=msg)
|
||||||
num = len(htmlparser.handler.errors)
|
|
||||||
errors = ", ".join(htmlparser.handler.errors)
|
|
||||||
msg = "Number of errors parsing %r: %d, expected: %d\nErrors: %s" % \
|
|
||||||
(_in, num, _errs, errors)
|
|
||||||
self.assertEqual(num, _errs, msg=msg)
|
|
||||||
htmlparser.reset()
|
htmlparser.reset()
|
||||||
|
|
||||||
def test_feed (self):
|
def test_feed (self):
|
||||||
"""
|
"""
|
||||||
Parse all test patterns sequentially.
|
Parse all test patterns sequentially.
|
||||||
"""
|
"""
|
||||||
for _in, _out, _errs in parsetests:
|
for _in, _out in parsetests:
|
||||||
out = StringIO.StringIO()
|
out = StringIO.StringIO()
|
||||||
handler = linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out)
|
handler = linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out)
|
||||||
self.htmlparser.handler = handler
|
self.htmlparser.handler = handler
|
||||||
for c in _in:
|
for c in _in:
|
||||||
self.htmlparser.feed(c)
|
self.htmlparser.feed(c)
|
||||||
self.check_results(self.htmlparser, _in, _out, _errs, out)
|
self.check_results(self.htmlparser, _in, _out, out)
|
||||||
|
|
||||||
def test_interwoven (self):
|
def test_interwoven (self):
|
||||||
"""
|
"""
|
||||||
Parse all test patterns on two parsers interwoven.
|
Parse all test patterns on two parsers interwoven.
|
||||||
"""
|
"""
|
||||||
for _in, _out, _errs in parsetests:
|
for _in, _out in parsetests:
|
||||||
out = StringIO.StringIO()
|
out = StringIO.StringIO()
|
||||||
out2 = StringIO.StringIO()
|
out2 = StringIO.StringIO()
|
||||||
handler = linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out)
|
handler = linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out)
|
||||||
|
|
@ -218,19 +215,19 @@ class TestParser (MsgTestCase):
|
||||||
for c in _in:
|
for c in _in:
|
||||||
self.htmlparser.feed(c)
|
self.htmlparser.feed(c)
|
||||||
self.htmlparser2.feed(c)
|
self.htmlparser2.feed(c)
|
||||||
self.check_results(self.htmlparser, _in, _out, _errs, out)
|
self.check_results(self.htmlparser, _in, _out, out)
|
||||||
self.check_results(self.htmlparser2, _in, _out, _errs, out2)
|
self.check_results(self.htmlparser2, _in, _out, out2)
|
||||||
|
|
||||||
def test_flush (self):
|
def test_flush (self):
|
||||||
"""
|
"""
|
||||||
Test parser flushing.
|
Test parser flushing.
|
||||||
"""
|
"""
|
||||||
for _in, _out, _errs in flushtests:
|
for _in, _out in flushtests:
|
||||||
out = StringIO.StringIO()
|
out = StringIO.StringIO()
|
||||||
handler = linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out)
|
handler = linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out)
|
||||||
self.htmlparser.handler = handler
|
self.htmlparser.handler = handler
|
||||||
self.htmlparser.feed(_in)
|
self.htmlparser.feed(_in)
|
||||||
self.check_results(self.htmlparser, _in, _out, _errs, out)
|
self.check_results(self.htmlparser, _in, _out, out)
|
||||||
|
|
||||||
def test_entities (self):
|
def test_entities (self):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue