mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-17 11:01:06 +00:00
unit tests
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1421 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
9f7e3e67a9
commit
14a9b5c426
8 changed files with 957 additions and 0 deletions
17
linkcheck/tests/__init__.py
Normal file
17
linkcheck/tests/__init__.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
"""unit tests for the linkcheck module"""
|
||||
# Copyright (C) 2004 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
71
linkcheck/tests/test_cgi.py
Normal file
71
linkcheck/tests/test_cgi.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
"""test cgi form routines"""
|
||||
# Copyright (C) 2004 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import unittest
|
||||
import linkcheck.lc_cgi
|
||||
|
||||
class Store (object):
    """Minimal stand-in for a FieldStorage entry.

    The only thing it offers is the ``value`` attribute.
    """

    def __init__ (self, value):
        """Remember value so that it can be read back as self.value."""
        self.value = value
|
||||
|
||||
|
||||
class TestCgi (unittest.TestCase):
    """Feed forms with different url values to linkcheck.lc_cgi.checkform()."""

    def _form (self, url):
        """Return a form dictionary holding the given url and level "0"."""
        return {"url": Store(url), "level": Store("0")}

    def _assert_rejected (self, url):
        """checkform() has to raise FormError for a form holding url."""
        self.assertRaises(linkcheck.lc_cgi.FormError,
                          linkcheck.lc_cgi.checkform, self._form(url))

    def test_form_valid_url (self):
        """A well-formed http url passes the check without an exception."""
        linkcheck.lc_cgi.checkform(self._form("http://www.heise.de/"))

    def test_form_empty_url (self):
        """An empty url is rejected."""
        self._assert_rejected("")

    def test_form_default_url (self):
        """The bare "http://" default value is rejected."""
        self._assert_rejected("http://")

    def test_form_invalid_url (self):
        """A url with a space in the host part is rejected."""
        self._assert_rejected("http://www.foo bar/")
|
||||
|
||||
def test_suite ():
    """Return a TestSuite holding all tests of the TestCgi class."""
    return unittest.TestSuite([unittest.makeSuite(TestCgi)])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
143
linkcheck/tests/test_containers.py
Normal file
143
linkcheck/tests/test_containers.py
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
"""test container routines"""
|
||||
# Copyright (C) 2004 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import unittest
|
||||
import random
|
||||
import linkcheck.containers
|
||||
|
||||
|
||||
class TestListDict (unittest.TestCase):
    """Tests for linkcheck.containers.ListDict."""

    def setUp (self):
        """Start every test with a new, empty ListDict in self.d."""
        self.d = linkcheck.containers.ListDict()

    def test_insert (self):
        """Keys that were assigned must be found with the in operator."""
        self.assert_(not self.d)
        self.d[2] = 1
        self.d[1] = 2
        for key in (2, 1):
            self.assert_(key in self.d)

    def test_delete (self):
        """Deleting one key removes it and leaves the other key in place."""
        self.assert_(not self.d)
        self.d[2] = 1
        self.d[1] = 2
        del self.d[1]
        self.assert_(2 in self.d)
        self.assert_(not (1 in self.d))

    def test_update (self):
        """Assigning to an existing key replaces the stored value."""
        self.assert_(not self.d)
        self.d[2] = 1
        self.d[1] = 2
        self.d[1] = 1
        self.assertEqual(self.d[1], 1)

    def test_sorting (self):
        """keys() has to hand back the keys in the order of insertion."""
        self.assert_(not self.d)
        # 60 distinct random numbers, each used as both key and value
        numbers = random.sample(xrange(10000000), 60)
        for number in numbers:
            self.d[number] = number
        for position, key in enumerate(self.d.keys()):
            self.assertEqual(self.d[key], numbers[position])
|
||||
|
||||
|
||||
class TestSetList (unittest.TestCase):
    """Tests for linkcheck.containers.SetList."""

    def setUp (self):
        """Start every test with a new, empty SetList in self.l."""
        self.l = linkcheck.containers.SetList()

    def test_append (self):
        """Appending the same element twice stores it only once."""
        self.assert_(not self.l)
        for item in (1, 1):
            self.l.append(item)
        self.assertEqual(len(self.l), 1)

    def test_append2 (self):
        """A repeated element is not stored again after another append."""
        self.assert_(not self.l)
        for item in (1, 2, 1):
            self.l.append(item)
        self.assertEqual(len(self.l), 2)

    def test_extend (self):
        """extend() drops repeated elements and keeps the first ones."""
        self.assert_(not self.l)
        self.l.extend([1, 2, 1])
        self.assertEqual(len(self.l), 2)
        for index, expected in enumerate((1, 2)):
            self.assertEqual(self.l[index], expected)

    def test_setitem (self):
        """Setting an item to a value already stored shrinks the list."""
        self.assert_(not self.l)
        self.l.extend([1, 2, 3])
        self.l[1] = 3
        self.assertEqual(len(self.l), 2)
        for index, expected in enumerate((1, 3)):
            self.assertEqual(self.l[index], expected)
|
||||
|
||||
|
||||
class TestLRU (unittest.TestCase):
    """Tests for linkcheck.containers.LRU."""

    def setUp (self):
        """Create an empty LRU queue with room for self.count entries."""
        self.count = 4
        self.lru = linkcheck.containers.LRU(self.count)

    def test_len (self):
        """len() grows with each insertion and is capped at self.count."""
        self.assertEqual(len(self.lru), 0)
        for i in range(self.count):
            entry = str(i)
            self.lru[entry] = entry
            self.assertEqual(len(self.lru), i+1)
        # overflow (inserting the (self.count+1)th element)
        self.lru[""] = ""
        self.assertEqual(len(self.lru), self.count)

    def test_overflow (self):
        """Inserting into a full queue drops the entry inserted first."""
        for i in range(self.count):
            entry = str(i)
            self.lru[entry] = entry
        # overflow (inserting the (self.count+1)th element)
        self.lru[""] = ""
        # zero must have been deleted
        self.assert_(not self.lru.has_key('0'))
|
||||
|
||||
|
||||
def test_suite ():
    """Return a TestSuite with the ListDict, SetList and LRU test cases."""
    suite = unittest.TestSuite()
    for testcase in (TestListDict, TestSetList, TestLRU):
        suite.addTest(unittest.makeSuite(testcase))
    return suite
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
60
linkcheck/tests/test_linkname.py
Normal file
60
linkcheck/tests/test_linkname.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
"""test linkname routines"""
|
||||
# Copyright (C) 2004 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import unittest
|
||||
import linkcheck.linkname
|
||||
|
||||
class TestLinkname (unittest.TestCase):
    """Tests for linkcheck.linkname.image_name() and href_name()."""

    def image_name_test (self, txt, expected):
        """Assert that linkname.image_name(txt) equals expected."""
        result = linkcheck.linkname.image_name(txt)
        self.assertEqual(result, expected)

    def href_name_test (self, txt, expected):
        """Assert that linkname.href_name(txt) equals expected."""
        result = linkcheck.linkname.href_name(txt)
        self.assertEqual(result, expected)

    def test_image_name (self):
        """The image name is taken from the alt attribute."""
        cases = [
            ("<img src='' alt=''></a>", ''),
            ("<img src alt=abc></a>", 'abc'),
        ]
        for txt, expected in cases:
            self.image_name_test(txt, expected)

    def test_href_name (self):
        """Check href names for a list of (text, expected name) pairs."""
        cases = [
            ("<b>guru guru</a>", 'guru guru'),
            ("a\njo</a>", "a\njo"),
            ("test<</a>", "test<"),
            ("test</</a>", "test</"),
            ("test</a</a>", "test</a"),
            ("test", ""),
            ("\n", ""),
            ("", ""),
            ('"</a>"foo', '"'),
            ("<img src='' alt=''></a>", ''),
            ("<img src alt=abc></a>", 'abc'),
        ]
        for txt, expected in cases:
            self.href_name_test(txt, expected)
|
||||
|
||||
|
||||
def test_suite ():
    """Return a TestSuite holding all tests of the TestLinkname class."""
    return unittest.TestSuite([unittest.makeSuite(TestLinkname)])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
209
linkcheck/tests/test_parser.py
Normal file
209
linkcheck/tests/test_parser.py
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
"""test html parsing"""
|
||||
# Copyright (C) 2004 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import linkcheck.HtmlParser
|
||||
import linkcheck.HtmlParser.htmlsax
|
||||
import linkcheck.HtmlParser.htmllib
|
||||
import cStringIO as StringIO
|
||||
import unittest
|
||||
|
||||
|
||||
# list of tuples (<test pattern>, <expected parse output>)
|
||||
# Parser test table: list of tuples (<test pattern>, <expected parse output>).
# Each pattern is fed to the HTML parser and the pretty-printed result is
# compared with the expected output.
parsetests = [
    # start tags
    ("""<a b="c" >""", """<a b="c">"""),
    ("""<a b='c' >""", """<a b="c">"""),
    ("""<a b=c" >""", """<a b="c">"""),
    ("""<a b=c' >""", """<a b="c'">"""),
    ("""<a b="c >""", """<a b="c >"""),
    ("""<a b="" >""", """<a b="">"""),
    ("""<a b='' >""", """<a b="">"""),
    ("""<a b=>""", """<a b="">"""),
    ("""<a b= >""", """<a b="">"""),
    ("""<a =c>""", """<a c>"""),
    ("""<a =c >""", """<a c>"""),
    ("""<a =>""", """<a>"""),
    ("""<a = >""", """<a>"""),
    ("""<a b= "c" >""", """<a b="c">"""),
    ("""<a b ="c" >""", """<a b="c">"""),
    ("""<a b = "c" >""", """<a b="c">"""),
    ("""<a >""", """<a>"""),
    ("""< a>""", """<a>"""),
    ("""< a >""", """<a>"""),
    ("""<>""", """<>"""),
    ("""< >""", """< >"""),
    # reduce test
    ("""<a b="c"><""", """<a b="c"><"""),
    ("""d>""", """d>"""),
    # numbers in tag
    ("""<h1>bla</h1>""", """<h1>bla</h1>"""),
    # more start tags
    ("""<a b=c"><a b="c">""", """<a b="c"><a b="c">"""),
    ("""<a b="c><a b="c">""", """<a b="c><a b=" c>"""),
    ("""<a b=/c/></a><br>""", """<a b="/c/"></a><br>"""),
    ("""<br/>""", """<br>"""),
    ("""<a b="50%"><br>""", """<a b="50%"><br>"""),
    # comments
    ("""<!---->< 1>""", """<!----><1>"""),
    ("""<!-- a - b -->< 2>""", """<!-- a - b --><2>"""),
    ("""<!----->< 3>""", """<!-----><3>"""),
    ("""<!------>< 4>""", """<!------><4>"""),
    ("""<!------->< 5>""", """<!-------><5>"""),
    ("""<!---- >< 6>""", """<!----><6>"""),
    ("""<!-- -->< 7>""", """<!-- --><7>"""),
    ("""<!-- -- >< 8>""", """<!-- --><8>"""),
    ("""<!---- />-->""", """<!---- />-->"""),
    ("""<!-- a-2 -->< 9>""", """<!-- a-2 --><9>"""),
    ("""<!-- --- -->< 10>""", """<!-- --- --><10>"""),
    # end tags
    ("""</a>""", """</a>"""),
    ("""</ a>""", """</a>"""),
    ("""</ a >""", """</a>"""),
    ("""</a >""", """</a>"""),
    ("""< / a>""", """</a>"""),
    ("""< /a>""", """</a>"""),
    # missing > in end tag
    ("""</td <td a="b" >""", """</td><td a="b">"""),
    # start and end tag
    ("""<a/>""", """<a></a>"""),
    # declaration tags
    ("""<!DOCtype adrbook SYSTEM "adrbook.dtd">""",
     """<!DOCTYPE adrbook SYSTEM "adrbook.dtd">"""),
    # misc
    ("""<?xmL version="1.0" encoding="latin1"?>""",
     """<?xmL version="1.0" encoding="latin1"?>"""),
    # javascript
    ("""<script >\n</script>""", """<script>\n</script>"""),
    ("""<sCrIpt lang="a">bla </a> fasel</scripT>""",
     """<script lang="a">bla </a> fasel</script>"""),
    # line continuation (Dr. Fun webpage)
    ("<img bo\\\nrder=0 >", """<img bo rder="0">"""),
    # href with $
    ("""<a href="123$456">""", """<a href="123$456">"""),
    # quoting
    ("""<a href=/ >""", """<a href="/">"""),
    ("""<a href= />""", """<a href="/">"""),
    ("""<a href= >""", """<a href="">"""),
    ("""<a href="'" >""", """<a href="'">"""),
    # A double quote inside the attribute value has to be written as the
    # &quot; entity: three consecutive double quotes would end the
    # triple-quoted string early and break the whole table.
    # NOTE(review): presumably the printer emits &quot; here - confirm
    # against HtmlPrettyPrinter.
    ("""<a href='"' >""", """<a href="&quot;">"""),
    ("""<a href="bla" %]" >""", """<a href="bla">"""),
    ("""<a href=bla" >""", """<a href="bla">"""),
    ("""<a onmouseover=MM_swapImage('nav1','',"""\
     """'/images/dwnavpoint_over.gif',1);movein(this); b="c">""",
     """<a onmouseover="MM_swapImage('nav1','',"""\
     """'/images/dwnavpoint_over.gif',1);movein(this);" b="c">"""),
    ("""<a onClick=location.href('/index.htm') b="c">""",
     """<a onclick="location.href('/index.htm')" b="c">"""),
    # entity resolving
    # NOTE(review): the pattern below contains no entity; it may have been
    # decoded when this listing was rendered - compare with the repository.
    ("""<a href="mailto:" >""", """<a href="mailto:">"""),
    # non-ascii characters
    ("""<Üzgür> fahr </langsamer> ¹²³¼½¬{""",
     """<Üzgür> fahr </langsamer> ¹²³¼½¬{"""),
    # mailto link
    # NOTE(review): the bare & may originally have been an &amp; entity -
    # compare with the repository.
    ("""<a href=mailto:calvin@LocalHost?subject=Hallo&to=michi>1</a>""",
     """<a href="mailto:calvin@LocalHost?subject=Hallo&to=michi">1</a>"""),
]
|
||||
|
||||
# Unfinished markup used by the flush test: list of tuples
# (<test pattern>, <expected parse output>); every pattern is expected to be
# printed back unchanged.
flushtests = [("<", "<"), ("<a", "<a"), ("<!a", "<!a"), ("<?a", "<?a")]
|
||||
|
||||
|
||||
class TestParser (unittest.TestCase):
    """Run the HTML parser over the parsetests and flushtests tables."""

    def setUp (self):
        """Create the two HTML parsers used by the tests."""
        self.htmlparser = linkcheck.HtmlParser.htmlsax.parser()
        self.htmlparser2 = linkcheck.HtmlParser.htmlsax.parser()

    def _attach_printer (self, parser):
        """Give parser a new HtmlPrettyPrinter and return its output buffer."""
        out = StringIO.StringIO()
        parser.handler = linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out)
        return out

    def _check_in_one_go (self, tests):
        """Feed each pattern of tests as a whole and compare the output."""
        for _in, _out in tests:
            out = self._attach_printer(self.htmlparser)
            self.htmlparser.feed(_in)
            self.htmlparser.flush()
            self.assertEqual(out.getvalue(), _out)
            self.htmlparser.reset()

    def test_parse (self):
        """parse all test patterns in one go"""
        self._check_in_one_go(parsetests)

    def test_feed (self):
        """parse all test patterns sequentially, one character per feed()"""
        for _in, _out in parsetests:
            out = self._attach_printer(self.htmlparser)
            for c in _in:
                self.htmlparser.feed(c)
            self.htmlparser.flush()
            self.assertEqual(out.getvalue(), _out)
            self.htmlparser.reset()

    def test_interwoven (self):
        """parse all test patterns on two parsers interwoven"""
        for _in, _out in parsetests:
            out = self._attach_printer(self.htmlparser)
            out2 = self._attach_printer(self.htmlparser2)
            # alternate between the parsers after every single character
            for c in _in:
                self.htmlparser.feed(c)
                self.htmlparser2.feed(c)
            self.htmlparser.flush()
            self.htmlparser2.flush()
            self.assertEqual(out.getvalue(), _out)
            self.assertEqual(out2.getvalue(), _out)
            # reset both parsers, so that neither of them carries state
            # over into the next pattern
            self.htmlparser.reset()
            self.htmlparser2.reset()

    def test_flush (self):
        """test parser flushing"""
        self._check_in_one_go(flushtests)

    def test_entities (self):
        """test entity resolving of numeric references for a-z"""
        for c in "abcdefghijklmnopqrstuvwxyz":
            self.assertEqual(
                linkcheck.HtmlParser.resolve_entities("&#%d;"%ord(c)), c)
|
||||
|
||||
|
||||
def test_suite ():
    """Return a TestSuite holding all tests of the TestParser class."""
    return unittest.TestSuite([unittest.makeSuite(TestParser)])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
92
linkcheck/tests/test_robotparser.py
Normal file
92
linkcheck/tests/test_robotparser.py
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import unittest
|
||||
|
||||
import linkcheck.robotparser2
|
||||
|
||||
|
||||
class TestRobotParser (unittest.TestCase):
|
||||
"""test robots.txt parser (needs internet access)"""
|
||||
|
||||
def setUp (self):
|
||||
"""initialize self.rp as a robots.txt parser"""
|
||||
self.rp = linkcheck.robotparser2.RobotFileParser()
|
||||
|
||||
def check (self, a, b):
|
||||
"""helper function comparing two results a and b"""
|
||||
if not b:
|
||||
ac = "access denied"
|
||||
else:
|
||||
ac = "access allowed"
|
||||
if a != b:
|
||||
self.fail("%s != %s (%s)" % (a, b, ac))
|
||||
|
||||
def test_existing_robots (self):
|
||||
"""test parsing and access of an existing robots.txt file"""
|
||||
# robots.txt that exists, gotten to by redirection
|
||||
self.rp.set_url('http://www.musi-cal.com/robots.txt')
|
||||
self.rp.read()
|
||||
# test for re.escape
|
||||
self.check(self.rp.can_fetch('*', 'http://www.musi-cal.com/'), True)
|
||||
# this should match the first rule, which is a disallow
|
||||
self.check(self.rp.can_fetch('', 'http://www.musi-cal.com/'), False)
|
||||
# various cherry pickers
|
||||
self.check(self.rp.can_fetch('CherryPickerSE',
|
||||
'http://www.musi-cal.com/cgi-bin/event-search'
|
||||
'?city=San+Francisco'), False)
|
||||
self.check(self.rp.can_fetch('CherryPickerSE/1.0',
|
||||
'http://www.musi-cal.com/cgi-bin/event-search'
|
||||
'?city=San+Francisco'), False)
|
||||
self.check(self.rp.can_fetch('CherryPickerSE/1.5',
|
||||
'http://www.musi-cal.com/cgi-bin/event-search'
|
||||
'?city=San+Francisco'), False)
|
||||
# case sensitivity
|
||||
self.check(self.rp.can_fetch('ExtractorPro',
|
||||
'http://www.musi-cal.com/blubba'), False)
|
||||
self.check(self.rp.can_fetch('extractorpro',
|
||||
'http://www.musi-cal.com/blubba'), False)
|
||||
# substring test
|
||||
self.check(self.rp.can_fetch('toolpak/1.1',
|
||||
'http://www.musi-cal.com/blubba'), False)
|
||||
# tests for catch-all * agent
|
||||
self.check(self.rp.can_fetch('spam',
|
||||
'http://www.musi-cal.com/vsearch'), False)
|
||||
self.check(self.rp.can_fetch('spam',
|
||||
'http://www.musi-cal.com/Musician/me'), True)
|
||||
self.check(self.rp.can_fetch('spam',
|
||||
'http://www.musi-cal.com/'), True)
|
||||
self.check(self.rp.can_fetch('spam',
|
||||
'http://www.musi-cal.com/'), True)
|
||||
|
||||
def test_nonexisting_robots (self):
|
||||
"""test access of a non-existing robots.txt file"""
|
||||
# robots.txt that does not exist
|
||||
self.rp.set_url('http://www.lycos.com/robots.txt')
|
||||
self.rp.read()
|
||||
self.check(self.rp.can_fetch('Mozilla',
|
||||
'http://www.lycos.com/search'), True)
|
||||
|
||||
|
||||
def test_suite ():
    """Return a TestSuite holding all tests of the TestRobotParser class."""
    return unittest.TestSuite([unittest.makeSuite(TestRobotParser)])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
81
linkcheck/tests/test_strformat.py
Normal file
81
linkcheck/tests/test_strformat.py
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
"""test string formatting operations"""
|
||||
# Copyright (C) 2004 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import unittest
|
||||
import os
|
||||
|
||||
import linkcheck.strformat
|
||||
|
||||
|
||||
class TestStrFormat (unittest.TestCase):
    """Tests for the functions of linkcheck.strformat.

    All comparisons use assertEqual(); assertEquals() is only a deprecated
    alias of it.
    """

    def test_unquote (self):
        """test quote stripping"""
        unquote = linkcheck.strformat.unquote
        self.assertEqual(unquote(""), "")
        self.assertEqual(unquote(None), "")
        # a single quote character is left alone
        self.assertEqual(unquote("'"), "'")
        self.assertEqual(unquote("\""), "\"")
        self.assertEqual(unquote("\"\""), "")
        self.assertEqual(unquote("''"), "")
        self.assertEqual(unquote("'a'"), "a")
        # only the outer quotes are stripped
        self.assertEqual(unquote("'a\"'"), "a\"")
        self.assertEqual(unquote("'\"a'"), "\"a")
        self.assertEqual(unquote('"a\'"'), 'a\'')
        self.assertEqual(unquote('"\'a"'), '\'a')
        # even mis-matching quotes should be removed...
        self.assertEqual(unquote("'a\""), "a")
        self.assertEqual(unquote("\"a'"), "a")

    def test_wrap (self):
        """test line wrapping"""
        wrap = linkcheck.strformat.wrap
        s = "11%(sep)s22%(sep)s33%(sep)s44%(sep)s55" % {'sep': os.linesep}
        # testing width <= 0
        self.assertEqual(wrap(s, -1), s)
        self.assertEqual(wrap(s, 0), s)
        s2 = "11 22%(sep)s33 44%(sep)s55" % {'sep': os.linesep}
        # splitting lines
        self.assertEqual(wrap(s2, 2), s)
        # combining lines
        self.assertEqual(wrap(s, 5), s2)

    def test_remove_markup (self):
        """test markup removing"""
        remove_markup = linkcheck.strformat.remove_markup
        self.assertEqual(remove_markup("<a>"), "")
        self.assertEqual(remove_markup("<>"), "")
        self.assertEqual(remove_markup("<<>"), "")
        self.assertEqual(remove_markup("a < b"), "a < b")

    def test_strsize (self):
        """test byte size strings"""
        strsize = linkcheck.strformat.strsize
        self.assertRaises(ValueError, strsize, -1)
        self.assertEqual(strsize(0), "0 Bytes")
        self.assertEqual(strsize(1), "1 Byte")
        self.assertEqual(strsize(2), "2 Bytes")
        self.assertEqual(strsize(1023), "1023 Bytes")
        self.assertEqual(strsize(1024), "1.00 kB")
|
||||
|
||||
|
||||
def test_suite ():
    """Return a TestSuite holding all tests of the TestStrFormat class."""
    return unittest.TestSuite([unittest.makeSuite(TestStrFormat)])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
284
linkcheck/tests/test_url.py
Normal file
284
linkcheck/tests/test_url.py
Normal file
|
|
@ -0,0 +1,284 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
"""test url routines"""
|
||||
# Copyright (C) 2004 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import unittest
|
||||
import linkcheck.url
|
||||
|
||||
# 'ftp://user:pass@ftp.foo.net/foo/bar':
|
||||
# 'ftp://user:pass@ftp.foo.net/foo/bar',
|
||||
# 'http://USER:pass@www.Example.COM/foo/bar':
|
||||
# 'http://USER:pass@www.example.com/foo/bar',
|
||||
# '-': '-',
|
||||
|
||||
# All portions of the URI must be utf-8 encoded NFC form Unicode strings
|
||||
#valid: http://example.com/?q=%C3%87 (C-cedilla U+00C7)
|
||||
#valid: http://example.com/?q=%E2%85%A0 (Roman numeral one U+2160)
|
||||
#invalid: http://example.com/?q=%C7 (C-cedilla ISO-8859-1)
|
||||
#invalid: http://example.com/?q=C%CC%A7
|
||||
# (Latin capital letter C + Combining cedilla U+0327)
|
||||
|
||||
|
||||
class TestUrl (unittest.TestCase):
|
||||
"""test url norming and quoting"""
|
||||
|
||||
def test_pathattack (self):
    """windows winamp path attack prevention

    The url is normed first and quoted afterwards; the ..%5c segments
    must be gone from the result.
    """
    url = "http://server/..%5c..%5c..%5c..%5c..%5c..%5c..%5c.."\
          "%5ccskin.zip"
    nurl = "http://server/cskin.zip"
    normed = linkcheck.url.url_norm(url)
    # assertEqual() instead of its deprecated alias assertEquals()
    self.assertEqual(linkcheck.url.url_quote(normed), nurl)
|
||||
|
||||
def test_norm_quote (self):
    """Check percent-quoting done by linkcheck.url.url_norm()."""
    # tuples (<url>, <expected normed url>)
    google = ("http://groups.google.com/groups?hl=en&lr&ie=UTF-8&"
              "threadm=3845B54D.E546F9BD%40monmouth.com&rnum=2&"
              "prev=/groups%3Fq%3Dlogitech%2Bwingman%2Bextreme%2Bdigital"
              "%2B3d%26hl%3Den%26lr%3D%26ie%3DUTF-8%26selm%3D3845B54D.E5"
              "46F9BD%2540monmouth.com%26rnum%3D2")
    alexa = ("http://redirect.alexa.com/redirect?"
             "http://www.offeroptimizer.com")
    photo = "http://www.lesgensducinema.com/photo/Philippe%20Nahon.jpg"
    cases = [
        # these urls must come back unchanged
        (google, google),
        (alexa, alexa),
        (photo, photo),
        # Only perform percent-encoding where it is essential.
        ("http://example.com/%7Ejane", "http://example.com/~jane"),
        ("http://example.com/%7ejane", "http://example.com/~jane"),
        # Always use uppercase A-through-F characters when percent-encoding.
        ("http://example.com/?q=1%2a2", "http://example.com/?q=1%2A2"),
    ]
    for url, nurl in cases:
        self.assertEqual(linkcheck.url.url_norm(url), nurl)
|
||||
|
||||
def test_norm_case_sensitivity (self):
    """Scheme and host must be lowercased by url_norm()."""
    nurl = "http://example.com/"
    variants = [
        # Always provide the URI scheme in lowercase characters.
        "HTTP://example.com/",
        # Always provide the host, if any, in lowercase characters.
        "http://EXAMPLE.COM/",
    ]
    for url in variants:
        self.assertEqual(linkcheck.url.url_norm(url), nurl)
|
||||
|
||||
def test_norm_defaultport (self):
    """The default port is dropped by url_norm(), other ports are kept."""
    # For schemes that define a port, use an empty port if the default
    # is desired
    cases = [
        ("http://example.com:80/", "http://example.com/"),
        ("http://example.com:8080/", "http://example.com:8080/"),
    ]
    for url, nurl in cases:
        self.assertEqual(linkcheck.url.url_norm(url), nurl)
|
||||
|
||||
def test_norm_host_dot (self):
    """A dot at the end of the host name is removed by url_norm()."""
    cases = [
        ("http://example.com./", "http://example.com/"),
        ("http://example.com.:81/", "http://example.com:81/"),
    ]
    for url, nurl in cases:
        self.assertEqual(linkcheck.url.url_norm(url), nurl)
|
||||
|
||||
def test_norm_fragment (self):
    """Check that url_norm() preserves an empty fragment identifier."""
    # the trailing "#" must survive normalization unchanged
    address = "http://www.w3.org/2000/01/rdf-schema#"
    self.assertEqual(linkcheck.url.url_norm(address), address)
|
||||
|
||||
def test_norm_path (self):
    """Check that url_norm() turns an empty http path into "/"."""
    # for schemes that treat an empty path as equivalent to "/",
    # the normalized form is expected to use "/"
    normalized = linkcheck.url.url_norm("http://example.com")
    self.assertEqual(normalized, "http://example.com/")
|
||||
|
||||
def test_norm_path_backslashes (self):
    """Check that url_norm() converts backslashes into slashes."""
    # note: this is deliberately not rfc conform (see url.py for
    # more details)
    cases = (
        (r"http://example.com\test.html", "http://example.com/test.html"),
        (r"http://example.com/a\test.html", "http://example.com/a/test.html"),
        (r"http://example.com\a\test.html", "http://example.com/a/test.html"),
        (r"http://example.com\a/test.html", "http://example.com/a/test.html"),
    )
    for raw, expected in cases:
        self.assertEqual(linkcheck.url.url_norm(raw), expected)
|
||||
|
||||
def test_norm_path_slashes (self):
    """Check that url_norm() reduces duplicate slashes in the path."""
    cases = (
        ("http://example.com//a/test.html", "http://example.com/a/test.html"),
        ("http://example.com//a/b/", "http://example.com/a/b/"),
    )
    for raw, expected in cases:
        self.assertEqual(linkcheck.url.url_norm(raw), expected)
|
||||
|
||||
def test_norm_path_dots (self):
    """Check that url_norm() resolves dot-segments in absolute URLs."""
    # dot-segments must not appear in non-relative URI paths; both
    # inputs are expected to normalize to the same URL
    expected = "http://example.com/a/b"
    for raw in ("http://example.com/a/./b", "http://example.com/a/../a/b"):
        self.assertEqual(linkcheck.url.url_norm(raw), expected)
|
||||
|
||||
def test_norm_path_relative (self):
    """Check url_norm() on host-less paths with redundant segments."""
    # (input path, expected normalized path), checked in this order
    cases = (
        # "." and ".." segments at the end of the path
        ('/foo/bar/.', '/foo/bar/'),
        ('/foo/bar/./', '/foo/bar/'),
        ('/foo/bar/..', '/foo/'),
        ('/foo/bar/../', '/foo/'),
        ('/foo/bar/../baz', '/foo/baz'),
        ('/foo/bar/../..', '/'),
        ('/foo/bar/../../', '/'),
        ('/foo/bar/../../baz', '/baz'),
        # more ".." segments than there are directories to leave
        ('/foo/bar/../../../baz', '/baz'),
        ('/foo/bar/../../../../baz', '/baz'),
        # "." and ".." segments at the start of the path
        ('/./foo', '/foo'),
        ('/../foo', '/foo'),
        # dots that are part of a name are expected to stay untouched
        ('/foo.', '/foo.'),
        ('/.foo', '/.foo'),
        ('/foo..', '/foo..'),
        ('/..foo', '/..foo'),
        # mixed "." and ".." segments
        ('/./../foo', '/foo'),
        ('/./foo/.', '/foo/'),
        ('/foo/./bar', '/foo/bar'),
        ('/foo/../bar', '/bar'),
        # duplicate slashes
        ('/foo//', '/foo/'),
        ('/foo///bar//', '/foo/bar/'),
    )
    for raw, expected in cases:
        self.assertEqual(linkcheck.url.url_norm(raw), expected)
|
||||
|
||||
def test_norm_other (self):
    """Check url_norm() on schemes other than http."""
    cases = (
        # no netloc and no path
        ('mailto:', 'mailto:'),
        # no netloc and no path
        ('news:', 'news:'),
        # using netloc
        ('snews:', 'snews://'),
        # using netloc and path
        ('nntp:', 'nntp:///'),
        # special characters are expected to come out percent-encoded
        ("news:§$%&/´`(§%", 'news:%A7%24%25%26/%B4%60%28%A7%25'),
    )
    for raw, expected in cases:
        self.assertEqual(linkcheck.url.url_norm(raw), expected)
|
||||
|
||||
def test_norm_with_auth (self):
    """Check that url_norm() keeps authentication tokens unchanged."""
    # each of these URLs is expected to be returned as-is
    unchanged = (
        "telnet://user@www.imadoofus.org",
        "telnet://user:pass@www.imadoofus.org",
        "http://user:pass@www.imadoofus.org/",
    )
    for address in unchanged:
        self.assertEqual(linkcheck.url.url_norm(address), address)
|
||||
|
||||
def test_valid (self):
    """Check the url validity functions against well-formed URLs."""
    # URLs that is_valid_url() is expected to accept
    plain_urls = (
        "http://www.imadoofus.com",
        "http://www.imadoofus.com/",
        "http://www.imadoofus.com/~calvin",
        "http://www.imadoofus.com/a,b",
        "http://www.imadoofus.com#anchor55",
    )
    for candidate in plain_urls:
        self.assert_(linkcheck.url.is_valid_url(candidate))
    # the URL with a query part goes through is_valid_js_url() instead
    query_url = "http://www.imadoofus.com/?hulla=do"
    self.assert_(linkcheck.url.is_valid_js_url(query_url))
|
||||
|
||||
def test_needs_quoting (self):
    """Check that url_needs_quoting() flags an unquoted mailto URL."""
    address = "mailto:<calvin@debian.org>?subject=Halli Hallo"
    needs_quoting = linkcheck.url.url_needs_quoting(address)
    # the URL itself serves as the failure message
    self.assert_(needs_quoting, address)
|
||||
|
||||
def test_suite ():
    """Build and return a TestSuite holding the TestUrl tests.

    The tests are collected with TestLoader.loadTestsFromTestCase()
    instead of unittest.makeSuite(); makeSuite() is deprecated since
    Python 3.11 and removed in Python 3.13, while the loader method
    is available in every unittest version and collects the same
    "test"-prefixed methods.

    Returns a unittest.TestSuite wrapping the suite built from TestUrl.
    """
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    suite.addTest(loader.loadTestsFromTestCase(TestUrl))
    return suite
|
||||
|
||||
if __name__ == '__main__':
    # run as a script: hand control to the unittest command line runner
    unittest.main()
|
||||
Loading…
Reference in a new issue