unit tests

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1421 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2004-08-16 19:20:06 +00:00
parent 9f7e3e67a9
commit 14a9b5c426
8 changed files with 957 additions and 0 deletions

View file

@ -0,0 +1,17 @@
# -*- coding: iso-8859-1 -*-
"""unit tests for the linkcheck module"""
# Copyright (C) 2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

View file

@ -0,0 +1,71 @@
# -*- coding: iso-8859-1 -*-
"""test cgi form routines"""
# Copyright (C) 2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import unittest
import linkcheck.lc_cgi
class Store(object):
    """Tiny value holder standing in for a FieldStorage form field.

    The only thing it offers is the ``value`` attribute, which carries
    whatever was handed to the constructor.
    """

    def __init__(self, value):
        """Remember ``value`` unchanged in ``self.value``."""
        self.value = value
class TestCgi(unittest.TestCase):
    """Feed hand-built form dictionaries to linkcheck.lc_cgi.checkform()."""

    def test_form_valid_url(self):
        """A complete url must be accepted, i.e. no exception is raised."""
        fields = {}
        fields["url"] = Store("http://www.heise.de/")
        fields["level"] = Store("0")
        linkcheck.lc_cgi.checkform(fields)

    def test_form_empty_url(self):
        """An empty url must be rejected with a FormError."""
        fields = {}
        fields["url"] = Store("")
        fields["level"] = Store("0")
        self.assertRaises(linkcheck.lc_cgi.FormError,
                          linkcheck.lc_cgi.checkform, fields)

    def test_form_default_url(self):
        """The bare "http://" prefix must be rejected with a FormError."""
        fields = {}
        fields["url"] = Store("http://")
        fields["level"] = Store("0")
        self.assertRaises(linkcheck.lc_cgi.FormError,
                          linkcheck.lc_cgi.checkform, fields)

    def test_form_invalid_url(self):
        """A url containing a space must be rejected with a FormError."""
        fields = {}
        fields["url"] = Store("http://www.foo bar/")
        fields["level"] = Store("0")
        self.assertRaises(linkcheck.lc_cgi.FormError,
                          linkcheck.lc_cgi.checkform, fields)
def test_suite():
    """Return a TestSuite holding all tests of TestCgi."""
    return unittest.TestSuite([unittest.makeSuite(TestCgi)])


# Run every test case of this module when executed as a script.
if __name__ == '__main__':
    unittest.main()

View file

@ -0,0 +1,143 @@
# -*- coding: iso-8859-1 -*-
"""test container routines"""
# Copyright (C) 2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import unittest
import random
import linkcheck.containers
class TestListDict(unittest.TestCase):
    """Checks for linkcheck.containers.ListDict."""

    def setUp(self):
        """Start every test with a fresh ListDict stored in self.d."""
        self.d = linkcheck.containers.ListDict()

    def test_insert(self):
        """Keys assigned to the dictionary must be contained in it."""
        self.assert_(not self.d)
        self.d[2] = 1
        self.d[1] = 2
        for key in (2, 1):
            self.assert_(key in self.d)

    def test_delete(self):
        """A deleted key must vanish while the other key stays."""
        self.assert_(not self.d)
        self.d[2] = 1
        self.d[1] = 2
        del self.d[1]
        self.assert_(2 in self.d)
        self.assert_(1 not in self.d)

    def test_update(self):
        """Assigning to an existing key must replace its value."""
        self.assert_(not self.d)
        self.d[2] = 1
        self.d[1] = 2
        self.d[1] = 1
        self.assertEqual(self.d[1], 1)

    def test_sorting(self):
        """keys() must list the keys in the order they were inserted."""
        self.assert_(not self.d)
        # 60 distinct random numbers, each used as key and as value
        numbers = random.sample(xrange(10000000), 60)
        for number in numbers:
            self.d[number] = number
        for pos, key in enumerate(self.d.keys()):
            self.assertEqual(self.d[key], numbers[pos])
class TestSetList(unittest.TestCase):
    """Checks for linkcheck.containers.SetList."""

    def setUp(self):
        """Start every test with a fresh SetList stored in self.l."""
        self.l = linkcheck.containers.SetList()

    def test_append(self):
        """Appending the same element twice must store it only once."""
        self.assert_(not self.l)
        for element in (1, 1):
            self.l.append(element)
        self.assertEqual(len(self.l), 1)

    def test_append2(self):
        """A repeated element must be dropped even if not adjacent."""
        self.assert_(not self.l)
        for element in (1, 2, 1):
            self.l.append(element)
        self.assertEqual(len(self.l), 2)

    def test_extend(self):
        """extend() must skip duplicates and keep first-seen order."""
        self.assert_(not self.l)
        self.l.extend([1, 2, 1])
        self.assertEqual(len(self.l), 2)
        self.assertEqual(self.l[0], 1)
        self.assertEqual(self.l[1], 2)

    def test_setitem(self):
        """Overwriting an item with an existing value must shrink the list."""
        self.assert_(not self.l)
        self.l.extend([1, 2, 3])
        # position 1 now holds the same value as position 2
        self.l[1] = 3
        self.assertEqual(len(self.l), 2)
        self.assertEqual(self.l[0], 1)
        self.assertEqual(self.l[1], 3)
class TestLRU(unittest.TestCase):
    """Checks for the linkcheck.containers.LRU queue."""

    def setUp(self):
        """Create an empty LRU queue with a capacity of self.count."""
        self.count = 4
        self.lru = linkcheck.containers.LRU(self.count)

    def test_len(self):
        """The length must grow per insert and stop at the capacity."""
        self.assertEqual(len(self.lru), 0)
        for i in range(self.count):
            key = str(i)
            self.lru[key] = str(i)
            self.assertEqual(len(self.lru), i + 1)
        # overflow: insert the (self.count+1)th element
        self.lru[""] = ""
        self.assertEqual(len(self.lru), self.count)

    def test_overflow(self):
        """Inserting beyond the capacity must drop the first inserted key."""
        for i in range(self.count):
            key = str(i)
            self.lru[key] = str(i)
        # overflow: insert the (self.count+1)th element
        self.lru[""] = ""
        # the entry for '0' must have been deleted
        self.assert_(not self.lru.has_key('0'))
def test_suite():
    """Return a TestSuite with the ListDict, SetList and LRU tests."""
    cases = (TestListDict, TestSetList, TestLRU)
    return unittest.TestSuite([unittest.makeSuite(case) for case in cases])


# Run every test case of this module when executed as a script.
if __name__ == '__main__':
    unittest.main()

View file

@ -0,0 +1,60 @@
# -*- coding: iso-8859-1 -*-
"""test linkname routines"""
# Copyright (C) 2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import unittest
import linkcheck.linkname
class TestLinkname(unittest.TestCase):
    """Checks for the name extraction helpers in linkcheck.linkname."""

    def image_name_test(self, txt, expected):
        """Assert that linkname.image_name(txt) equals expected."""
        name = linkcheck.linkname.image_name(txt)
        self.assertEqual(name, expected)

    def href_name_test(self, txt, expected):
        """Assert that linkname.href_name(txt) equals expected."""
        name = linkcheck.linkname.href_name(txt)
        self.assertEqual(name, expected)

    def test_image_name(self):
        """image_name() must return the alt text of an image tag."""
        cases = [
            ("<img src='' alt=''></a>", ''),
            ("<img src alt=abc></a>", 'abc'),
        ]
        for txt, expected in cases:
            self.image_name_test(txt, expected)

    def test_href_name(self):
        """href_name() must return the link text found before </a>."""
        cases = [
            ("<b>guru guru</a>", 'guru guru'),
            ("a\njo</a>", "a\njo"),
            ("test<</a>", "test<"),
            ("test</</a>", "test</"),
            ("test</a</a>", "test</a"),
            # no closing </a> at all
            ("test", ""),
            ("\n", ""),
            ("", ""),
            ('"</a>"foo', '"'),
            # image tags as link content
            ("<img src='' alt=''></a>", ''),
            ("<img src alt=abc></a>", 'abc'),
        ]
        for txt, expected in cases:
            self.href_name_test(txt, expected)
def test_suite():
    """Return a TestSuite holding all tests of TestLinkname."""
    return unittest.TestSuite([unittest.makeSuite(TestLinkname)])


# Run every test case of this module when executed as a script.
if __name__ == '__main__':
    unittest.main()

View file

@ -0,0 +1,209 @@
# -*- coding: iso-8859-1 -*-
"""test html parsing"""
# Copyright (C) 2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import linkcheck.HtmlParser
import linkcheck.HtmlParser.htmlsax
import linkcheck.HtmlParser.htmllib
import cStringIO as StringIO
import unittest
# Table of (<test pattern>, <expected parse output>) tuples. Every
# pattern is fed to the parser and the pretty printed result is compared
# with the expected output.
parsetests = [
    # start tags
    ("""<a b="c" >""", """<a b="c">"""),
    ("""<a b='c' >""", """<a b="c">"""),
    ("""<a b=c" >""", """<a b="c">"""),
    ("""<a b=c' >""", """<a b="c'">"""),
    ("""<a b="c >""", """<a b="c >"""),
    ("""<a b="" >""", """<a b="">"""),
    ("""<a b='' >""", """<a b="">"""),
    ("""<a b=>""", """<a b="">"""),
    ("""<a b= >""", """<a b="">"""),
    ("""<a =c>""", """<a c>"""),
    ("""<a =c >""", """<a c>"""),
    ("""<a =>""", """<a>"""),
    ("""<a = >""", """<a>"""),
    ("""<a b= "c" >""", """<a b="c">"""),
    ("""<a b ="c" >""", """<a b="c">"""),
    ("""<a b = "c" >""", """<a b="c">"""),
    ("""<a >""", """<a>"""),
    ("""< a>""", """<a>"""),
    ("""< a >""", """<a>"""),
    ("""<>""", """<>"""),
    ("""< >""", """< >"""),
    # reduce test
    ("""<a b="c"><""", """<a b="c"><"""),
    ("""d>""", """d>"""),
    # numbers in tag
    ("""<h1>bla</h1>""", """<h1>bla</h1>"""),
    # more start tags
    ("""<a b=c"><a b="c">""", """<a b="c"><a b="c">"""),
    ("""<a b="c><a b="c">""", """<a b="c><a b=" c>"""),
    ("""<a b=/c/></a><br>""", """<a b="/c/"></a><br>"""),
    ("""<br/>""", """<br>"""),
    ("""<a b="50%"><br>""", """<a b="50%"><br>"""),
    # comments
    ("""<!---->< 1>""", """<!----><1>"""),
    ("""<!-- a - b -->< 2>""", """<!-- a - b --><2>"""),
    ("""<!----->< 3>""", """<!-----><3>"""),
    ("""<!------>< 4>""", """<!------><4>"""),
    ("""<!------->< 5>""", """<!-------><5>"""),
    ("""<!---- >< 6>""", """<!----><6>"""),
    ("""<!-- -->< 7>""", """<!-- --><7>"""),
    ("""<!-- -- >< 8>""", """<!-- --><8>"""),
    ("""<!---- />-->""", """<!---- />-->"""),
    ("""<!-- a-2 -->< 9>""", """<!-- a-2 --><9>"""),
    ("""<!-- --- -->< 10>""", """<!-- --- --><10>"""),
    # end tags
    ("""</a>""", """</a>"""),
    ("""</ a>""", """</a>"""),
    ("""</ a >""", """</a>"""),
    ("""</a >""", """</a>"""),
    ("""< / a>""", """</a>"""),
    ("""< /a>""", """</a>"""),
    # missing > in end tag
    ("""</td <td a="b" >""", """</td><td a="b">"""),
    # start and end tag
    ("""<a/>""", """<a></a>"""),
    # declaration tags
    ("""<!DOCtype adrbook SYSTEM "adrbook.dtd">""",
     """<!DOCTYPE adrbook SYSTEM "adrbook.dtd">"""),
    # misc
    ("""<?xmL version="1.0" encoding="latin1"?>""",
     """<?xmL version="1.0" encoding="latin1"?>"""),
    # javascript
    ("""<script >\n</script>""", """<script>\n</script>"""),
    ("""<sCrIpt lang="a">bla </a> fasel</scripT>""",
     """<script lang="a">bla </a> fasel</script>"""),
    # line continuation (Dr. Fun webpage)
    ("<img bo\\\nrder=0 >", """<img bo rder="0">"""),
    # href with $
    ("""<a href="123$456">""", """<a href="123$456">"""),
    # quoting
    ("""<a href=/ >""", """<a href="/">"""),
    ("""<a href= />""", """<a href="/">"""),
    ("""<a href= >""", """<a href="">"""),
    ("""<a href="'" >""", """<a href="'">"""),
    ("""<a href='"' >""", """<a href="&quot;">"""),
    ("""<a href="bla" %]" >""", """<a href="bla">"""),
    ("""<a href=bla" >""", """<a href="bla">"""),
    # adjacent literals are joined by the compiler into one string each
    ("""<a onmouseover=MM_swapImage('nav1','',"""
     """'/images/dwnavpoint_over.gif',1);movein(this); b="c">""",
     """<a onmouseover="MM_swapImage('nav1','',"""
     """'/images/dwnavpoint_over.gif',1);movein(this);" b="c">"""),
    ("""<a onClick=location.href('/index.htm') b="c">""",
     """<a onclick="location.href('/index.htm')" b="c">"""),
    # entity resolving
    ("""<a href="&#109;ailto:" >""", """<a href="mailto:">"""),
    # non-ascii characters
    ("""<Üzgür> fahr </langsamer> ¹²³¼½¬{""",
     """<Üzgür> fahr </langsamer> ¹²³¼½¬{"""),
    # mailto link
    ("""<a href=mailto:calvin@LocalHost?subject=Hallo&to=michi>1</a>""",
     """<a href="mailto:calvin@LocalHost?subject=Hallo&amp;to=michi">1</a>"""),
]

# Table of (<unfinished markup>, <expected output after flush>) tuples.
flushtests = [
    ("<", "<"),
    ("<a", "<a"),
    ("<!a", "<!a"),
    ("<?a", "<?a"),
]
class TestParser(unittest.TestCase):
    """Run the html parser over the pattern tables of this module.

    Each test attaches an HtmlPrettyPrinter writing into a string buffer
    as parser handler, feeds a pattern, flushes the parser and compares
    the buffer content with the expected output.
    """

    def setUp(self):
        """Create the two html parsers used by the tests."""
        self.htmlparser = linkcheck.HtmlParser.htmlsax.parser()
        self.htmlparser2 = linkcheck.HtmlParser.htmlsax.parser()

    def test_parse(self):
        """Feed each pattern of parsetests to the parser in one piece."""
        for _in, _out in parsetests:
            out = StringIO.StringIO()
            self.htmlparser.handler = \
                linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out)
            self.htmlparser.feed(_in)
            self.htmlparser.flush()
            res = out.getvalue()
            self.assertEqual(res, _out)
            # start the next pattern with a clean parser
            self.htmlparser.reset()

    def test_feed(self):
        """Feed each pattern of parsetests one character at a time."""
        for _in, _out in parsetests:
            out = StringIO.StringIO()
            self.htmlparser.handler = \
                linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out)
            for c in _in:
                self.htmlparser.feed(c)
            self.htmlparser.flush()
            res = out.getvalue()
            self.assertEqual(res, _out)
            self.htmlparser.reset()

    def test_interwoven(self):
        """Feed each pattern alternately, char by char, to two parsers.

        Both parsers have their own output buffer and both must produce
        the expected output.
        """
        for _in, _out in parsetests:
            out = StringIO.StringIO()
            out2 = StringIO.StringIO()
            self.htmlparser.handler = \
                linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out)
            self.htmlparser2.handler = \
                linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out2)
            for c in _in:
                self.htmlparser.feed(c)
                self.htmlparser2.feed(c)
            self.htmlparser.flush()
            self.htmlparser2.flush()
            res = out.getvalue()
            res2 = out2.getvalue()
            self.assertEqual(res, _out)
            self.assertEqual(res2, _out)
            # reset BOTH parsers; previously only the first one was reset
            # so the second one carried its state into the next pattern
            self.htmlparser.reset()
            self.htmlparser2.reset()

    def test_flush(self):
        """Feed each unfinished pattern of flushtests and flush it."""
        for _in, _out in flushtests:
            out = StringIO.StringIO()
            self.htmlparser.handler = \
                linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out)
            self.htmlparser.feed(_in)
            self.htmlparser.flush()
            res = out.getvalue()
            self.assertEqual(res, _out)
            self.htmlparser.reset()

    def test_entities(self):
        """Numeric entities of a-z must resolve to the letter itself."""
        for c in "abcdefghijklmnopqrstuvwxyz":
            self.assertEqual(
                linkcheck.HtmlParser.resolve_entities("&#%d;" % ord(c)), c)
def test_suite():
    """Return a TestSuite holding all tests of TestParser."""
    return unittest.TestSuite([unittest.makeSuite(TestParser)])


# Run every test case of this module when executed as a script.
if __name__ == '__main__':
    unittest.main()

View file

@ -0,0 +1,92 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import unittest
import linkcheck.robotparser2
class TestRobotParser(unittest.TestCase):
    """Checks for linkcheck.robotparser2 (these need internet access)."""

    def setUp(self):
        """Create a fresh RobotFileParser in self.rp."""
        self.rp = linkcheck.robotparser2.RobotFileParser()

    def check(self, a, b):
        """Fail the test if result a differs from the expected value b.

        The failure message shows both values and spells out whether
        access was expected to be allowed or denied.
        """
        if a != b:
            if b:
                ac = "access allowed"
            else:
                ac = "access denied"
            self.fail("%s != %s (%s)" % (a, b, ac))

    def test_existing_robots(self):
        """Read an existing robots.txt and query it for several agents."""
        # robots.txt that exists, gotten to by redirection
        self.rp.set_url('http://www.musi-cal.com/robots.txt')
        self.rp.read()
        # (user agent, url, expected can_fetch() result)
        expectations = [
            # test for re.escape
            ('*', 'http://www.musi-cal.com/', True),
            # this should match the first rule, which is a disallow
            ('', 'http://www.musi-cal.com/', False),
            # various cherry pickers
            ('CherryPickerSE',
             'http://www.musi-cal.com/cgi-bin/event-search'
             '?city=San+Francisco', False),
            ('CherryPickerSE/1.0',
             'http://www.musi-cal.com/cgi-bin/event-search'
             '?city=San+Francisco', False),
            ('CherryPickerSE/1.5',
             'http://www.musi-cal.com/cgi-bin/event-search'
             '?city=San+Francisco', False),
            # case sensitivity
            ('ExtractorPro', 'http://www.musi-cal.com/blubba', False),
            ('extractorpro', 'http://www.musi-cal.com/blubba', False),
            # substring test
            ('toolpak/1.1', 'http://www.musi-cal.com/blubba', False),
            # tests for catch-all * agent
            ('spam', 'http://www.musi-cal.com/vsearch', False),
            ('spam', 'http://www.musi-cal.com/Musician/me', True),
            ('spam', 'http://www.musi-cal.com/', True),
            # NOTE(review): identical to the entry above -- confirm
            # whether a different url was intended here
            ('spam', 'http://www.musi-cal.com/', True),
        ]
        for agent, url, allowed in expectations:
            self.check(self.rp.can_fetch(agent, url), allowed)

    def test_nonexisting_robots(self):
        """Without a robots.txt on the server, fetching must be allowed."""
        # robots.txt that does not exist
        self.rp.set_url('http://www.lycos.com/robots.txt')
        self.rp.read()
        allowed = self.rp.can_fetch('Mozilla', 'http://www.lycos.com/search')
        self.check(allowed, True)
def test_suite():
    """Return a TestSuite holding all tests of TestRobotParser."""
    return unittest.TestSuite([unittest.makeSuite(TestRobotParser)])


# Run every test case of this module when executed as a script.
if __name__ == '__main__':
    unittest.main()

View file

@ -0,0 +1,81 @@
# -*- coding: iso-8859-1 -*-
"""test string formatting operations"""
# Copyright (C) 2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import unittest
import os
import linkcheck.strformat
class TestStrFormat(unittest.TestCase):
    """Checks for the string helpers in linkcheck.strformat.

    All comparisons use assertEqual(); the assertEquals() spelling used
    before is a deprecated alias of the same method.
    """

    def test_unquote(self):
        """unquote() must strip one pair of surrounding quotes."""
        unquote = linkcheck.strformat.unquote
        self.assertEqual(unquote(""), "")
        self.assertEqual(unquote(None), "")
        # a single quote character is left alone
        self.assertEqual(unquote("'"), "'")
        self.assertEqual(unquote("\""), "\"")
        self.assertEqual(unquote("\"\""), "")
        self.assertEqual(unquote("''"), "")
        self.assertEqual(unquote("'a'"), "a")
        # quotes inside the quoted text are kept
        self.assertEqual(unquote("'a\"'"), "a\"")
        self.assertEqual(unquote("'\"a'"), "\"a")
        self.assertEqual(unquote('"a\'"'), 'a\'')
        self.assertEqual(unquote('"\'a"'), '\'a')
        # even mis-matching quotes should be removed...
        self.assertEqual(unquote("'a\""), "a")
        self.assertEqual(unquote("\"a'"), "a")

    def test_wrap(self):
        """wrap() must split and join lines according to the width."""
        wrap = linkcheck.strformat.wrap
        s = "11%(sep)s22%(sep)s33%(sep)s44%(sep)s55" % {'sep': os.linesep}
        # testing width <= 0
        self.assertEqual(wrap(s, -1), s)
        self.assertEqual(wrap(s, 0), s)
        s2 = "11 22%(sep)s33 44%(sep)s55" % {'sep': os.linesep}
        # splitting lines
        self.assertEqual(wrap(s2, 2), s)
        # combining lines
        self.assertEqual(wrap(s, 5), s2)

    def test_remove_markup(self):
        """remove_markup() must drop tags but keep a lone "<"."""
        remove_markup = linkcheck.strformat.remove_markup
        self.assertEqual(remove_markup("<a>"), "")
        self.assertEqual(remove_markup("<>"), "")
        self.assertEqual(remove_markup("<<>"), "")
        self.assertEqual(remove_markup("a < b"), "a < b")

    def test_strsize(self):
        """strsize() must format byte counts and reject negative ones."""
        strsize = linkcheck.strformat.strsize
        self.assertRaises(ValueError, strsize, -1)
        self.assertEqual(strsize(0), "0 Bytes")
        self.assertEqual(strsize(1), "1 Byte")
        self.assertEqual(strsize(2), "2 Bytes")
        self.assertEqual(strsize(1023), "1023 Bytes")
        self.assertEqual(strsize(1024), "1.00 kB")
def test_suite():
    """Return a TestSuite holding all tests of TestStrFormat."""
    return unittest.TestSuite([unittest.makeSuite(TestStrFormat)])


# Run every test case of this module when executed as a script.
if __name__ == '__main__':
    unittest.main()

284
linkcheck/tests/test_url.py Normal file
View file

@ -0,0 +1,284 @@
# -*- coding: iso-8859-1 -*-
"""test url routines"""
# Copyright (C) 2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import unittest
import linkcheck.url
# 'ftp://user:pass@ftp.foo.net/foo/bar':
# 'ftp://user:pass@ftp.foo.net/foo/bar',
# 'http://USER:pass@www.Example.COM/foo/bar':
# 'http://USER:pass@www.example.com/foo/bar',
# '-': '-',
# All portions of the URI must be utf-8 encoded NFC form Unicode strings
#valid: http://example.com/?q=%C3%87 (C-cedilla U+00C7)
#valid: http://example.com/?q=%E2%85%A0 (Roman numeral one U+2160)
#invalid: http://example.com/?q=%C7 (C-cedilla ISO-8859-1)
#invalid: http://example.com/?q=C%CC%A7
# (Latin capital letter C + Combining cedilla U+0327)
class TestUrl(unittest.TestCase):
    """Checks for the norming, quoting and validity helpers of linkcheck.url."""

    def _check_norm(self, url, nurl):
        """Assert that linkcheck.url.url_norm(url) equals nurl."""
        self.assertEqual(linkcheck.url.url_norm(url), nurl)

    def test_pathattack(self):
        """windows winamp path attack prevention"""
        url = ("http://server/..%5c..%5c..%5c..%5c..%5c..%5c..%5c.."
               "%5ccskin.zip")
        nurl = "http://server/cskin.zip"
        # assertEqual instead of the deprecated assertEquals alias, like
        # every other comparison in this class
        self.assertEqual(
            linkcheck.url.url_quote(linkcheck.url.url_norm(url)), nurl)

    def test_norm_quote(self):
        """test url norm quoting"""
        # urls that must come back unchanged
        url = ("http://groups.google.com/groups?hl=en&lr&ie=UTF-8&"
               "threadm=3845B54D.E546F9BD%40monmouth.com&rnum=2&"
               "prev=/groups%3Fq%3Dlogitech%2Bwingman%2Bextreme%2Bdigital"
               "%2B3d%26hl%3Den%26lr%3D%26ie%3DUTF-8%26selm%3D3845B54D.E5"
               "46F9BD%2540monmouth.com%26rnum%3D2")
        self._check_norm(url, url)
        url = ("http://redirect.alexa.com/redirect?"
               "http://www.offeroptimizer.com")
        self._check_norm(url, url)
        url = "http://www.lesgensducinema.com/photo/Philippe%20Nahon.jpg"
        self._check_norm(url, url)
        # Only perform percent-encoding where it is essential.
        nurl = "http://example.com/~jane"
        self._check_norm("http://example.com/%7Ejane", nurl)
        self._check_norm("http://example.com/%7ejane", nurl)
        # Always use uppercase A-through-F characters when percent-encoding.
        self._check_norm("http://example.com/?q=1%2a2",
                         "http://example.com/?q=1%2A2")

    def test_norm_case_sensitivity(self):
        """test url norm case sensitivity"""
        # Always provide the URI scheme in lowercase characters.
        self._check_norm("HTTP://example.com/", "http://example.com/")
        # Always provide the host, if any, in lowercase characters.
        self._check_norm("http://EXAMPLE.COM/", "http://example.com/")

    def test_norm_defaultport(self):
        """test url norm default port recognition"""
        # For schemes that define a port, use an empty port if the default
        # is desired
        self._check_norm("http://example.com:80/", "http://example.com/")
        self._check_norm("http://example.com:8080/",
                         "http://example.com:8080/")

    def test_norm_host_dot(self):
        """test url norm host dot removal"""
        self._check_norm("http://example.com./", "http://example.com/")
        self._check_norm("http://example.com.:81/", "http://example.com:81/")

    def test_norm_fragment(self):
        """test url norm fragment preserving"""
        # Empty fragment identifiers must be preserved:
        url = "http://www.w3.org/2000/01/rdf-schema#"
        self._check_norm(url, url)

    def test_norm_path(self):
        """test url norm empty path handling"""
        # For schemes that define an empty path to be equivalent to a
        # path of "/", use "/".
        self._check_norm("http://example.com", "http://example.com/")

    def test_norm_path_backslashes(self):
        """test url norm backslash path handling"""
        # note: yes, this is not rfc conform (see url.py for more details)
        self._check_norm(r"http://example.com\test.html",
                         "http://example.com/test.html")
        nurl = "http://example.com/a/test.html"
        self._check_norm(r"http://example.com/a\test.html", nurl)
        self._check_norm(r"http://example.com\a\test.html", nurl)
        self._check_norm(r"http://example.com\a/test.html", nurl)

    def test_norm_path_slashes(self):
        """test url norm slashes in path handling"""
        # reduce duplicate slashes
        self._check_norm("http://example.com//a/test.html",
                         "http://example.com/a/test.html")
        self._check_norm("http://example.com//a/b/",
                         "http://example.com/a/b/")

    def test_norm_path_dots(self):
        """test url norm dots in path handling"""
        # Prevent dot-segments appearing in non-relative URI paths.
        nurl = "http://example.com/a/b"
        self._check_norm("http://example.com/a/./b", nurl)
        self._check_norm("http://example.com/a/../a/b", nurl)

    def test_norm_path_relative(self):
        """test url norm relative path handling"""
        # normalize redundant path segments; (url, expected normed url)
        cases = [
            ('/foo/bar/.', '/foo/bar/'),
            ('/foo/bar/./', '/foo/bar/'),
            ('/foo/bar/..', '/foo/'),
            ('/foo/bar/../', '/foo/'),
            ('/foo/bar/../baz', '/foo/baz'),
            ('/foo/bar/../..', '/'),
            ('/foo/bar/../../', '/'),
            ('/foo/bar/../../baz', '/baz'),
            ('/foo/bar/../../../baz', '/baz'),
            ('/foo/bar/../../../../baz', '/baz'),
            ('/./foo', '/foo'),
            ('/../foo', '/foo'),
            ('/foo.', '/foo.'),
            ('/.foo', '/.foo'),
            ('/foo..', '/foo..'),
            ('/..foo', '/..foo'),
            ('/./../foo', '/foo'),
            ('/./foo/.', '/foo/'),
            ('/foo/./bar', '/foo/bar'),
            ('/foo/../bar', '/bar'),
            ('/foo//', '/foo/'),
            ('/foo///bar//', '/foo/bar/'),
        ]
        for url, nurl in cases:
            self._check_norm(url, nurl)

    def test_norm_other(self):
        """test norming of other schemes"""
        # no netloc and no path
        self._check_norm('mailto:', 'mailto:')
        # no netloc and no path
        self._check_norm('news:', 'news:')
        # using netloc
        self._check_norm('snews:', 'snews://')
        # using netloc and path
        self._check_norm('nntp:', 'nntp:///')
        # non-ascii and reserved characters get percent-encoded
        url = "news:§$%&/´`(§%"
        nurl = 'news:%A7%24%25%26/%B4%60%28%A7%25'
        self._check_norm(url, nurl)

    def test_norm_with_auth(self):
        """test norming of urls with authentication tokens"""
        # user and password parts must survive norming unchanged
        url = "telnet://user@www.imadoofus.org"
        self._check_norm(url, url)
        url = "telnet://user:pass@www.imadoofus.org"
        self._check_norm(url, url)
        url = "http://user:pass@www.imadoofus.org/"
        self._check_norm(url, url)

    def test_valid(self):
        """test url validity functions"""
        self.assert_(linkcheck.url.is_valid_url("http://www.imadoofus.com"))
        self.assert_(linkcheck.url.is_valid_url("http://www.imadoofus.com/"))
        self.assert_(linkcheck.url.is_valid_url(
            "http://www.imadoofus.com/~calvin"))
        self.assert_(linkcheck.url.is_valid_url(
            "http://www.imadoofus.com/a,b"))
        self.assert_(linkcheck.url.is_valid_url(
            "http://www.imadoofus.com#anchor55"))
        # NOTE(review): this is the only check using is_valid_js_url --
        # confirm that it is intended and not a slip for is_valid_url
        self.assert_(linkcheck.url.is_valid_js_url(
            "http://www.imadoofus.com/?hulla=do"))

    def test_needs_quoting(self):
        """test url quoting necessity"""
        url = "mailto:<calvin@debian.org>?subject=Halli Hallo"
        # the url doubles as failure message
        self.assert_(linkcheck.url.url_needs_quoting(url), url)
def test_suite():
    """Return a TestSuite holding all tests of TestUrl."""
    return unittest.TestSuite([unittest.makeSuite(TestUrl)])


# Run every test case of this module when executed as a script.
if __name__ == '__main__':
    unittest.main()