Do not parse URL CGI part recursively.

This commit is contained in:
Bastian Kleineidam 2010-10-27 20:51:56 +02:00
parent b2e172b80e
commit e429dbcc13
3 changed files with 11 additions and 5 deletions

View file

@ -3,6 +3,9 @@
Fixes:
- checking: Do not check content of already cached URLs.
Closes: SF bug #1720083
- checking: Do not parse URL CGI part recursively, avoiding maximum
recursion limit errors.
Closes: SF bug #3096115
Changes:
- cmdline: Don't log a warning if URL has been redirected.

View file

@ -251,11 +251,10 @@ def url_parse_query (query, encoding=None):
encoding = url_encoding
query = query.encode(encoding, 'ignore')
# If the query itself contains '?' characters, split those parts off and parse each one separately (seen at msdn.microsoft.com).
if '?' in query:
query, append = query.split('?', 1)
append = '?'+url_parse_query(append)
else:
append = ""
append = ""
while '?' in query:
query, rest = query.rsplit('?', 1)
append = '?'+url_parse_query(rest)+append
l = []
for k, v, sep in parse_qsl(query, True):
k = url_quote_part(k, '/-:,;')

View file

@ -526,6 +526,10 @@ class TestUrl (unittest.TestCase):
u = "scid=kb;en-us;Q248840&b=c;hulla=bulla"
self.assertEqual(linkcheck.url.url_parse_query(u), u)
# Regression test: a query containing very many '?' separators must be
# parsed without hitting the maximum recursion limit.
def test_long_cgi (self):
# Build a query string with 1000 consecutive "?a=" parts followed by ';'.
u = "/test%s;" % ("?a="*1000)
# The parsed-and-reassembled query is expected to equal the input unchanged.
self.assertEqual(linkcheck.url.url_parse_query(u), u)
def test_port (self):
is_numeric_port = linkcheck.url.is_numeric_port
self.assertTrue(is_numeric_port("80"))