diff --git a/doc/changelog.txt b/doc/changelog.txt index 108dbe42..d4de373a 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -3,6 +3,9 @@ Fixes: - checking: Do not check content of already cached URLs. Closes: SF bug #1720083 +- checking: Do not parse URL CGI part recursively, avoiding maximum + recursion limit errors. + Closes: SF bug #3096115 Changes: - cmdline: Don't log a warning if URL has been redirected. diff --git a/linkcheck/url.py b/linkcheck/url.py index ff226b37..d7161401 100644 --- a/linkcheck/url.py +++ b/linkcheck/url.py @@ -251,11 +251,10 @@ def url_parse_query (query, encoding=None): encoding = url_encoding query = query.encode(encoding, 'ignore') # if ? is in the query, split it off, seen at msdn.microsoft.com - if '?' in query: - query, append = query.split('?', 1) - append = '?'+url_parse_query(append) - else: - append = "" + append = "" + while '?' in query: + query, rest = query.rsplit('?', 1) + append = '?'+url_parse_query(rest)+append l = [] for k, v, sep in parse_qsl(query, True): k = url_quote_part(k, '/-:,;') diff --git a/tests/test_url.py b/tests/test_url.py index 6ac0f266..cb34beb6 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -526,6 +526,10 @@ class TestUrl (unittest.TestCase): u = "scid=kb;en-us;Q248840&b=c;hulla=bulla" self.assertEqual(linkcheck.url.url_parse_query(u), u) + def test_long_cgi (self): + u = "/test%s;" % ("?a="*1000) + self.assertEqual(linkcheck.url.url_parse_query(u), u) + def test_port (self): is_numeric_port = linkcheck.url.is_numeric_port self.assertTrue(is_numeric_port("80"))