diff --git a/linkcheck/tests/test_url.py b/linkcheck/tests/test_url.py index 39a15d6d..91b722db 100644 --- a/linkcheck/tests/test_url.py +++ b/linkcheck/tests/test_url.py @@ -70,6 +70,10 @@ class TestUrl (unittest.TestCase): url = "http://example.com/?q=1%2a2" nurl = "http://example.com/?q=1%2A2" self.assertEqual(linkcheck.url.url_norm(url), nurl) + # the no-quote chars + url = "http://example.com/a*+-();b" + nurl = url + self.assertEqual(linkcheck.url.url_norm(url), nurl) def test_norm_case_sensitivity (self): """test url norm case sensitivity""" @@ -243,8 +247,8 @@ class TestUrl (unittest.TestCase): url = 'nntp:' nurl = 'nntp:///' self.assertEqual(linkcheck.url.url_norm(url), nurl) - url = "news:§$%&/´`(§%" - nurl = 'news:%A7%24%25%26/%B4%60%28%A7%25' + url = "news:§$%&/´`§%" + nurl = 'news:%A7%24%25%26/%B4%60%A7%25' self.assertEqual(linkcheck.url.url_norm(url), nurl) # javascript url url = "javascript:loadthis()" diff --git a/linkcheck/url.py b/linkcheck/url.py index aa70e5ca..ac51cf59 100644 --- a/linkcheck/url.py +++ b/linkcheck/url.py @@ -194,10 +194,7 @@ def url_norm (url): # quote parts again urlparts[0] = urllib.quote(urlparts[0]) # scheme urlparts[1] = urllib.quote(urlparts[1], ':@') # host - nopathquote = ';/=,~' - # note: the list of chars not to quote is different for javascript urls - if urlparts[0]=='javascript': - nopathquote += '()+-' + nopathquote = ';/=,~*-+()' urlparts[2] = urllib.quote(urlparts[2], nopathquote) # path res = urlparse.urlunsplit(urlparts) if url.endswith('#') and not urlparts[4]: