mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-16 22:10:26 +00:00
Fixed URL encoding
This commit is contained in:
parent
9bc4772ba4
commit
bee8023540
6 changed files with 20 additions and 21 deletions
|
|
@ -279,6 +279,9 @@ def url_norm (url, encoding=None):
|
|||
url = url.encode('ascii')
|
||||
except UnicodeEncodeError:
|
||||
pass
|
||||
encode_unicode = True
|
||||
else:
|
||||
encode_unicode = False
|
||||
urlparts = list(urlparse.urlsplit(url))
|
||||
# scheme
|
||||
urlparts[0] = urllib.unquote(urlparts[0]).lower()
|
||||
|
|
@ -312,7 +315,7 @@ def url_norm (url, encoding=None):
|
|||
if url.endswith('#') and not urlparts[4]:
|
||||
# re-append trailing empty fragment
|
||||
res += '#'
|
||||
if isinstance(url, unicode):
|
||||
if encode_unicode:
|
||||
res = unicode(res)
|
||||
return (res, is_idn)
|
||||
|
||||
|
|
|
|||
|
|
@ -121,11 +121,11 @@ real url http://localhost:8001/?d=directory&p=page1
|
|||
name should not be cached
|
||||
valid
|
||||
|
||||
url http://localhost:8001/?quoted=ü
|
||||
cache key http://localhost:8001/?quoted=%%FC
|
||||
real url http://localhost:8001/?quoted=%%FC
|
||||
url http://localhost:8001/?quoted=ü
|
||||
cache key http://localhost:8001/?quoted=%%C3%%BC
|
||||
real url http://localhost:8001/?quoted=%%C3%%BC
|
||||
name html entities
|
||||
warning Base URL is not properly normed. Normed URL is http://localhost:8001/?quoted=%%FC.
|
||||
warning Base URL is not properly normed. Normed URL is http://localhost:8001/?quoted=%%C3%%BC.
|
||||
valid
|
||||
|
||||
url clsid:12345
|
||||
|
|
|
|||
|
|
@ -3,9 +3,6 @@
|
|||
<meta rel="SHORTCUT ICON" href="favicon.ico">
|
||||
<meta rel="ICON" href="favicon.ico">
|
||||
|
||||
<!-- unparsable domain name -->
|
||||
<a href="http://imadööfus.org%0D%3Cfont%20face=%22Verdana,%20Arial,%20Helvetica,%20sans-serif%22%20size=%222%22%3E%3Chttp://www.imadoofus.org%3E%20%0D%20%20%20%20%20%20%20%20%20%20%20%20%20%20">blubb</a>
|
||||
|
||||
<!-- empty tag -->
|
||||
<tr background>
|
||||
|
||||
|
|
|
|||
|
|
@ -19,12 +19,6 @@ cache key file://%(curdir)s/%(datadir)s/favicon.ico
|
|||
real url file://%(curdir)s/%(datadir)s/favicon.ico
|
||||
valid
|
||||
|
||||
url http://imadööfus.org%%0D%%3Cfont%%20face=%%22Verdana,%%20Arial,%%20Helvetica,%%20sans-serif%%22%%20size=%%222%%22%%3E%%3Chttp://www.imadoofus.org%%3E%%20%%0D%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20
|
||||
cache key None
|
||||
real url
|
||||
name blubb
|
||||
error
|
||||
|
||||
url
|
||||
cache key None
|
||||
real url
|
||||
|
|
|
|||
|
|
@ -31,9 +31,10 @@ class TestError (LinkCheckTest):
|
|||
attrs = self.get_attrs(url=url)
|
||||
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
|
||||
resultlines = [
|
||||
u"url %(nurl)s" % attrs,
|
||||
u"url file://%(curdir)s/%(url)s" % attrs,
|
||||
u"cache key %(nurl)s" % attrs,
|
||||
u"real url %(nurl)s" % attrs,
|
||||
u"warning Base URL is not properly normed. Normed URL is %(nurl)s." % attrs,
|
||||
u"error",
|
||||
]
|
||||
self.direct(url, resultlines)
|
||||
|
|
@ -44,10 +45,11 @@ class TestError (LinkCheckTest):
|
|||
attrs = self.get_attrs(url=url)
|
||||
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
|
||||
resultlines = [
|
||||
u"url %(nurl)s" % attrs,
|
||||
u"url file://%(curdir)s/%(url)s" % attrs,
|
||||
u"cache key %(nurl)s" % attrs,
|
||||
u"real url %(nurl)s" % attrs,
|
||||
u"name %(url)s" % attrs,
|
||||
u"warning Base URL is not properly normed. Normed URL is %(nurl)s." % attrs,
|
||||
u"error",
|
||||
]
|
||||
self.direct(url, resultlines)
|
||||
|
|
@ -55,10 +57,11 @@ class TestError (LinkCheckTest):
|
|||
attrs = self.get_attrs(url=url)
|
||||
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
|
||||
resultlines = [
|
||||
u"url %(nurl)s" % attrs,
|
||||
u"url file://%(curdir)s/%(url)s" % attrs,
|
||||
u"cache key %(nurl)s" % attrs,
|
||||
u"real url %(nurl)s" % attrs,
|
||||
u"name %(url)s" % attrs,
|
||||
u"warning Base URL is not properly normed. Normed URL is %(nurl)s." % attrs,
|
||||
u"error",
|
||||
]
|
||||
self.direct(url, resultlines)
|
||||
|
|
@ -92,10 +95,11 @@ class TestError (LinkCheckTest):
|
|||
attrs = self.get_attrs(url=url)
|
||||
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
|
||||
resultlines = [
|
||||
u"url %(nurl)s" % attrs,
|
||||
u"url file://%(curdir)s/%(url)s" % attrs,
|
||||
u"cache key %(nurl)s" % attrs,
|
||||
u"real url %(nurl)s" % attrs,
|
||||
u"name %(url)s" % attrs,
|
||||
u"warning Base URL is not properly normed. Normed URL is %(nurl)s." % attrs,
|
||||
u"error",
|
||||
]
|
||||
self.direct(url, resultlines)
|
||||
|
|
@ -106,10 +110,11 @@ class TestError (LinkCheckTest):
|
|||
attrs = self.get_attrs(url=url)
|
||||
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
|
||||
resultlines = [
|
||||
u"url %(nurl)s" % attrs,
|
||||
u"url file://%(curdir)s/%(url)s" % attrs,
|
||||
u"cache key %(nurl)s" % attrs,
|
||||
u"real url %(nurl)s" % attrs,
|
||||
u"name %(url)s" % attrs,
|
||||
u"warning Base URL is not properly normed. Normed URL is %(nurl)s." % attrs,
|
||||
u"error",
|
||||
]
|
||||
self.direct(url, resultlines)
|
||||
|
|
@ -120,10 +125,11 @@ class TestError (LinkCheckTest):
|
|||
attrs = self.get_attrs(url=url)
|
||||
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
|
||||
resultlines = [
|
||||
u"url %(nurl)s" % attrs,
|
||||
u"url file://%(curdir)s/%(url)s" % attrs,
|
||||
u"cache key %(nurl)s" % attrs,
|
||||
u"real url %(nurl)s" % attrs,
|
||||
u"name %(url)s" % attrs,
|
||||
u"warning Base URL is not properly normed. Normed URL is %(nurl)s." % attrs,
|
||||
u"error",
|
||||
]
|
||||
self.direct(url, resultlines)
|
||||
|
|
|
|||
|
|
@ -133,7 +133,6 @@ class TestHttp (httpserver.HttpServerTest):
|
|||
u"url http://www.example.org/",
|
||||
u"cache key http://www.example.org/",
|
||||
u"real url http://www.example.org/",
|
||||
u"warning Access denied by robots.txt, checked only syntax.",
|
||||
u"valid",
|
||||
]
|
||||
self.direct(url, resultlines, recursionlevel=1)
|
||||
|
|
|
|||
Loading…
Reference in a new issue