mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-24 01:40:23 +00:00
handle leading query question mark ('?') after host without path
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1981 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
265f942ba1
commit
b150ff6ea8
1 changed file with 24 additions and 12 deletions
|
|
@ -169,16 +169,8 @@ def idna_encode (host):
|
|||
return host, False
|
||||
|
||||
|
||||
def url_norm (url):
|
||||
"""Fix and normalize URL which must be quoted. Supports unicode
|
||||
hostnames (IDNA encoding) according to RFC 3490.
|
||||
|
||||
@return (normed url, idna flag)
|
||||
"""
|
||||
urlparts = list(urlparse.urlsplit(url))
|
||||
# scheme
|
||||
urlparts[0] = urllib.unquote(urlparts[0]).lower()
|
||||
# host
|
||||
def url_fix_host (urlparts):
|
||||
"""Unquote and fix hostname. Returns is_idn."""
|
||||
urlparts[1], is_idn = idna_encode(urllib.unquote(urlparts[1]).lower())
|
||||
# a leading backslash in path causes urlsplit() to add the
|
||||
# path components up to the first slash to host
|
||||
|
|
@ -193,7 +185,13 @@ def url_norm (url):
|
|||
urlparts[2] = "%s%s" % (comps, urllib.unquote(urlparts[2]))
|
||||
urlparts[1] = urlparts[1][:i]
|
||||
else:
|
||||
urlparts[2] = urllib.unquote(urlparts[2]) # path
|
||||
# a leading ? in path causes urlsplit() to add the query to the
|
||||
# host name
|
||||
i = urlparts[1].find("?")
|
||||
if i != -1:
|
||||
urlparts[1], urlparts[3] = urlparts[1].split('?', 1)
|
||||
# path
|
||||
urlparts[2] = urllib.unquote(urlparts[2])
|
||||
if urlparts[1]:
|
||||
userpass, host = urllib.splituser(urlparts[1])
|
||||
if userpass:
|
||||
|
|
@ -210,8 +208,22 @@ def url_norm (url):
|
|||
urlparts[1] = userpass+host
|
||||
else:
|
||||
urlparts[1] = "%s%s:%d" % (userpass, host, port)
|
||||
|
||||
|
||||
def url_norm (url):
|
||||
"""Fix and normalize URL which must be quoted. Supports unicode
|
||||
hostnames (IDNA encoding) according to RFC 3490.
|
||||
|
||||
@return (normed url, idna flag)
|
||||
"""
|
||||
urlparts = list(urlparse.urlsplit(url))
|
||||
# scheme
|
||||
urlparts[0] = urllib.unquote(urlparts[0]).lower()
|
||||
# host (with path or query side effects)
|
||||
is_idn = url_fix_host(urlparts)
|
||||
# query
|
||||
l = []
|
||||
for k, v in parse_qsl(urlparts[3], True): # query
|
||||
for k, v in parse_qsl(urlparts[3], True):
|
||||
k = urllib.quote(k, '/-:,')
|
||||
if v:
|
||||
v = urllib.quote(v, '/-:,')
|
||||
|
|
|
|||
Loading…
Reference in a new issue