Fix UNC path handling on Windows.

This commit is contained in:
Bastian Kleineidam 2012-06-24 10:30:54 +02:00
parent b550a9dcb5
commit 5c045fef44
5 changed files with 28 additions and 14 deletions

View file

@ -12,6 +12,7 @@ Fixes:
- gui: Fix saving of the debugmemory option.
- checking: Do not handle <object codebase="..."> attribute as parent
URL but as normal URL to be checked.
- checking: Fix UNC path handling on Windows.
7.9 "The Dark Knight" (released 10.6.2012)

View file

@ -25,7 +25,7 @@ import urllib
import urllib2
from . import urlbase, get_index_html, get_url_from
from .. import log, LOG_CHECK, fileutil, LinkCheckerError
from .. import log, LOG_CHECK, fileutil, LinkCheckerError, url as urlutil
from ..bookmarks import firefox
from .const import WARN_FILE_MISSING_SLASH, WARN_FILE_SYSTEM_PATH
@ -137,7 +137,7 @@ class FileUrl (urlbase.UrlBase):
urlparts = list(urlparse.urlsplit(base_url))
# ignore query part for filesystem urls
urlparts[3] = ''
self.base_url = urlparse.urlunsplit(urlparts)
self.base_url = urlutil.urlunsplit(urlparts)
super(FileUrl, self).build_url()
# ignore query and fragment url parts for filesystem urls
self.urlparts[3] = self.urlparts[4] = ''
@ -145,7 +145,7 @@ class FileUrl (urlbase.UrlBase):
self.add_warning(_("Added trailing slash to directory."),
tag=WARN_FILE_MISSING_SLASH)
self.urlparts[2] += '/'
self.url = urlparse.urlunsplit(self.urlparts)
self.url = urlutil.urlunsplit(self.urlparts)
def add_size_info (self):
"""Get size of file content from filename path."""

View file

@ -161,7 +161,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
"a GET request was used instead.") %
{"name": server})
# redirections might have changed the URL
self.url = urlparse.urlunsplit(self.urlparts)
self.url = urlutil.urlunsplit(self.urlparts)
# check response
if response:
self.check_response(response)
@ -305,7 +305,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
num, response = self.follow_redirection(response, set_result, redirected)
if num == -1:
return num, response
redirected = urlparse.urlunsplit(self.urlparts)
redirected = urlutil.urlunsplit(self.urlparts)
tries += num
return tries, response
@ -528,10 +528,10 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
# http://tools.ietf.org/html/rfc2616#section-3.2.2
anchor = ''
if self.proxy:
path = urlparse.urlunsplit((self.urlparts[0], self.urlparts[1],
path = urlutil.urlunsplit((self.urlparts[0], self.urlparts[1],
self.urlparts[2], self.urlparts[3], anchor))
else:
path = urlparse.urlunsplit(('', '', self.urlparts[2],
path = urlutil.urlunsplit(('', '', self.urlparts[2],
self.urlparts[3], anchor))
self.url_connection.putrequest(self.method, path, skip_host=True,
skip_accept_encoding=True)

View file

@ -355,7 +355,7 @@ class UrlBase (object):
"""
# remove anchor from content cache key since we assume
# URLs with different anchors to have the same content
self.cache_content_key = urlparse.urlunsplit(self.urlparts[:4]+[u''])
self.cache_content_key = urlutil.urlunsplit(self.urlparts[:4]+[u''])
assert isinstance(self.cache_content_key, unicode), self
log.debug(LOG_CACHE, "Content cache key %r", self.cache_content_key)
# construct cache key
@ -380,7 +380,7 @@ class UrlBase (object):
try:
self.build_url()
# check url warnings
effectiveurl = urlparse.urlunsplit(self.urlparts)
effectiveurl = urlutil.urlunsplit(self.urlparts)
if self.url != effectiveurl:
self.add_warning(_("Effective URL %(url)r.") %
{"url": effectiveurl},
@ -409,7 +409,7 @@ class UrlBase (object):
# strip the parent url query and anchor
urlparts = list(urlparse.urlsplit(self.parent_url))
urlparts[4] = ""
parent_url = urlparse.urlunsplit(urlparts)
parent_url = urlutil.urlunsplit(urlparts)
self.url = urljoin(parent_url, base_url)
else:
self.url = base_url
@ -417,11 +417,11 @@ class UrlBase (object):
urlparts = list(urlparse.urlsplit(self.url))
if urlparts[2]:
urlparts[2] = urlutil.collapse_segments(urlparts[2])
self.url = urlparse.urlunsplit(urlparts)
self.url = urlutil.urlunsplit(urlparts)
# split into (modifiable) list
self.urlparts = strformat.url_unicode_split(self.url)
# and unsplit again
self.url = urlparse.urlunsplit(self.urlparts)
self.url = urlutil.urlunsplit(self.urlparts)
# check userinfo@host:port syntax
self.userinfo, host = urllib.splituser(self.urlparts[1])
# set host lowercase

View file

@ -269,6 +269,19 @@ def url_parse_query (query, encoding=None):
return ''.join(l) + append
def urlunsplit (urlparts):
    """Same as urlparse.urlunsplit but with extra UNC path handling
    for Windows OS.

    urlparts is the five-item sequence (scheme, netloc, path, query,
    fragment) that urlparse.urlunsplit() accepts. The joined URL is
    returned. Only on Windows (os.name == 'nt'), and only for 'file'
    URLs whose path contains no '|' (presumably the drive letter
    marker, as in '/C|/dir' -- confirm against callers), extra slashes
    are inserted directly after the scheme.
    """
    res = urlparse.urlunsplit(urlparts)
    if os.name == 'nt' and urlparts[0] == 'file' and '|' not in urlparts[2]:
        # UNC paths must have 4 slashes: 'file:////server/path'
        # Depending on the path in urlparts[2], urlparse.urlunsplit()
        # left only two or three slashes. This is fixed below.
        repl = 'file://' if urlparts[2].startswith('//') else 'file:/'
        # Replace only the first occurrence, which is the scheme prefix.
        # An unbounded replace would also rewrite any 'file:' text that
        # appears later in the path, query or fragment.
        res = res.replace('file:', repl, 1)
    return res
def url_norm (url, encoding=None):
"""Normalize the given URL which must be quoted. Supports unicode
hostnames (IDNA encoding) according to RFC 3490.
@ -315,7 +328,7 @@ def url_norm (url, encoding=None):
urlparts[1] = url_quote_part(urlparts[1], safechars='@:', encoding=encoding) # host
urlparts[2] = url_quote_part(urlparts[2], safechars=_nopathquote_chars, encoding=encoding) # path
urlparts[4] = url_quote_part(urlparts[4], encoding=encoding) # anchor
res = urlparse.urlunsplit(urlparts)
res = urlunsplit(urlparts)
if url.endswith('#') and not urlparts[4]:
# re-append trailing empty fragment
res += '#'
@ -384,7 +397,7 @@ def url_quote (url):
l.append("%s%s" % (k, sep))
urlparts[3] = ''.join(l)
urlparts[4] = url_quote_part(urlparts[4]) # anchor
return urlparse.urlunsplit(urlparts)
return urlunsplit(urlparts)
def url_quote_part (s, safechars='/', encoding=None):