mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-28 07:43:59 +00:00
Do not catch UnicodeError; handle it internally.
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3269 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
98d5387ac0
commit
7e1e01bd36
6 changed files with 28 additions and 7 deletions
|
|
@ -65,6 +65,12 @@
|
|||
Removed: linkcheck/checker/{ignored,error}url.py
|
||||
Added: linkcheck/checker/unknownurl.py
|
||||
|
||||
* Convert the "label too long" domain name parse error into
|
||||
a more friendly error message.
|
||||
Type: bugfix
|
||||
Changed: linkcheck/checker/{__init__,urlbase,httpurl,fileurl}.py,
|
||||
linkchecker
|
||||
|
||||
3.4 "The Chumscrubbers" (released 4.2.2006)
|
||||
|
||||
* Ignore decoding errors when retrieving the robots.txt URL.
|
||||
|
|
|
|||
|
|
@ -34,8 +34,6 @@ import linkcheck.dns.exception
|
|||
# Catch these exceptions on syntax checks.
|
||||
ExcSyntaxList = [
|
||||
linkcheck.LinkCheckerError,
|
||||
# .encode('idna') raises this
|
||||
UnicodeError,
|
||||
]
|
||||
|
||||
# Catch these exceptions on content and connect checks. All other
|
||||
|
|
|
|||
|
|
@ -107,7 +107,7 @@ class FileUrl (urlbase.UrlBase):
|
|||
base_url = re.sub("^file://(/?)([a-zA-Z]):", r"file:///\2|", base_url)
|
||||
# norm base url again after changing
|
||||
if self.base_url != base_url:
|
||||
base_url, is_idn = linkcheck.url.url_norm(base_url)
|
||||
base_url, is_idn = linkcheck.checker.urlbase.url_norm(base_url)
|
||||
self.base_url = unicode(base_url)
|
||||
|
||||
def build_url (self):
|
||||
|
|
|
|||
|
|
@ -310,7 +310,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
"Redirected to %r", newurl)
|
||||
self.add_info(_("Redirected to %(url)s.") % {'url': newurl},
|
||||
tag="http-redirect")
|
||||
redirected, is_idn = linkcheck.url.url_norm(newurl)
|
||||
# norm base url - can raise UnicodeError from url.idna_encode()
|
||||
redirected, is_idn = linkcheck.checker.urlbase.url_norm(newurl)
|
||||
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK,
|
||||
"Norm redirected to %r", redirected)
|
||||
urlparts = linkcheck.strformat.url_unicode_split(redirected)
|
||||
|
|
|
|||
|
|
@ -53,6 +53,17 @@ def urljoin (parent, url, scheme):
|
|||
return urlparse.urljoin(parent, url)
|
||||
|
||||
|
||||
def url_norm (url):
    """
    Normalize the given URL with linkcheck.url.url_norm() and return
    its result unchanged. A UnicodeError raised during normalization
    is converted into a LinkCheckerError whose message is built via _().
    """
    try:
        normed = linkcheck.url.url_norm(url)
    except UnicodeError:
        # sys.exc_info()[1] is the UnicodeError instance being handled
        reason = sys.exc_info()[1]
        raise linkcheck.LinkCheckerError(
            _("URL has unparsable domain name: %s") % reason)
    return normed
|
||||
|
||||
|
||||
class UrlBase (object):
|
||||
"""
|
||||
An URL with additional information like validity etc.
|
||||
|
|
@ -294,8 +305,8 @@ class UrlBase (object):
|
|||
Construct self.url and self.urlparts out of the given base
|
||||
url information self.base_url, self.parent_url and self.base_ref.
|
||||
"""
|
||||
# norm base url
|
||||
base_url, is_idn = linkcheck.url.url_norm(self.base_url)
|
||||
# norm base url - can raise UnicodeError from url.idna_encode()
|
||||
base_url, is_idn = url_norm(self.base_url)
|
||||
if is_idn:
|
||||
self.add_warning(_("""URL %r has a unicode domain name which
|
||||
is not yet widely supported. You should use
|
||||
|
|
|
|||
|
|
@ -715,7 +715,12 @@ for url in args:
|
|||
# syntactic sugar
|
||||
url = "ftp://%s" % url
|
||||
url_data = get_url_from(url, 0, aggregate, assume_local=True)
|
||||
linkcheck.add_intern_pattern(url_data, config)
|
||||
try:
|
||||
linkcheck.add_intern_pattern(url_data, config)
|
||||
except UnicodeError:
|
||||
linkcheck.log.error(linkcheck.LOG_CMDLINE,
|
||||
_("URL has unparsable domain name: %s"), sys.exc_info()[1])
|
||||
sys.exit(1)
|
||||
aggregate.urlqueue.put(url_data)
|
||||
# set up profiling/psyco
|
||||
if do_profile and not has_profile:
|
||||
|
|
|
|||
Loading…
Reference in a new issue