Do not catch UnicodeError in the syntax-check exception list; handle it internally

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3269 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2006-05-19 17:13:16 +00:00
parent 98d5387ac0
commit 7e1e01bd36
6 changed files with 28 additions and 7 deletions

View file

@ -65,6 +65,12 @@
Removed: linkcheck/checker/{ignored,error}url.py
Added: linkcheck/checker/unknownurl.py
* Convert the "label too long" domain name parse error into
a more friendly error message.
Type: bugfix
Changed: linkcheck/checker/{__init__,urlbase,httpurl,fileurl}.py,
linkchecker
3.4 "The Chumscrubbers" (released 4.2.2006)
* Ignore decoding errors when retrieving the robots.txt URL.

View file

@ -34,8 +34,6 @@ import linkcheck.dns.exception
# Catch these exceptions on syntax checks.
ExcSyntaxList = [
linkcheck.LinkCheckerError,
# .encode('idna') raises this
UnicodeError,
]
# Catch these exceptions on content and connect checks. All other

View file

@ -107,7 +107,7 @@ class FileUrl (urlbase.UrlBase):
base_url = re.sub("^file://(/?)([a-zA-Z]):", r"file:///\2|", base_url)
# norm base url again after changing
if self.base_url != base_url:
base_url, is_idn = linkcheck.url.url_norm(base_url)
base_url, is_idn = linkcheck.checker.urlbase.url_norm(base_url)
self.base_url = unicode(base_url)
def build_url (self):

View file

@ -310,7 +310,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
"Redirected to %r", newurl)
self.add_info(_("Redirected to %(url)s.") % {'url': newurl},
tag="http-redirect")
redirected, is_idn = linkcheck.url.url_norm(newurl)
# norm redirected url - url_norm() converts a UnicodeError from url.idna_encode() into LinkCheckerError
redirected, is_idn = linkcheck.checker.urlbase.url_norm(newurl)
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK,
"Norm redirected to %r", redirected)
urlparts = linkcheck.strformat.url_unicode_split(redirected)

View file

@ -53,6 +53,17 @@ def urljoin (parent, url, scheme):
return urlparse.urljoin(parent, url)
def url_norm (url):
    """
    Normalize the given URL by delegating to linkcheck.url.url_norm().

    The result of linkcheck.url.url_norm() is returned unchanged (callers
    unpack it as a ``(normed_url, is_idn)`` pair).

    A UnicodeError raised during normalization is not propagated; it is
    converted into a linkcheck.LinkCheckerError carrying a translated
    "unparsable domain name" message that includes the original error.
    """
    try:
        normed = linkcheck.url.url_norm(url)
    except UnicodeError:
        # sys.exc_info() is used instead of an "except ..., name" clause
        # to fetch the active exception instance.
        error = sys.exc_info()[1]
        raise linkcheck.LinkCheckerError(
            _("URL has unparsable domain name: %s") % error)
    return normed
class UrlBase (object):
"""
An URL with additional information like validity etc.
@ -294,8 +305,8 @@ class UrlBase (object):
Construct self.url and self.urlparts out of the given base
url information self.base_url, self.parent_url and self.base_ref.
"""
# norm base url
base_url, is_idn = linkcheck.url.url_norm(self.base_url)
# norm base url - url_norm() converts a UnicodeError from url.idna_encode() into LinkCheckerError
base_url, is_idn = url_norm(self.base_url)
if is_idn:
self.add_warning(_("""URL %r has a unicode domain name which
is not yet widely supported. You should use

View file

@ -715,7 +715,12 @@ for url in args:
# syntactic sugar
url = "ftp://%s" % url
url_data = get_url_from(url, 0, aggregate, assume_local=True)
linkcheck.add_intern_pattern(url_data, config)
try:
linkcheck.add_intern_pattern(url_data, config)
except UnicodeError:
linkcheck.log.error(linkcheck.LOG_CMDLINE,
_("URL has unparsable domain name: %s"), sys.exc_info()[1])
sys.exit(1)
aggregate.urlqueue.put(url_data)
# set up profiling/psyco
if do_profile and not has_profile: