From 7e1e01bd369ad6b62a0c4f66257b7de05141ce47 Mon Sep 17 00:00:00 2001 From: calvin Date: Fri, 19 May 2006 17:13:16 +0000 Subject: [PATCH] do not catch UnicodeError, handle that intern git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3269 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- ChangeLog | 6 ++++++ linkcheck/checker/__init__.py | 2 -- linkcheck/checker/fileurl.py | 2 +- linkcheck/checker/httpurl.py | 3 ++- linkcheck/checker/urlbase.py | 15 +++++++++++++-- linkchecker | 7 ++++++- 6 files changed, 28 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 77d8d970..54d978b6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -65,6 +65,12 @@ Removed: linkcheck/checker/{ignored,error}url.py Added: linkcheck/checker/unknownurl.py + * Convert the "label too long" domain name parse error into + a more friendly error message. + Type: bugfix + Changed: linkcheck/checker/{__init__,urlbase,httpurl,fileurl}.py, + linkchecker + 3.4 "The Chumscrubbers" (released 4.2.2006) * Ignore decoding errors when retrieving the robots.txt URL. diff --git a/linkcheck/checker/__init__.py b/linkcheck/checker/__init__.py index 64a55705..e206c309 100644 --- a/linkcheck/checker/__init__.py +++ b/linkcheck/checker/__init__.py @@ -34,8 +34,6 @@ import linkcheck.dns.exception # Catch these exception on syntax checks. ExcSyntaxList = [ linkcheck.LinkCheckerError, - # .encode('idna') raises this - UnicodeError, ] # Catch these exceptions on content and connect checks. All other diff --git a/linkcheck/checker/fileurl.py b/linkcheck/checker/fileurl.py index 53a0e218..74a2ec18 100644 --- a/linkcheck/checker/fileurl.py +++ b/linkcheck/checker/fileurl.py @@ -107,7 +107,7 @@ class FileUrl (urlbase.UrlBase): base_url = re.sub("^file://(/?)([a-zA-Z]):", r"file:///\2|", base_url) # norm base url again after changing if self.base_url != base_url: - base_url, is_idn = linkcheck.url.url_norm(base_url) + base_url, is_idn = linkcheck.checker.urlbase.url_norm(base_url) self.base_url = unicode(base_url) def build_url (self): diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index e56cad7d..51811714 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -310,7 +310,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): "Redirected to %r", newurl) self.add_info(_("Redirected to %(url)s.") % {'url': newurl}, tag="http-redirect") - redirected, is_idn = linkcheck.url.url_norm(newurl) + # norm base url - can raise UnicodeError from url.idna_encode() + redirected, is_idn = linkcheck.checker.urlbase.url_norm(newurl) assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, "Norm redirected to %r", redirected) urlparts = linkcheck.strformat.url_unicode_split(redirected) diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py index 645b0eca..cbf7db6c 100644 --- a/linkcheck/checker/urlbase.py +++ b/linkcheck/checker/urlbase.py @@ -53,6 +53,17 @@ def urljoin (parent, url, scheme): return urlparse.urljoin(parent, url) +def url_norm (url): + """ + Wrapper for url.url_norm() to convert UnicodeError in LinkCheckerError. + """ + try: + return linkcheck.url.url_norm(url) + except UnicodeError: + msg = _("URL has unparsable domain name: %s") % sys.exc_info()[1] + raise linkcheck.LinkCheckerError(msg) + + class UrlBase (object): """ An URL with additional information like validity etc. @@ -294,8 +305,8 @@ class UrlBase (object): Construct self.url and self.urlparts out of the given base url information self.base_url, self.parent_url and self.base_ref. """ - # norm base url - base_url, is_idn = linkcheck.url.url_norm(self.base_url) + # norm base url - can raise UnicodeError from url.idna_encode() + base_url, is_idn = url_norm(self.base_url) if is_idn: self.add_warning(_("""URL %r has a unicode domain name which is not yet widely supported. You should use diff --git a/linkchecker b/linkchecker index fbb55c34..5803b823 100755 --- a/linkchecker +++ b/linkchecker @@ -715,7 +715,12 @@ for url in args: # syntactic sugar url = "ftp://%s" % url url_data = get_url_from(url, 0, aggregate, assume_local=True) - linkcheck.add_intern_pattern(url_data, config) + try: + linkcheck.add_intern_pattern(url_data, config) + except UnicodeError: + linkcheck.log.error(linkcheck.LOG_CMDLINE, + _("URL has unparsable domain name: %s"), sys.exc_info()[1]) + sys.exit(1) aggregate.urlqueue.put(url_data) # set up profiling/psyco if do_profile and not has_profile: