diff --git a/doc/changelog.txt b/doc/changelog.txt
index 7e9eea42..5cc7364b 100644
--- a/doc/changelog.txt
+++ b/doc/changelog.txt
@@ -3,9 +3,9 @@
Features:
- checking: Support URLs.
- logging: Sending SIGUSR1 signal prints the stack trace of all current
- running threads. This makes it easier to debug deadlocks.
-- gui: Added support of Drag-and-Drop of local files. If the local file is
- a LinkChecker project (.lcp) it is loaded automatically, else the check
+ running threads. This makes debugging deadlocks easier.
+- gui: Support Drag-and-Drop of local files. If the local file is
+ a LinkChecker project (.lcp) file, it is loaded; otherwise the check
URL is set to the local file URL.
Changes:
@@ -14,6 +14,8 @@ Changes:
Fixes:
- checking: Fix a crash when closing a Word document after scanning failed.
Closes: GH bug #369
+- checking: Catch UnicodeError from idna.encode(), fixing an internal error
+ when trying to connect to certain invalid hostnames.
8.3 "Mahna Mahna Killer" (released 6.1.2013)
diff --git a/linkcheck/checker/const.py b/linkcheck/checker/const.py
index 81557c59..53dd2483 100644
--- a/linkcheck/checker/const.py
+++ b/linkcheck/checker/const.py
@@ -53,6 +53,8 @@ ExcCacheList = [
ftplib.error_temp,
ftplib.error_perm,
ftplib.error_proto,
+ # idna.encode(), called from socket.create_connection()
+ UnicodeError,
]
# Exceptions that do not put the URL in the cache so that the URL can
diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py
index b8b0fb50..a813bdf8 100644
--- a/linkcheck/checker/urlbase.py
+++ b/linkcheck/checker/urlbase.py
@@ -526,14 +526,17 @@ class UrlBase (object):
self.check_connection()
self.add_size_info()
self.add_country_info()
- except tuple(ExcList):
+ except tuple(ExcList) as exc:
value = self.handle_exception()
# make nicer error msg for unknown hosts
- if isinstance(value, socket.error) and value.args[0] == -2:
+ if isinstance(exc, socket.error) and exc.args[0] == -2:
value = _('Hostname not found')
# make nicer error msg for bad status line
- if isinstance(value, httplib.BadStatusLine):
+ elif isinstance(exc, httplib.BadStatusLine):
value = _('Bad HTTP response %(line)r') % {"line": str(value)}
+ elif isinstance(exc, UnicodeError):
+ # idna.encode(host) failed
+ value = _('Bad hostname %(host)r: %(msg)s') % {'host': self.host, 'msg': str(value)}
self.set_result(unicode_safe(value), valid=False)
self.checktime = time.time() - check_start
if self.do_check_content:
diff --git a/tests/checker/data/http.html b/tests/checker/data/http.html
index a9ea63a3..6dae00da 100644
--- a/tests/checker/data/http.html
+++ b/tests/checker/data/http.html
@@ -21,3 +21,6 @@