From b8b0398dd2d6c77506dd961cdaed77074f0f5521 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Sun, 7 Mar 2010 22:11:55 +0100 Subject: [PATCH] Ensure redirected URL is Unicode encoded. --- doc/changelog.txt | 2 ++ linkcheck/checker/httpurl.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/changelog.txt b/doc/changelog.txt index 993c06ec..c9fe1548 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -9,6 +9,8 @@ Fixes: Closes: SF bug #2901667 - http: Retry if server closed connection and sent an empty status line. Fixes the "BadStatusLine" errors. +- http: Prevent UnicodeDecodeError on redirection by ensuring that + the redirected URL will be Unicode encoded. - installer: Add commandline executable to Windows installer. Closes: SF bug #2903257 - http: Warn about permanent redirections even when redirected URL is diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index 9fcf683c..926069bd 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -313,8 +313,8 @@ Use URL `%(newurl)s' instead for checking.""") % { newurl = self.headers.getheader("Location", self.headers.getheader("Uri", "")) # make new url absolute and unicode - newurl = urlparse.urljoin(redirected, newurl) newurl = unicode_safe(newurl) + newurl = urlparse.urljoin(redirected, newurl) log.debug(LOG_CHECK, "Redirected to %r", newurl) self.add_info(_("Redirected to `%(url)s'.") % {'url': newurl}) # norm base url - can raise UnicodeError from url.idna_encode()