From 5e13a78f668d5a296800da714eed62daf471c4e4 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Fri, 9 Mar 2012 11:54:18 +0100 Subject: [PATCH] Fix non-ascii HTTP header debugging. --- doc/changelog.txt | 3 +++ linkcheck/checker/httpurl.py | 8 ++++++-- linkcheck/i18n.py | 2 +- linkcheck/strformat.py | 6 +++--- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/doc/changelog.txt b/doc/changelog.txt index 9d01495a..15560461 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -6,6 +6,9 @@ Fixes: Closes: SF bug #3495407 - checking: Fix non-ascii HTTP header handling. Closes: SF bug #3495621 +- checking: Fix non-ascii HTTP header debugging. + Closes: SF bug #3488675 + 7.5 "Kukushka" (released 13.02.2012) diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index 56749582..bc0ad799 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -37,6 +37,9 @@ from .const import WARN_HTTP_ROBOTS_DENIED, \ WARN_HTTP_DECOMPRESS_ERROR, WARN_HTTP_UNSUPPORTED_ENCODING, \ WARN_HTTP_AUTH_UNKNOWN +# assumed HTTP header encoding +HEADER_ENCODING = "iso-8859-1" + # helper alias unicode_safe = strformat.unicode_safe @@ -197,7 +200,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): response.reason = unicode_safe(response.reason) log.debug(LOG_CHECK, "Response: %s %s", response.status, response.reason) - log.debug(LOG_CHECK, "Headers: %s", self.headers) + uheaders = unicode_safe(self.headers, encoding=HEADER_ENCODING) + log.debug(LOG_CHECK, "Headers: %s", uheaders) # proxy enforcement (overrides standard proxy) if response.status == 305 and self.headers: oldproxy = (self.proxy, self.proxyauth) @@ -453,7 +457,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): value = self.headers.get(name) if value is None: return default - return value.decode("iso-8859-1", "replace") + return unicode_safe(value, encoding=HEADER_ENCODING) def get_alias_cache_data (self): """ diff --git a/linkcheck/i18n.py b/linkcheck/i18n.py index 92297caf..536afd62 100644 --- a/linkcheck/i18n.py +++ b/linkcheck/i18n.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2011 Bastian Kleineidam +# Copyright (C) 2000-2012 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/strformat.py b/linkcheck/strformat.py index bff4ce2a..42e87909 100644 --- a/linkcheck/strformat.py +++ b/linkcheck/strformat.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2011 Bastian Kleineidam +# Copyright (C) 2000-2012 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -34,7 +34,7 @@ import pydoc from . import i18n -def unicode_safe (s, encoding=i18n.default_encoding): +def unicode_safe (s, encoding=i18n.default_encoding, errors='replace'): """Get unicode string without raising encoding errors. Unknown characters of the given encoding will be ignored. @@ -48,7 +48,7 @@ def unicode_safe (s, encoding=i18n.default_encoding): if isinstance(s, unicode): # s is already unicode, nothing to do return s - return unicode(str(s), encoding, "ignore") + return unicode(str(s), encoding, errors) def ascii_safe (s):