Properly handle non-ASCII HTTP header values.

This commit is contained in:
Bastian Kleineidam 2012-01-14 11:01:09 +01:00
parent a5055faf25
commit 4c15fc6a8b
3 changed files with 26 additions and 6 deletions

View file

@ -1,5 +1,9 @@
7.5 "" (released xx.xx.2012)
Fixes:
- checking: Properly handle non-ASCII HTTP header values.
Closes: SF bug #3473359
Changes:
- checking: Add steam:// URIs to the list of ignored URIs.
Closes: SF bug #3471570

View file

@ -17,6 +17,7 @@
"""
Helper functions dealing with HTTP headers.
"""
from ..containers import CaselessDict
DEFAULT_TIMEOUT_SECS = 300
@ -99,3 +100,18 @@ def get_content_encoding (headers):
@rtype: string
"""
return headers.get("Content-Encoding", "").strip()
def decode_headers (headers):
    """Decode ISO-8859-1 header names and values to unicode.
    Only one value per header name survives, since the result is a
    plain mapping rather than a multi-valued message object.
    @return: decoded keys and values
    @rtype: CaselessDict(unicode -> unicode)
    """
    decoded = CaselessDict()
    for name, raw in headers.items():
        # HTTP headers are defined to be Latin-1; replace any bytes
        # that still fail to decode instead of raising.
        decoded[name.decode("iso-8859-1", "replace")] = \
            raw.decode("iso-8859-1", "replace")
    return decoded

View file

@ -201,7 +201,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
# proxy enforcement (overrides standard proxy)
if response.status == 305 and self.headers:
oldproxy = (self.proxy, self.proxyauth)
newproxy = self.headers.getheader("Location")
newproxy = self.headers.get("Location")
self.add_info(_("Enforced proxy `%(name)s'.") %
{"name": newproxy})
self.set_proxy(newproxy)
@ -307,8 +307,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
def follow_redirection (self, response, set_result, redirected):
"""Follow one redirection of http response."""
newurl = self.headers.getheader("Location",
self.headers.getheader("Uri", ""))
newurl = self.headers.get("Location",
self.headers.get("Uri", ""))
# make new url absolute and unicode
newurl = urlparse.urljoin(redirected, unicode_safe(newurl))
log.debug(LOG_CHECK, "Redirected to %r", newurl)
@ -551,7 +551,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.url_connection.endheaders()
response = self.url_connection.getresponse(True)
self.timeout = headers.http_timeout(response)
self.headers = response.msg
self.headers = headers.decode_headers(response.msg)
self.content_type = None
self.persistent = not response.will_close
if self.persistent and self.method == "HEAD":
@ -564,7 +564,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
# Note that for POST method the connection should also be closed,
# but this method is never used.
if self.persistent and (self.method == "GET" or
self.headers.getheader("Content-Length") != "0"):
self.headers.get("Content-Length") != "0"):
# always read content from persistent connections
self._read_content(response)
assert not response.will_close
@ -646,7 +646,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.method = "GET"
response = self._try_http_response()
response = self.follow_redirections(response, set_result=False)[1]
self.headers = response.msg
self.headers = headers.decode_headers(response.msg)
self.content_type = None
# Re-read size info, since the GET request result could be different
# than a former HEAD request.