mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-07 08:10:57 +00:00
Properly handle non-ASCII HTTP header values.
This commit is contained in:
parent
a5055faf25
commit
4c15fc6a8b
3 changed files with 26 additions and 6 deletions
|
|
@ -1,5 +1,9 @@
|
|||
7.5 "" (released xx.xx.2012)
|
||||
|
||||
Fixes:
|
||||
- checking: Properly handle non-ASCII HTTP header values.
|
||||
Closes: SF bug #3473359
|
||||
|
||||
Changes:
|
||||
- checking: Add steam:// URIs to the list of ignored URIs.
|
||||
Closes: SF bug #3471570
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
"""
|
||||
Helper functions dealing with HTTP headers.
|
||||
"""
|
||||
from ..containers import CaselessDict
|
||||
|
||||
DEFAULT_TIMEOUT_SECS = 300
|
||||
|
||||
|
|
@ -99,3 +100,18 @@ def get_content_encoding (headers):
|
|||
@rtype: string
|
||||
"""
|
||||
return headers.get("Content-Encoding", "").strip()
|
||||
|
||||
|
||||
def decode_headers (headers):
    """Turn ISO-8859-1 encoded header names and values into unicode.

    The decoded pairs are collected in a dictionary, so multiple
    entries for the same header are not preserved.

    @param headers: object whose items() yields (name, value) pairs
    @return: decoded keys and values
    @rtype: CaselessDict(unicode -> unicode)
    """
    decoded = CaselessDict()
    for name, raw_value in headers.items():
        # Decode the name first, then the value, before storing the pair.
        name_text, value_text = [
            part.decode("iso-8859-1", "replace")
            for part in (name, raw_value)
        ]
        decoded[name_text] = value_text
    return decoded
|
||||
|
|
|
|||
|
|
@ -201,7 +201,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
# proxy enforcement (overrides standard proxy)
|
||||
if response.status == 305 and self.headers:
|
||||
oldproxy = (self.proxy, self.proxyauth)
|
||||
newproxy = self.headers.getheader("Location")
|
||||
newproxy = self.headers.get("Location")
|
||||
self.add_info(_("Enforced proxy `%(name)s'.") %
|
||||
{"name": newproxy})
|
||||
self.set_proxy(newproxy)
|
||||
|
|
@ -307,8 +307,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
|
||||
def follow_redirection (self, response, set_result, redirected):
|
||||
"""Follow one redirection of http response."""
|
||||
newurl = self.headers.getheader("Location",
|
||||
self.headers.getheader("Uri", ""))
|
||||
newurl = self.headers.get("Location",
|
||||
self.headers.get("Uri", ""))
|
||||
# make new url absolute and unicode
|
||||
newurl = urlparse.urljoin(redirected, unicode_safe(newurl))
|
||||
log.debug(LOG_CHECK, "Redirected to %r", newurl)
|
||||
|
|
@ -551,7 +551,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.url_connection.endheaders()
|
||||
response = self.url_connection.getresponse(True)
|
||||
self.timeout = headers.http_timeout(response)
|
||||
self.headers = response.msg
|
||||
self.headers = headers.decode_headers(response.msg)
|
||||
self.content_type = None
|
||||
self.persistent = not response.will_close
|
||||
if self.persistent and self.method == "HEAD":
|
||||
|
|
@ -564,7 +564,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
# Note that for POST method the connection should also be closed,
|
||||
# but this method is never used.
|
||||
if self.persistent and (self.method == "GET" or
|
||||
self.headers.getheader("Content-Length") != "0"):
|
||||
self.headers.get("Content-Length") != "0"):
|
||||
# always read content from persistent connections
|
||||
self._read_content(response)
|
||||
assert not response.will_close
|
||||
|
|
@ -646,7 +646,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.method = "GET"
|
||||
response = self._try_http_response()
|
||||
response = self.follow_redirections(response, set_result=False)[1]
|
||||
self.headers = response.msg
|
||||
self.headers = headers.decode_headers(response.msg)
|
||||
self.content_type = None
|
||||
# Re-read size info, since the GET request result could be different
|
||||
# than a former HEAD request.
|
||||
|
|
|
|||
Loading…
Reference in a new issue