mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-02 14:10:24 +00:00
Update HttpUrl.encoding after following redirects
This commit is contained in:
parent
0325ecd73f
commit
a04214465a
1 changed file with 18 additions and 7 deletions
|
|
@ -176,13 +176,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.url_connection = self.session.send(request, **kwargs)
|
||||
self.headers = self.url_connection.headers
|
||||
log.debug(LOG_CHECK, "Response headers %s", self.headers)
|
||||
if self.url_connection.encoding == "ISO-8859-1":
|
||||
# Can't trust ISO-8859-1 because it is Requests' fallback for text
|
||||
# content-types. We fall back to it in UrlBase.get_content() if
|
||||
# Beautiful Soup doesn't return an encoding.
|
||||
self.encoding = None
|
||||
else:
|
||||
self.encoding = self.url_connection.encoding
|
||||
self.set_encoding(self.url_connection.encoding)
|
||||
log.debug(LOG_CHECK, "Response encoding %s", self.encoding)
|
||||
self._add_ssl_info()
|
||||
|
||||
|
|
@ -236,6 +230,19 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
"""Return content MIME type or empty string."""
|
||||
self.content_type = httputil.get_content_type(self.headers)
|
||||
|
||||
def set_encoding(self, encoding):
    """Record the content encoding of the response on this URL object.

    ``ISO-8859-1`` is stored as ``None``; any other value is stored
    unchanged.
    """
    # Requests sets ISO-8859-1 as the default when text data carries no
    # charset label. RFC 2616 (HTTP/1.1) says text in any other character
    # set must be labelled, but that is not always the case, so this
    # value cannot be trusted.
    # UrlBase.get_content() falls back to it if Beautiful Soup doesn't
    # return an encoding.
    untrusted = encoding == "ISO-8859-1"
    self.encoding = None if untrusted else encoding
|
||||
|
||||
def is_redirect(self):
|
||||
"""Check if current response is a redirect."""
|
||||
return (
|
||||
|
|
@ -286,6 +293,10 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
if self.is_redirect():
|
||||
# run connection plugins for old connection
|
||||
self.aggregate.plugin_manager.run_connection_plugins(self)
|
||||
if response:
|
||||
log.debug(LOG_CHECK, "Redirected response headers %s", response.headers)
|
||||
self.set_encoding(response.encoding)
|
||||
log.debug(LOG_CHECK, "Redirected response encoding %s", self.encoding)
|
||||
|
||||
def check_response(self):
|
||||
"""Check final result and log it."""
|
||||
|
|
|
|||
Loading…
Reference in a new issue