mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-30 11:04:50 +00:00
Fix data size for HTTP requests.
This commit is contained in:
parent
851e1121e9
commit
1faedafb33
4 changed files with 23 additions and 11 deletions
|
|
@ -175,9 +175,10 @@ class FileUrl (urlbase.UrlBase):
|
|||
data = get_index_html(get_files(self.get_os_filename()))
|
||||
if isinstance(data, unicode):
|
||||
data = data.encode("iso8859-1", "ignore")
|
||||
size = len(data)
|
||||
else:
|
||||
data = super(FileUrl, self).read_content()
|
||||
return data
|
||||
data, size = super(FileUrl, self).read_content()
|
||||
return data, size
|
||||
|
||||
def is_html (self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -218,6 +218,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
else:
|
||||
# download file in BINARY mode
|
||||
ftpcmd = "RETR %s" % self.filename
|
||||
# XXX limit the download size to some sane value
|
||||
buf = StringIO()
|
||||
def stor_data (s):
|
||||
"""Helper method storing given data"""
|
||||
|
|
@ -225,7 +226,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.url_connection.retrbinary(ftpcmd, stor_data)
|
||||
data = buf.getvalue()
|
||||
buf.close()
|
||||
return data
|
||||
return data, len(data)
|
||||
|
||||
def close_connection (self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -145,7 +145,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
def add_size_info (self):
|
||||
"""Get size of URL content from HTTP header."""
|
||||
if self.headers and "Content-Length" in self.headers and \
|
||||
"Content-Encoding" not in self.headers:
|
||||
"Transfer-Encoding" not in self.headers:
|
||||
# Note that content-encoding causes size differences since
|
||||
# the content data is always decoded.
|
||||
try:
|
||||
|
|
@ -154,6 +154,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.dlsize = self.size
|
||||
except (ValueError, OverflowError):
|
||||
pass
|
||||
else:
|
||||
self.size = -1
|
||||
|
||||
def check_connection (self):
|
||||
"""
|
||||
|
|
@ -592,14 +594,21 @@ Use URL `%(newurl)s' instead for checking.""") % {
|
|||
response = self._try_http_response()
|
||||
response = self.follow_redirections(response, set_result=False)[1]
|
||||
self.headers = response.msg
|
||||
# Re-read size info, since the GET request result could be different
|
||||
# than a former HEAD request.
|
||||
self.add_size_info()
|
||||
if self._data is None:
|
||||
self._read_content(response)
|
||||
data = self._data
|
||||
self._data = None
|
||||
return data
|
||||
data, size = self._data, self._size
|
||||
self._data = self._size = None
|
||||
return data, size
|
||||
|
||||
def _read_content (self, response):
|
||||
"""Read URL contents and store them in self._data.
|
||||
This way, the method can be called by other functions than
|
||||
read_content()"""
|
||||
data = response.read()
|
||||
self._size = len(data)
|
||||
encoding = headers.get_content_encoding(self.headers)
|
||||
if encoding in _supported_encodings:
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -628,14 +628,15 @@ class UrlBase (object):
|
|||
if self.data is None:
|
||||
log.debug(LOG_CHECK, "Get content of %r", self.url)
|
||||
t = time.time()
|
||||
self.data = self.read_content()
|
||||
self.data, self.dlsize = self.read_content()
|
||||
self.dltime = time.time() - t
|
||||
self.dlsize = len(self.data)
|
||||
return self.data
|
||||
|
||||
def read_content (self):
|
||||
"""Return data for this URL. Can be overridden in subclasses."""
|
||||
return self.url_connection.read()
|
||||
"""Return data and data size for this URL.
|
||||
Can be overridden in subclasses."""
|
||||
data = self.url_connection.read()
|
||||
return data, len(data)
|
||||
|
||||
def check_content (self):
|
||||
"""Check content data for warnings, syntax errors, viruses etc."""
|
||||
|
|
|
|||
Loading…
Reference in a new issue