Fix data size for HTTP requests.

This commit is contained in:
Bastian Kleineidam 2010-08-04 00:06:25 +02:00
parent 851e1121e9
commit 1faedafb33
4 changed files with 23 additions and 11 deletions

View file

@ -175,9 +175,10 @@ class FileUrl (urlbase.UrlBase):
data = get_index_html(get_files(self.get_os_filename()))
if isinstance(data, unicode):
data = data.encode("iso8859-1", "ignore")
size = len(data)
else:
data = super(FileUrl, self).read_content()
return data
data, size = super(FileUrl, self).read_content()
return data, size
def is_html (self):
"""

View file

@ -218,6 +218,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
else:
# download file in BINARY mode
ftpcmd = "RETR %s" % self.filename
# XXX limit the download size to some sane value
buf = StringIO()
def stor_data (s):
"""Helper method storing given data"""
@ -225,7 +226,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.url_connection.retrbinary(ftpcmd, stor_data)
data = buf.getvalue()
buf.close()
return data
return data, len(data)
def close_connection (self):
"""

View file

@ -145,7 +145,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
def add_size_info (self):
"""Get size of URL content from HTTP header."""
if self.headers and "Content-Length" in self.headers and \
"Content-Encoding" not in self.headers:
"Transfer-Encoding" not in self.headers:
# Note that content-encoding causes size differences since
# the content data is always decoded.
try:
@ -154,6 +154,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.dlsize = self.size
except (ValueError, OverflowError):
pass
else:
self.size = -1
def check_connection (self):
"""
@ -592,14 +594,21 @@ Use URL `%(newurl)s' instead for checking.""") % {
response = self._try_http_response()
response = self.follow_redirections(response, set_result=False)[1]
self.headers = response.msg
# Re-read size info, since the GET request result could be different
# than a former HEAD request.
self.add_size_info()
if self._data is None:
self._read_content(response)
data = self._data
self._data = None
return data
data, size = self._data, self._size
self._data = self._size = None
return data, size
def _read_content (self, response):
"""Read URL contents and store then in self._data.
This way, the method can be called by other functions than
read_content()"""
data = response.read()
self._size = len(data)
encoding = headers.get_content_encoding(self.headers)
if encoding in _supported_encodings:
try:

View file

@ -628,14 +628,15 @@ class UrlBase (object):
if self.data is None:
log.debug(LOG_CHECK, "Get content of %r", self.url)
t = time.time()
self.data = self.read_content()
self.data, self.dlsize = self.read_content()
self.dltime = time.time() - t
self.dlsize = len(self.data)
return self.data
def read_content (self):
"""Return data for this URL. Can be overridden in subclasses."""
return self.url_connection.read()
"""Return data and data size for this URL.
Can be overridden in subclasses."""
data = self.url_connection.read()
return data, len(data)
def check_content (self):
"""Check content data for warnings, syntax errors, viruses etc."""