Fix showing content size.

This commit is contained in:
Bastian Kleineidam 2009-03-01 23:04:48 +01:00
parent a0ba9a7446
commit 7862147ca3
5 changed files with 38 additions and 67 deletions

View file

@ -9,6 +9,8 @@
* Improved progress dialog in GUI client.
* The content size of downloads is now shown again.
5.0.2 "All the boys love Mandy Lane" (released 13.2.2009)
* Properly detect location of the log configuration file in the Windows

View file

@ -20,7 +20,6 @@ Handle local file: links.
import re
import os
import time
import urlparse
import urllib
import urllib2
@ -162,36 +161,16 @@ class FileUrl (urlbase.UrlBase):
{"path": path, "realpath": realpath},
tag=WARN_FILE_SYSTEM_PATH)
def get_content (self):
"""
Return file content, or in case of directories a dummy HTML file
with links to the files.
"""
if not self.valid:
return ""
if self.data is not None:
return self.data
elif self.is_directory():
return self.get_directory_content()
def read_content (self):
"""Return file content, or in case of directories a dummy HTML file
with links to the files."""
if self.is_directory():
data = get_index_html(get_files(self.get_os_filename()))
if isinstance(data, unicode):
data = data.encode("iso8859-1", "ignore")
else:
return super(FileUrl, self).get_content()
def get_directory_content (self):
"""
Get dummy HTML data for the directory content.
@return: HTML data
@rtype: string
"""
t = time.time()
files = get_files(self.get_os_filename())
data = get_index_html(files)
if isinstance(data, unicode):
data = data.encode("iso8859-1", "ignore")
self.data = data
self.dltime = time.time() - t
self.dlsize = len(self.data)
return self.data
data = super(FileUrl, self).read_content()
return data
def is_html (self):
"""

View file

@ -19,7 +19,6 @@ Handle FTP links.
"""
import ftplib
import time
import urllib
from cStringIO import StringIO
@ -194,20 +193,13 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
if ro.search(self.url):
getattr(self, "parse_"+key)()
def get_content (self):
"""
Return URL target content, or in case of directories a dummy HTML
file with links to the files.
"""
if not self.valid:
return ""
if self.data is not None:
return self.data
t = time.time()
def read_content (self):
"""Return URL target content, or in case of directories a dummy HTML
file with links to the files."""
if self.is_directory():
self.url_connection.cwd(self.filename)
self.files = self.get_files()
self.data = get_index_html(self.files)
data = get_index_html(self.files)
else:
# download file in BINARY mode
ftpcmd = "RETR %s" % self.filename
@ -216,11 +208,9 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
"""Helper method storing given data"""
buf.write(s)
self.url_connection.retrbinary(ftpcmd, stor_data)
self.data = buf.getvalue()
data = buf.getvalue()
buf.close()
self.dltime = time.time() - t
self.dlsize = len(self.data)
return self.data
return data
def close_connection (self):
"""

View file

@ -20,7 +20,6 @@ Handle http links.
import urlparse
import urllib
import time
import re
import zlib
import socket
@ -124,6 +123,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.headers = None
self.auth = None
self.cookies = []
# temporary data filled when reading redirections
self._data = None
def allows_robots (self, url):
"""
@ -548,26 +549,24 @@ Use URL `%(newurl)s' instead for checking.""") % {
h.connect()
return h
def get_content (self):
"""
Get content of the URL target. The content data is cached after
def read_content (self):
"""Get content of the URL target. The content data is cached after
the first call to this method.
@return: URL content, decompressed and decoded
@rtype: string
"""
if self.data is None:
self.method = "GET"
response = self._get_http_response()
response = self.follow_redirections(response, set_result=False)[1]
self.headers = response.msg
self.method = "GET"
response = self._get_http_response()
response = self.follow_redirections(response, set_result=False)[1]
self.headers = response.msg
if self._data is None:
self._read_content(response)
if self.data is None:
self.data = ""
return self.data
data = self._data
self._data = None
return data
def _read_content (self, response):
t = time.time()
data = response.read()
encoding = headers.get_content_encoding(self.headers)
if encoding in _supported_encodings:
@ -582,10 +581,8 @@ Use URL `%(newurl)s' instead for checking.""") % {
tag=WARN_HTTP_DECOMPRESS_ERROR)
f = StringIO(data)
data = f.read()
if self.data is None and self.method == "GET" and \
response.status not in [301, 302]:
self.data = data
self.dltime = time.time() - t
# store temporary data
self._data = data
def encoding_supported (self):
"""Check if page encoding is supported."""

View file

@ -599,16 +599,19 @@ class UrlBase (object):
return True
def get_content (self):
    """Return the content data of this URL, reading it at most once.

    Precondition: url_connection is an opened URL.

    On the first call (self.data is None) the data is obtained from
    read_content(); the elapsed read time is stored in self.dltime and
    the data length in self.dlsize. Later calls return the stored
    self.data without reading again.

    @return: the stored content data
    """
    if self.data is None:
        log.debug(LOG_CHECK, "Get content of %r", self.url)
        t = time.time()
        # Read only through the read_content() hook. A direct
        # url_connection.read() before it would bypass the overridable
        # hook, and its result would be overwritten right away.
        self.data = self.read_content()
        self.dltime = time.time() - t
        self.dlsize = len(self.data)
    return self.data
def read_content (self):
    """Return data for this URL. Can be overridden in subclasses.

    This default implementation returns the result of
    self.url_connection.read().
    """
    return self.url_connection.read()
def check_content (self):
"""Check content data for warnings, syntax errors, viruses etc."""
if not (self.can_get_content() and self.valid):