Fix showing content size.

2026-05-14 17:43:11 +00:00 · 2009-03-01 23:04:48 +01:00 · 2009-03-01 23:04:48 +01:00 · 7862147ca3
commit 7862147ca3
parent a0ba9a7446
5 changed files with 38 additions and 67 deletions
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -9,6 +9,8 @@

  * Improved progress dialog in GUI client.

+  * The content size of downloads is now shown again.
+
 5.0.2 "All the boys love Mandy Lane" (released 13.2.2009)

  * Properly detect location of the log configuration file in the Windows
--- a/linkcheck/checker/fileurl.py
+++ b/linkcheck/checker/fileurl.py
@@ -20,7 +20,6 @@ Handle local file: links.

 import re
 import os
-import time
 import urlparse
 import urllib
 import urllib2
@@ -162,36 +161,16 @@ class FileUrl (urlbase.UrlBase):
                            {"path": path, "realpath": realpath},
                               tag=WARN_FILE_SYSTEM_PATH)

-    def get_content (self):
-        """
-        Return file content, or in case of directories a dummy HTML file
-        with links to the files.
-        """
-        if not self.valid:
-            return ""
-        if self.data is not None:
-            return self.data
-        elif self.is_directory():
-            return self.get_directory_content()
+    def read_content (self):
+        """Return file content, or in case of directories a dummy HTML file
+        with links to the files."""
+        if self.is_directory():
+            data = get_index_html(get_files(self.get_os_filename()))
+            if isinstance(data, unicode):
+                data = data.encode("iso8859-1", "ignore")
        else:
-            return super(FileUrl, self).get_content()
-
-    def get_directory_content (self):
-        """
-        Get dummy HTML data for the directory content.
-
-        @return: HTML data
-        @rtype: string
-        """
-        t = time.time()
-        files = get_files(self.get_os_filename())
-        data = get_index_html(files)
-        if isinstance(data, unicode):
-            data = data.encode("iso8859-1", "ignore")
-        self.data = data
-        self.dltime = time.time() - t
-        self.dlsize = len(self.data)
-        return self.data
+            data = super(FileUrl, self).read_content()
+        return data

    def is_html (self):
        """
--- a/linkcheck/checker/ftpurl.py
+++ b/linkcheck/checker/ftpurl.py
@@ -19,7 +19,6 @@ Handle FTP links.
 """

 import ftplib
-import time
 import urllib
 from cStringIO import StringIO

@@ -194,20 +193,13 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
            if ro.search(self.url):
                getattr(self, "parse_"+key)()

-    def get_content (self):
-        """
-        Return URL target content, or in case of directories a dummy HTML
-        file with links to the files.
-        """
-        if not self.valid:
-            return ""
-        if self.data is not None:
-            return self.data
-        t = time.time()
+    def read_content (self):
+        """Return URL target content, or in case of directories a dummy HTML
+        file with links to the files."""
        if self.is_directory():
            self.url_connection.cwd(self.filename)
            self.files = self.get_files()
-            self.data = get_index_html(self.files)
+            data = get_index_html(self.files)
        else:
            # download file in BINARY mode
            ftpcmd = "RETR %s" % self.filename
@@ -216,11 +208,9 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
                """Helper method storing given data"""
                buf.write(s)
            self.url_connection.retrbinary(ftpcmd, stor_data)
-            self.data = buf.getvalue()
+            data = buf.getvalue()
            buf.close()
-        self.dltime = time.time() - t
-        self.dlsize = len(self.data)
-        return self.data
+        return data

    def close_connection (self):
        """
--- a/linkcheck/checker/httpurl.py
+++ b/linkcheck/checker/httpurl.py
@@ -20,7 +20,6 @@ Handle http links.

 import urlparse
 import urllib
-import time
 import re
 import zlib
 import socket
@@ -124,6 +123,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
        self.headers = None
        self.auth = None
        self.cookies = []
+        # temporary data filled when reading redirections
+        self._data = None

    def allows_robots (self, url):
        """
@@ -548,26 +549,24 @@ Use URL `%(newurl)s' instead for checking.""") % {
        h.connect()
        return h

-    def get_content (self):
-        """
-        Get content of the URL target. The content data is cached after
+    def read_content (self):
+        """Get content of the URL target. The content data is cached after
        the first call to this method.

        @return: URL content, decompressed and decoded
        @rtype: string
        """
-        if self.data is None:
-            self.method = "GET"
-            response = self._get_http_response()
-            response = self.follow_redirections(response, set_result=False)[1]
-            self.headers = response.msg
+        self.method = "GET"
+        response = self._get_http_response()
+        response = self.follow_redirections(response, set_result=False)[1]
+        self.headers = response.msg
+        if self._data is None:
            self._read_content(response)
-            if self.data is None:
-                self.data = ""
-        return self.data
+        data = self._data
+        self._data = None
+        return data

    def _read_content (self, response):
-        t = time.time()
        data = response.read()
        encoding = headers.get_content_encoding(self.headers)
        if encoding in _supported_encodings:
@@ -582,10 +581,8 @@ Use URL `%(newurl)s' instead for checking.""") % {
                                 tag=WARN_HTTP_DECOMPRESS_ERROR)
                f = StringIO(data)
            data = f.read()
-        if self.data is None and self.method == "GET" and \
-           response.status not in [301, 302]:
-            self.data = data
-            self.dltime = time.time() - t
+        # store temporary data
+        self._data = data

    def encoding_supported (self):
        """Check if page encoding is supported."""
--- a/linkcheck/checker/urlbase.py
+++ b/linkcheck/checker/urlbase.py
@@ -599,16 +599,19 @@ class UrlBase (object):
        return True

    def get_content (self):
-        """
-        Precondition: url_connection is an opened URL.
-        """
+        """Precondition: url_connection is an opened URL."""
        if self.data is None:
+            log.debug(LOG_CHECK, "Get content of %r", self.url)
            t = time.time()
-            self.data = self.url_connection.read()
+            self.data = self.read_content()
            self.dltime = time.time() - t
            self.dlsize = len(self.data)
        return self.data

+    def read_content (self):
+        """Return data for this URL. Can be overridden in subclasses."""
+        return self.url_connection.read()
+
    def check_content (self):
        """Check content data for warnings, syntax errors, viruses etc."""
        if not (self.can_get_content() and self.valid):