mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-30 12:40:33 +00:00
Fix showing content size.
This commit is contained in:
parent
a0ba9a7446
commit
7862147ca3
5 changed files with 38 additions and 67 deletions
|
|
@ -9,6 +9,8 @@
|
|||
|
||||
* Improved progress dialog in GUI client.
|
||||
|
||||
* The content size of downloads is now shown again.
|
||||
|
||||
5.0.2 "All the boys love Mandy Lane" (released 13.2.2009)
|
||||
|
||||
* Properly detect location of the log configuration file in the Windows
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ Handle local file: links.
|
|||
|
||||
import re
|
||||
import os
|
||||
import time
|
||||
import urlparse
|
||||
import urllib
|
||||
import urllib2
|
||||
|
|
@ -162,36 +161,16 @@ class FileUrl (urlbase.UrlBase):
|
|||
{"path": path, "realpath": realpath},
|
||||
tag=WARN_FILE_SYSTEM_PATH)
|
||||
|
||||
def get_content (self):
|
||||
"""
|
||||
Return file content, or in case of directories a dummy HTML file
|
||||
with links to the files.
|
||||
"""
|
||||
if not self.valid:
|
||||
return ""
|
||||
if self.data is not None:
|
||||
return self.data
|
||||
elif self.is_directory():
|
||||
return self.get_directory_content()
|
||||
def read_content (self):
|
||||
"""Return file content, or in case of directories a dummy HTML file
|
||||
with links to the files."""
|
||||
if self.is_directory():
|
||||
data = get_index_html(get_files(self.get_os_filename()))
|
||||
if isinstance(data, unicode):
|
||||
data = data.encode("iso8859-1", "ignore")
|
||||
else:
|
||||
return super(FileUrl, self).get_content()
|
||||
|
||||
def get_directory_content (self):
|
||||
"""
|
||||
Get dummy HTML data for the directory content.
|
||||
|
||||
@return: HTML data
|
||||
@rtype: string
|
||||
"""
|
||||
t = time.time()
|
||||
files = get_files(self.get_os_filename())
|
||||
data = get_index_html(files)
|
||||
if isinstance(data, unicode):
|
||||
data = data.encode("iso8859-1", "ignore")
|
||||
self.data = data
|
||||
self.dltime = time.time() - t
|
||||
self.dlsize = len(self.data)
|
||||
return self.data
|
||||
data = super(FileUrl, self).read_content()
|
||||
return data
|
||||
|
||||
def is_html (self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ Handle FTP links.
|
|||
"""
|
||||
|
||||
import ftplib
|
||||
import time
|
||||
import urllib
|
||||
from cStringIO import StringIO
|
||||
|
||||
|
|
@ -194,20 +193,13 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
if ro.search(self.url):
|
||||
getattr(self, "parse_"+key)()
|
||||
|
||||
def get_content (self):
|
||||
"""
|
||||
Return URL target content, or in case of directories a dummy HTML
|
||||
file with links to the files.
|
||||
"""
|
||||
if not self.valid:
|
||||
return ""
|
||||
if self.data is not None:
|
||||
return self.data
|
||||
t = time.time()
|
||||
def read_content (self):
|
||||
"""Return URL target content, or in case of directories a dummy HTML
|
||||
file with links to the files."""
|
||||
if self.is_directory():
|
||||
self.url_connection.cwd(self.filename)
|
||||
self.files = self.get_files()
|
||||
self.data = get_index_html(self.files)
|
||||
data = get_index_html(self.files)
|
||||
else:
|
||||
# download file in BINARY mode
|
||||
ftpcmd = "RETR %s" % self.filename
|
||||
|
|
@ -216,11 +208,9 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
"""Helper method storing given data"""
|
||||
buf.write(s)
|
||||
self.url_connection.retrbinary(ftpcmd, stor_data)
|
||||
self.data = buf.getvalue()
|
||||
data = buf.getvalue()
|
||||
buf.close()
|
||||
self.dltime = time.time() - t
|
||||
self.dlsize = len(self.data)
|
||||
return self.data
|
||||
return data
|
||||
|
||||
def close_connection (self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ Handle http links.
|
|||
|
||||
import urlparse
|
||||
import urllib
|
||||
import time
|
||||
import re
|
||||
import zlib
|
||||
import socket
|
||||
|
|
@ -124,6 +123,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.headers = None
|
||||
self.auth = None
|
||||
self.cookies = []
|
||||
# temporary data filled when reading redirections
|
||||
self._data = None
|
||||
|
||||
def allows_robots (self, url):
|
||||
"""
|
||||
|
|
@ -548,26 +549,24 @@ Use URL `%(newurl)s' instead for checking.""") % {
|
|||
h.connect()
|
||||
return h
|
||||
|
||||
def get_content (self):
|
||||
"""
|
||||
Get content of the URL target. The content data is cached after
|
||||
def read_content (self):
|
||||
"""Get content of the URL target. The content data is cached after
|
||||
the first call to this method.
|
||||
|
||||
@return: URL content, decompressed and decoded
|
||||
@rtype: string
|
||||
"""
|
||||
if self.data is None:
|
||||
self.method = "GET"
|
||||
response = self._get_http_response()
|
||||
response = self.follow_redirections(response, set_result=False)[1]
|
||||
self.headers = response.msg
|
||||
self.method = "GET"
|
||||
response = self._get_http_response()
|
||||
response = self.follow_redirections(response, set_result=False)[1]
|
||||
self.headers = response.msg
|
||||
if self._data is None:
|
||||
self._read_content(response)
|
||||
if self.data is None:
|
||||
self.data = ""
|
||||
return self.data
|
||||
data = self._data
|
||||
self._data = None
|
||||
return data
|
||||
|
||||
def _read_content (self, response):
|
||||
t = time.time()
|
||||
data = response.read()
|
||||
encoding = headers.get_content_encoding(self.headers)
|
||||
if encoding in _supported_encodings:
|
||||
|
|
@ -582,10 +581,8 @@ Use URL `%(newurl)s' instead for checking.""") % {
|
|||
tag=WARN_HTTP_DECOMPRESS_ERROR)
|
||||
f = StringIO(data)
|
||||
data = f.read()
|
||||
if self.data is None and self.method == "GET" and \
|
||||
response.status not in [301, 302]:
|
||||
self.data = data
|
||||
self.dltime = time.time() - t
|
||||
# store temporary data
|
||||
self._data = data
|
||||
|
||||
def encoding_supported (self):
|
||||
"""Check if page encoding is supported."""
|
||||
|
|
|
|||
|
|
@ -599,16 +599,19 @@ class UrlBase (object):
|
|||
return True
|
||||
|
||||
def get_content (self):
|
||||
"""
|
||||
Precondition: url_connection is an opened URL.
|
||||
"""
|
||||
"""Precondition: url_connection is an opened URL."""
|
||||
if self.data is None:
|
||||
log.debug(LOG_CHECK, "Get content of %r", self.url)
|
||||
t = time.time()
|
||||
self.data = self.url_connection.read()
|
||||
self.data = self.read_content()
|
||||
self.dltime = time.time() - t
|
||||
self.dlsize = len(self.data)
|
||||
return self.data
|
||||
|
||||
def read_content (self):
|
||||
"""Return data for this URL. Can be overridden in subclasses."""
|
||||
return self.url_connection.read()
|
||||
|
||||
def check_content (self):
|
||||
"""Check content data for warnings, syntax errors, viruses etc."""
|
||||
if not (self.can_get_content() and self.valid):
|
||||
|
|
|
|||
Loading…
Reference in a new issue