Display downloaded bytes.

This commit is contained in:
Bastian Kleineidam 2014-03-14 21:06:10 +01:00
parent c51caf1133
commit 6437f08277
20 changed files with 52 additions and 28 deletions

View file

@@ -14,6 +14,7 @@ Changes:
- checking: Disable URL length warning for data: URLs.
- installation: Check requirement for Python requests >= 2.2.0.
Closes: GH bug #478
- logging: Display downloaded bytes.
Fixes:
- checking: Fix internal errors in debug output.

View file

@@ -631,6 +631,8 @@ class UrlBase (object):
if self.size == 0:
self.add_warning(_("Content size is zero."),
tag=WARN_URL_CONTENT_SIZE_ZERO)
else:
self.aggregate.add_downloaded_bytes(self.size)
return self.data
def read_content(self):

View file

@@ -124,7 +124,7 @@ def check_urls (aggregate):
aggregate.start_threads()
check_url(aggregate)
aggregate.finish()
aggregate.logger.end_log_output()
aggregate.logger.end_log_output(aggregate.downloaded_bytes)
except LinkCheckerInterrupt:
raise
except KeyboardInterrupt:
@@ -178,11 +178,11 @@ def abort (aggregate):
try:
aggregate.abort()
aggregate.finish()
aggregate.logger.end_log_output()
aggregate.logger.end_log_output(aggregate.downloaded_bytes)
break
except KeyboardInterrupt:
log.warn(LOG_CHECK, _("user abort; force shutdown"))
aggregate.logger.end_log_output()
aggregate.logger.end_log_output(aggregate.downloaded_bytes)
abort_now()

View file

@@ -30,6 +30,7 @@ from . import logger, status, checker, interrupt
_threads_lock = threading.RLock()
_hosts_lock = threading.RLock()
_downloadedbytes_lock = threading.RLock()
def new_request_session(config):
"""Create a new request session."""
@@ -60,13 +61,13 @@ class Aggregate (object):
requests_per_second = config["maxrequestspersecond"]
self.wait_time_min = 1.0 / requests_per_second
self.wait_time_max = max(self.wait_time_min + 0.5, 0.5)
self.downloaded_bytes = 0
@synchronized(_threads_lock)
def start_threads (self):
"""Spawn threads for URL checking and status printing."""
if self.config["status"]:
t = status.Status(self.urlqueue, self.config.status_logger,
self.config["status_wait_seconds"])
t = status.Status(self, self.config["status_wait_seconds"])
t.start()
self.threads.append(t)
if self.config["maxrunseconds"]:
@@ -166,3 +167,7 @@ class Aggregate (object):
"""Determine if checking is finished."""
self.remove_stopped_threads()
return self.urlqueue.empty() and not self.threads
@synchronized(_downloadedbytes_lock)
def add_downloaded_bytes(self, numbytes):
self.downloaded_bytes += numbytes

View file

@@ -42,7 +42,8 @@ class StatusLogger (object):
"""Save file descriptor for logging."""
self.fd = fd
def log_status (self, checked, in_progress, queue, duration):
def log_status (self, checked, in_progress, queue, duration,
downloaded_bytes):
"""Write status message to file descriptor."""
msg = _n("%2d thread active", "%2d threads active", in_progress) % \
in_progress
@@ -52,6 +53,8 @@ class StatusLogger (object):
msg = _n("%4d link checked", "%4d links checked", checked) % checked
self.write(u"%s, " % msg)
msg = _("runtime %s") % strformat.strduration_long(duration)
self.write(u"%s, " % msg)
msg = _("downloaded %s") % strformat.strsize(downloaded_bytes)
self.writeln(msg)
self.flush()

View file

@@ -38,12 +38,12 @@ class Logger (object):
for logger in self.loggers:
logger.start_output()
def end_log_output (self):
def end_log_output (self, downloaded_bytes):
"""
End output of all configured loggers.
"""
for logger in self.loggers:
logger.end_output()
logger.end_output(downloaded_bytes)
def do_print (self, url_data):
"""Determine if URL entry should be logged or not."""

View file

@@ -22,7 +22,7 @@ from . import task
class Status (task.LoggedCheckedTask):
"""Thread that gathers and logs the status periodically."""
def __init__ (self, urlqueue, logger, wait_seconds):
def __init__ (self, aggregator, wait_seconds):
"""Initialize the status logger task.
@param urlqueue: the URL queue
@ptype urlqueue: Urlqueue
@@ -31,8 +31,9 @@ class Status (task.LoggedCheckedTask):
@param wait_seconds: interval in seconds to report status
@ptype wait_seconds: int
"""
logger = aggregator.config.status_logger
super(Status, self).__init__(logger)
self.urlqueue = urlqueue
self.aggregator = aggregator
self.wait_seconds = wait_seconds
assert self.wait_seconds >= 1
@@ -52,5 +53,7 @@ class Status (task.LoggedCheckedTask):
def log_status (self):
"""Log a status message."""
duration = time.time() - self.start_time
checked, in_progress, queue = self.urlqueue.status()
self.logger.log_status(checked, in_progress, queue, duration)
checked, in_progress, queue = self.aggregator.urlqueue.status()
downloaded_bytes = self.aggregator.downloaded_bytes
self.logger.log_status(checked, in_progress, queue, duration,
downloaded_bytes)

View file

@@ -84,7 +84,7 @@ class LinkCheckerMain (QtGui.QMainWindow, Ui_MainWindow):
"""The main window displaying checked URLs."""
log_url_signal = QtCore.pyqtSignal(object)
log_status_signal = QtCore.pyqtSignal(int, int, int, float)
log_status_signal = QtCore.pyqtSignal(int, int, int, float, int)
log_stats_signal = QtCore.pyqtSignal(object)
error_signal = QtCore.pyqtSignal(str)
@@ -569,11 +569,13 @@ Version 2 or later.
"""Show given link in status bar."""
self.statusBar.showMessage(link)
def log_status (self, checked, in_progress, queued, duration):
def log_status (self, checked, in_progress, queued, duration,
downloaded_bytes):
"""Update number of checked, active and queued links."""
self.label_checked.setText(u"%d" % checked)
self.label_active.setText(u"%d" % in_progress)
self.label_queued.setText(u"%d" % queued)
# XXX display downloaded bytes
def log_stats (self, statistics):
"""Set statistic information for selected URL."""

View file

@@ -55,8 +55,9 @@ class SignalLogger (_Logger):
"""Emit URL data which gets logged in the main window."""
self.log_url_signal.emit(url_data)
def end_output (self):
def end_output (self, downloaded_bytes=None):
"""Emit statistic data which gets logged in the main window."""
self.stats.downloaded_bytes = downloaded_bytes
self.log_stats_signal.emit(self.stats)
@@ -67,6 +68,8 @@ class StatusLogger (object):
"""Store given signal object."""
self.signal = signal
def log_status (self, checked, in_progress, queued, duration):
def log_status (self, checked, in_progress, queued, duration,
downloaded_bytes):
"""Emit signal with given status information."""
self.signal.emit(checked, in_progress, queued, duration)
self.signal.emit(checked, in_progress, queued, duration,
downloaded_bytes)

View file

@@ -89,6 +89,8 @@ class LogStatistics (object):
self.min_url_length = 0
self.avg_url_length = 0.0
self.avg_number = 0
# overall downloaded bytes
self.downloaded_bytes = None
def log_url (self, url_data, do_print):
"""Log URL statistics."""
@@ -399,7 +401,7 @@ class _Logger (object):
pass
@abc.abstractmethod
def end_output (self):
def end_output (self, downloaded_bytes=None):
"""
End of output, used for cleanup (eg output buffer flushing).
"""

View file

@@ -66,7 +66,7 @@ class BlacklistLogger (_Logger):
if not url_data.valid:
self.blacklist[key] = 1
def end_output (self):
def end_output (self, downloaded_bytes=None):
"""
Write blacklist file.
"""

View file

@@ -130,7 +130,7 @@ class CSVLogger (_Logger):
# empty queue
self.queue.truncate(0)
def end_output (self):
def end_output (self, downloaded_bytes=None):
"""Write end of checking info as csv comment."""
if self.has_part("outro"):
self.write_outro()

View file

@@ -95,7 +95,7 @@ class CustomXMLLogger (xmllog._XMLLogger):
self.xml_endtag(u'urldata')
self.flush()
def end_output (self):
def end_output (self, downloaded_bytes=None):
"""
Write XML end tag.
"""

View file

@@ -83,7 +83,7 @@ class _GraphLogger (_Logger):
"""Write end-of-graph marker."""
pass
def end_output (self):
def end_output (self, downloaded_bytes=None):
"""Write edges and end of checking info as gml comment."""
self.write_edges()
self.end_graph()

View file

@@ -81,7 +81,7 @@ class GraphXMLLogger (_XMLLogger, _GraphLogger):
self.xml_endtag(u"data")
self.xml_endtag(u"edge")
def end_output (self):
def end_output (self, downloaded_bytes=None):
"""Finish graph output, and print end of checking info as xml
comment."""
self.xml_endtag(u"graph")

View file

@@ -324,7 +324,7 @@ class HtmlLogger (_Logger):
configuration.DonateUrl+u"</a>."))
self.writeln(u"</small></body></html>")
def end_output (self):
def end_output (self, downloaded_bytes=None):
"""Write end of checking info as HTML."""
if self.has_part("stats"):
self.write_stats()

View file

@@ -43,7 +43,7 @@ class NoneLogger (_Logger):
"""Do nothing."""
pass
def end_output (self):
def end_output (self, downloaded_bytes=None):
"""
Do nothing.
"""

View file

@@ -113,7 +113,7 @@ class SitemapXmlLogger (xmllog._XMLLogger):
self.xml_endtag(u'url')
self.flush()
def end_output (self):
def end_output (self, downloaded_bytes=None):
"""Write XML end tag."""
self.xml_endtag(u"urlset")
self.xml_end_output()

View file

@@ -131,7 +131,7 @@ class SQLLogger (_Logger):
})
self.flush()
def end_output (self):
def end_output (self, downloaded_bytes=None):
"""
Write end of checking info as sql comment.
"""

View file

@@ -260,6 +260,8 @@ class TextLogger (_Logger):
"""Write check statistic info."""
self.writeln()
self.writeln(_("Statistics:"))
if self.stats.downloaded_bytes is not None:
self.writeln(_("Downloaded: %s.") % strformat.strsize(self.stats.downloaded_bytes))
if self.stats.number > 0:
self.writeln(_(
"Content types: %(image)d image, %(text)d text, %(video)d video, "
@@ -272,9 +274,10 @@ class TextLogger (_Logger):
else:
self.writeln(_("No statistics available since no URLs were checked."))
def end_output (self):
def end_output (self, downloaded_bytes=None):
"""Write end of output info, and flush all output buffers."""
if self.has_part('stats'):
self.stats.downloaded_bytes = downloaded_bytes
self.write_stats()
if self.has_part('outro'):
self.write_outro()