Display downloaded bytes.

This commit is contained in:
Bastian Kleineidam 2014-03-14 21:06:10 +01:00
parent c51caf1133
commit 6437f08277
20 changed files with 52 additions and 28 deletions

View file

@ -14,6 +14,7 @@ Changes:
- checking: Disable URL length warning for data: URLs. - checking: Disable URL length warning for data: URLs.
- installation: Check requirement for Python requests >= 2.2.0. - installation: Check requirement for Python requests >= 2.2.0.
Closes: GH bug #478 Closes: GH bug #478
- logging: Display downloaded bytes.
Fixes: Fixes:
- checking: Fix internal errors in debug output. - checking: Fix internal errors in debug output.

View file

@ -631,6 +631,8 @@ class UrlBase (object):
if self.size == 0: if self.size == 0:
self.add_warning(_("Content size is zero."), self.add_warning(_("Content size is zero."),
tag=WARN_URL_CONTENT_SIZE_ZERO) tag=WARN_URL_CONTENT_SIZE_ZERO)
else:
self.aggregate.add_downloaded_bytes(self.size)
return self.data return self.data
def read_content(self): def read_content(self):

View file

@ -124,7 +124,7 @@ def check_urls (aggregate):
aggregate.start_threads() aggregate.start_threads()
check_url(aggregate) check_url(aggregate)
aggregate.finish() aggregate.finish()
aggregate.logger.end_log_output() aggregate.logger.end_log_output(aggregate.downloaded_bytes)
except LinkCheckerInterrupt: except LinkCheckerInterrupt:
raise raise
except KeyboardInterrupt: except KeyboardInterrupt:
@ -178,11 +178,11 @@ def abort (aggregate):
try: try:
aggregate.abort() aggregate.abort()
aggregate.finish() aggregate.finish()
aggregate.logger.end_log_output() aggregate.logger.end_log_output(aggregate.downloaded_bytes)
break break
except KeyboardInterrupt: except KeyboardInterrupt:
log.warn(LOG_CHECK, _("user abort; force shutdown")) log.warn(LOG_CHECK, _("user abort; force shutdown"))
aggregate.logger.end_log_output() aggregate.logger.end_log_output(aggregate.downloaded_bytes)
abort_now() abort_now()

View file

@ -30,6 +30,7 @@ from . import logger, status, checker, interrupt
_threads_lock = threading.RLock() _threads_lock = threading.RLock()
_hosts_lock = threading.RLock() _hosts_lock = threading.RLock()
_downloadedbytes_lock = threading.RLock()
def new_request_session(config): def new_request_session(config):
"""Create a new request session.""" """Create a new request session."""
@ -60,13 +61,13 @@ class Aggregate (object):
requests_per_second = config["maxrequestspersecond"] requests_per_second = config["maxrequestspersecond"]
self.wait_time_min = 1.0 / requests_per_second self.wait_time_min = 1.0 / requests_per_second
self.wait_time_max = max(self.wait_time_min + 0.5, 0.5) self.wait_time_max = max(self.wait_time_min + 0.5, 0.5)
self.downloaded_bytes = 0
@synchronized(_threads_lock) @synchronized(_threads_lock)
def start_threads (self): def start_threads (self):
"""Spawn threads for URL checking and status printing.""" """Spawn threads for URL checking and status printing."""
if self.config["status"]: if self.config["status"]:
t = status.Status(self.urlqueue, self.config.status_logger, t = status.Status(self, self.config["status_wait_seconds"])
self.config["status_wait_seconds"])
t.start() t.start()
self.threads.append(t) self.threads.append(t)
if self.config["maxrunseconds"]: if self.config["maxrunseconds"]:
@ -166,3 +167,7 @@ class Aggregate (object):
"""Determine if checking is finished.""" """Determine if checking is finished."""
self.remove_stopped_threads() self.remove_stopped_threads()
return self.urlqueue.empty() and not self.threads return self.urlqueue.empty() and not self.threads
@synchronized(_downloadedbytes_lock)
def add_downloaded_bytes(self, numbytes):
    """Add numbytes to the running total kept in self.downloaded_bytes.

    The method is wrapped with synchronized(_downloadedbytes_lock);
    presumably this serializes updates coming from concurrent checker
    threads -- confirm against the synchronized decorator's definition.

    @param numbytes: number of bytes to add to the total
    """
    self.downloaded_bytes = self.downloaded_bytes + numbytes

View file

@ -42,7 +42,8 @@ class StatusLogger (object):
"""Save file descriptor for logging.""" """Save file descriptor for logging."""
self.fd = fd self.fd = fd
def log_status (self, checked, in_progress, queue, duration): def log_status (self, checked, in_progress, queue, duration,
downloaded_bytes):
"""Write status message to file descriptor.""" """Write status message to file descriptor."""
msg = _n("%2d thread active", "%2d threads active", in_progress) % \ msg = _n("%2d thread active", "%2d threads active", in_progress) % \
in_progress in_progress
@ -52,6 +53,8 @@ class StatusLogger (object):
msg = _n("%4d link checked", "%4d links checked", checked) % checked msg = _n("%4d link checked", "%4d links checked", checked) % checked
self.write(u"%s, " % msg) self.write(u"%s, " % msg)
msg = _("runtime %s") % strformat.strduration_long(duration) msg = _("runtime %s") % strformat.strduration_long(duration)
self.write(u"%s, " % msg)
msg = _("downloaded %s") % strformat.strsize(downloaded_bytes)
self.writeln(msg) self.writeln(msg)
self.flush() self.flush()

View file

@ -38,12 +38,12 @@ class Logger (object):
for logger in self.loggers: for logger in self.loggers:
logger.start_output() logger.start_output()
def end_log_output (self): def end_log_output (self, downloaded_bytes):
""" """
End output of all configured loggers. End output of all configured loggers.
""" """
for logger in self.loggers: for logger in self.loggers:
logger.end_output() logger.end_output(downloaded_bytes)
def do_print (self, url_data): def do_print (self, url_data):
"""Determine if URL entry should be logged or not.""" """Determine if URL entry should be logged or not."""

View file

@ -22,7 +22,7 @@ from . import task
class Status (task.LoggedCheckedTask): class Status (task.LoggedCheckedTask):
"""Thread that gathers and logs the status periodically.""" """Thread that gathers and logs the status periodically."""
def __init__ (self, urlqueue, logger, wait_seconds): def __init__ (self, aggregator, wait_seconds):
"""Initialize the status logger task. """Initialize the status logger task.
@param urlqueue: the URL queue @param urlqueue: the URL queue
@ptype urlqueue: Urlqueue @ptype urlqueue: Urlqueue
@ -31,8 +31,9 @@ class Status (task.LoggedCheckedTask):
@param wait_seconds: interval in seconds to report status @param wait_seconds: interval in seconds to report status
@ptype wait_seconds: int @ptype wait_seconds: int
""" """
logger = aggregator.config.status_logger
super(Status, self).__init__(logger) super(Status, self).__init__(logger)
self.urlqueue = urlqueue self.aggregator = aggregator
self.wait_seconds = wait_seconds self.wait_seconds = wait_seconds
assert self.wait_seconds >= 1 assert self.wait_seconds >= 1
@ -52,5 +53,7 @@ class Status (task.LoggedCheckedTask):
def log_status (self): def log_status (self):
"""Log a status message.""" """Log a status message."""
duration = time.time() - self.start_time duration = time.time() - self.start_time
checked, in_progress, queue = self.urlqueue.status() checked, in_progress, queue = self.aggregator.urlqueue.status()
self.logger.log_status(checked, in_progress, queue, duration) downloaded_bytes = self.aggregator.downloaded_bytes
self.logger.log_status(checked, in_progress, queue, duration,
downloaded_bytes)

View file

@ -84,7 +84,7 @@ class LinkCheckerMain (QtGui.QMainWindow, Ui_MainWindow):
"""The main window displaying checked URLs.""" """The main window displaying checked URLs."""
log_url_signal = QtCore.pyqtSignal(object) log_url_signal = QtCore.pyqtSignal(object)
log_status_signal = QtCore.pyqtSignal(int, int, int, float) log_status_signal = QtCore.pyqtSignal(int, int, int, float, int)
log_stats_signal = QtCore.pyqtSignal(object) log_stats_signal = QtCore.pyqtSignal(object)
error_signal = QtCore.pyqtSignal(str) error_signal = QtCore.pyqtSignal(str)
@ -569,11 +569,13 @@ Version 2 or later.
"""Show given link in status bar.""" """Show given link in status bar."""
self.statusBar.showMessage(link) self.statusBar.showMessage(link)
def log_status (self, checked, in_progress, queued, duration): def log_status (self, checked, in_progress, queued, duration,
downloaded_bytes):
"""Update number of checked, active and queued links.""" """Update number of checked, active and queued links."""
self.label_checked.setText(u"%d" % checked) self.label_checked.setText(u"%d" % checked)
self.label_active.setText(u"%d" % in_progress) self.label_active.setText(u"%d" % in_progress)
self.label_queued.setText(u"%d" % queued) self.label_queued.setText(u"%d" % queued)
# TODO: display downloaded_bytes in the GUI; the value is received here but not shown yet
def log_stats (self, statistics): def log_stats (self, statistics):
"""Set statistic information for selected URL.""" """Set statistic information for selected URL."""

View file

@ -55,8 +55,9 @@ class SignalLogger (_Logger):
"""Emit URL data which gets logged in the main window.""" """Emit URL data which gets logged in the main window."""
self.log_url_signal.emit(url_data) self.log_url_signal.emit(url_data)
def end_output (self): def end_output (self, downloaded_bytes=None):
"""Emit statistic data which gets logged in the main window.""" """Emit statistic data which gets logged in the main window."""
self.stats.downloaded_bytes = downloaded_bytes
self.log_stats_signal.emit(self.stats) self.log_stats_signal.emit(self.stats)
@ -67,6 +68,8 @@ class StatusLogger (object):
"""Store given signal object.""" """Store given signal object."""
self.signal = signal self.signal = signal
def log_status (self, checked, in_progress, queued, duration): def log_status (self, checked, in_progress, queued, duration,
downloaded_bytes):
"""Emit signal with given status information.""" """Emit signal with given status information."""
self.signal.emit(checked, in_progress, queued, duration) self.signal.emit(checked, in_progress, queued, duration,
downloaded_bytes)

View file

@ -89,6 +89,8 @@ class LogStatistics (object):
self.min_url_length = 0 self.min_url_length = 0
self.avg_url_length = 0.0 self.avg_url_length = 0.0
self.avg_number = 0 self.avg_number = 0
# overall downloaded bytes
self.downloaded_bytes = None
def log_url (self, url_data, do_print): def log_url (self, url_data, do_print):
"""Log URL statistics.""" """Log URL statistics."""
@ -399,7 +401,7 @@ class _Logger (object):
pass pass
@abc.abstractmethod @abc.abstractmethod
def end_output (self): def end_output (self, downloaded_bytes=None):
""" """
End of output, used for cleanup (eg output buffer flushing). End of output, used for cleanup (eg output buffer flushing).
""" """

View file

@ -66,7 +66,7 @@ class BlacklistLogger (_Logger):
if not url_data.valid: if not url_data.valid:
self.blacklist[key] = 1 self.blacklist[key] = 1
def end_output (self): def end_output (self, downloaded_bytes=None):
""" """
Write blacklist file. Write blacklist file.
""" """

View file

@ -130,7 +130,7 @@ class CSVLogger (_Logger):
# empty queue # empty queue
self.queue.truncate(0) self.queue.truncate(0)
def end_output (self): def end_output (self, downloaded_bytes=None):
"""Write end of checking info as csv comment.""" """Write end of checking info as csv comment."""
if self.has_part("outro"): if self.has_part("outro"):
self.write_outro() self.write_outro()

View file

@ -95,7 +95,7 @@ class CustomXMLLogger (xmllog._XMLLogger):
self.xml_endtag(u'urldata') self.xml_endtag(u'urldata')
self.flush() self.flush()
def end_output (self): def end_output (self, downloaded_bytes=None):
""" """
Write XML end tag. Write XML end tag.
""" """

View file

@ -83,7 +83,7 @@ class _GraphLogger (_Logger):
"""Write end-of-graph marker.""" """Write end-of-graph marker."""
pass pass
def end_output (self): def end_output (self, downloaded_bytes=None):
"""Write edges and end of checking info as gml comment.""" """Write edges and end of checking info as gml comment."""
self.write_edges() self.write_edges()
self.end_graph() self.end_graph()

View file

@ -81,7 +81,7 @@ class GraphXMLLogger (_XMLLogger, _GraphLogger):
self.xml_endtag(u"data") self.xml_endtag(u"data")
self.xml_endtag(u"edge") self.xml_endtag(u"edge")
def end_output (self): def end_output (self, downloaded_bytes=None):
"""Finish graph output, and print end of checking info as xml """Finish graph output, and print end of checking info as xml
comment.""" comment."""
self.xml_endtag(u"graph") self.xml_endtag(u"graph")

View file

@ -324,7 +324,7 @@ class HtmlLogger (_Logger):
configuration.DonateUrl+u"</a>.")) configuration.DonateUrl+u"</a>."))
self.writeln(u"</small></body></html>") self.writeln(u"</small></body></html>")
def end_output (self): def end_output (self, downloaded_bytes=None):
"""Write end of checking info as HTML.""" """Write end of checking info as HTML."""
if self.has_part("stats"): if self.has_part("stats"):
self.write_stats() self.write_stats()

View file

@ -43,7 +43,7 @@ class NoneLogger (_Logger):
"""Do nothing.""" """Do nothing."""
pass pass
def end_output (self): def end_output (self, downloaded_bytes=None):
""" """
Do nothing. Do nothing.
""" """

View file

@ -113,7 +113,7 @@ class SitemapXmlLogger (xmllog._XMLLogger):
self.xml_endtag(u'url') self.xml_endtag(u'url')
self.flush() self.flush()
def end_output (self): def end_output (self, downloaded_bytes=None):
"""Write XML end tag.""" """Write XML end tag."""
self.xml_endtag(u"urlset") self.xml_endtag(u"urlset")
self.xml_end_output() self.xml_end_output()

View file

@ -131,7 +131,7 @@ class SQLLogger (_Logger):
}) })
self.flush() self.flush()
def end_output (self): def end_output (self, downloaded_bytes=None):
""" """
Write end of checking info as sql comment. Write end of checking info as sql comment.
""" """

View file

@ -260,6 +260,8 @@ class TextLogger (_Logger):
"""Write check statistic info.""" """Write check statistic info."""
self.writeln() self.writeln()
self.writeln(_("Statistics:")) self.writeln(_("Statistics:"))
if self.stats.downloaded_bytes is not None:
self.writeln(_("Downloaded: %s.") % strformat.strsize(self.stats.downloaded_bytes))
if self.stats.number > 0: if self.stats.number > 0:
self.writeln(_( self.writeln(_(
"Content types: %(image)d image, %(text)d text, %(video)d video, " "Content types: %(image)d image, %(text)d text, %(video)d video, "
@ -272,9 +274,10 @@ class TextLogger (_Logger):
else: else:
self.writeln(_("No statistics available since no URLs were checked.")) self.writeln(_("No statistics available since no URLs were checked."))
def end_output (self): def end_output (self, downloaded_bytes=None):
"""Write end of output info, and flush all output buffers.""" """Write end of output info, and flush all output buffers."""
if self.has_part('stats'): if self.has_part('stats'):
self.stats.downloaded_bytes = downloaded_bytes
self.write_stats() self.write_stats()
if self.has_part('outro'): if self.has_part('outro'):
self.write_outro() self.write_outro()