diff --git a/doc/changelog.txt b/doc/changelog.txt index f98bc26a..81b9767d 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -14,6 +14,7 @@ Changes: - checking: Disable URL length warning for data: URLs. - installation: Check requirement for Python requests >= 2.2.0. Closes: GH bug #478 +- logging: Display downloaded bytes. Fixes: - checking: Fix internal errors in debug output. diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py index 47f87321..cd2968f1 100644 --- a/linkcheck/checker/urlbase.py +++ b/linkcheck/checker/urlbase.py @@ -631,6 +631,8 @@ class UrlBase (object): if self.size == 0: self.add_warning(_("Content size is zero."), tag=WARN_URL_CONTENT_SIZE_ZERO) + else: + self.aggregate.add_downloaded_bytes(self.size) return self.data def read_content(self): diff --git a/linkcheck/director/__init__.py b/linkcheck/director/__init__.py index 6bbaab57..eba3bef4 100644 --- a/linkcheck/director/__init__.py +++ b/linkcheck/director/__init__.py @@ -124,7 +124,7 @@ def check_urls (aggregate): aggregate.start_threads() check_url(aggregate) aggregate.finish() - aggregate.logger.end_log_output() + aggregate.logger.end_log_output(aggregate.downloaded_bytes) except LinkCheckerInterrupt: raise except KeyboardInterrupt: @@ -178,11 +178,11 @@ def abort (aggregate): try: aggregate.abort() aggregate.finish() - aggregate.logger.end_log_output() + aggregate.logger.end_log_output(aggregate.downloaded_bytes) break except KeyboardInterrupt: log.warn(LOG_CHECK, _("user abort; force shutdown")) - aggregate.logger.end_log_output() + aggregate.logger.end_log_output(aggregate.downloaded_bytes) abort_now() diff --git a/linkcheck/director/aggregator.py b/linkcheck/director/aggregator.py index 5cf43414..7855916a 100644 --- a/linkcheck/director/aggregator.py +++ b/linkcheck/director/aggregator.py @@ -30,6 +30,7 @@ from . import logger, status, checker, interrupt _threads_lock = threading.RLock() _hosts_lock = threading.RLock() +_downloadedbytes_lock = threading.RLock() def new_request_session(config): """Create a new request session.""" @@ -60,13 +61,13 @@ class Aggregate (object): requests_per_second = config["maxrequestspersecond"] self.wait_time_min = 1.0 / requests_per_second self.wait_time_max = max(self.wait_time_min + 0.5, 0.5) + self.downloaded_bytes = 0 @synchronized(_threads_lock) def start_threads (self): """Spawn threads for URL checking and status printing.""" if self.config["status"]: - t = status.Status(self.urlqueue, self.config.status_logger, - self.config["status_wait_seconds"]) + t = status.Status(self, self.config["status_wait_seconds"]) t.start() self.threads.append(t) if self.config["maxrunseconds"]: @@ -166,3 +167,7 @@ class Aggregate (object): """Determine if checking is finished.""" self.remove_stopped_threads() return self.urlqueue.empty() and not self.threads + + @synchronized(_downloadedbytes_lock) + def add_downloaded_bytes(self, numbytes): + self.downloaded_bytes += numbytes diff --git a/linkcheck/director/console.py b/linkcheck/director/console.py index f4cd42e1..6b06f996 100644 --- a/linkcheck/director/console.py +++ b/linkcheck/director/console.py @@ -42,7 +42,8 @@ class StatusLogger (object): """Save file descriptor for logging.""" self.fd = fd - def log_status (self, checked, in_progress, queue, duration): + def log_status (self, checked, in_progress, queue, duration, + downloaded_bytes): """Write status message to file descriptor.""" msg = _n("%2d thread active", "%2d threads active", in_progress) % \ in_progress @@ -52,6 +53,8 @@ class StatusLogger (object): msg = _n("%4d link checked", "%4d links checked", checked) % checked self.write(u"%s, " % msg) msg = _("runtime %s") % strformat.strduration_long(duration) + self.write(u"%s, " % msg) + msg = _("downloaded %s") % strformat.strsize(downloaded_bytes) self.writeln(msg) self.flush() diff --git a/linkcheck/director/logger.py b/linkcheck/director/logger.py index 22fe8a70..eba46bc6 100644 --- a/linkcheck/director/logger.py +++ b/linkcheck/director/logger.py @@ -38,12 +38,12 @@ class Logger (object): for logger in self.loggers: logger.start_output() - def end_log_output (self): + def end_log_output (self, downloaded_bytes): """ End output of all configured loggers. """ for logger in self.loggers: - logger.end_output() + logger.end_output(downloaded_bytes) def do_print (self, url_data): """Determine if URL entry should be logged or not.""" diff --git a/linkcheck/director/status.py b/linkcheck/director/status.py index 1837387c..1efbf11d 100644 --- a/linkcheck/director/status.py +++ b/linkcheck/director/status.py @@ -22,7 +22,7 @@ from . import task class Status (task.LoggedCheckedTask): """Thread that gathers and logs the status periodically.""" - def __init__ (self, urlqueue, logger, wait_seconds): + def __init__ (self, aggregator, wait_seconds): """Initialize the status logger task. @param urlqueue: the URL queue @ptype urlqueue: Urlqueue @@ -31,8 +31,9 @@ class Status (task.LoggedCheckedTask): @param wait_seconds: interval in seconds to report status @ptype wait_seconds: int """ + logger = aggregator.config.status_logger super(Status, self).__init__(logger) - self.urlqueue = urlqueue + self.aggregator = aggregator self.wait_seconds = wait_seconds assert self.wait_seconds >= 1 @@ -52,5 +53,7 @@ class Status (task.LoggedCheckedTask): def log_status (self): """Log a status message.""" duration = time.time() - self.start_time - checked, in_progress, queue = self.urlqueue.status() - self.logger.log_status(checked, in_progress, queue, duration) + checked, in_progress, queue = self.aggregator.urlqueue.status() + downloaded_bytes = self.aggregator.downloaded_bytes + self.logger.log_status(checked, in_progress, queue, duration, + downloaded_bytes) diff --git a/linkcheck/gui/__init__.py b/linkcheck/gui/__init__.py index b1a2f920..274078ed 100644 --- a/linkcheck/gui/__init__.py +++ b/linkcheck/gui/__init__.py @@ -84,7 +84,7 @@ class LinkCheckerMain (QtGui.QMainWindow, Ui_MainWindow): """The main window displaying checked URLs.""" log_url_signal = QtCore.pyqtSignal(object) - log_status_signal = QtCore.pyqtSignal(int, int, int, float) + log_status_signal = QtCore.pyqtSignal(int, int, int, float, int) log_stats_signal = QtCore.pyqtSignal(object) error_signal = QtCore.pyqtSignal(str) @@ -569,11 +569,13 @@ Version 2 or later. """Show given link in status bar.""" self.statusBar.showMessage(link) - def log_status (self, checked, in_progress, queued, duration): + def log_status (self, checked, in_progress, queued, duration, + downloaded_bytes): """Update number of checked, active and queued links.""" self.label_checked.setText(u"%d" % checked) self.label_active.setText(u"%d" % in_progress) self.label_queued.setText(u"%d" % queued) + # XXX display downloaded bytes def log_stats (self, statistics): """Set statistic information for selected URL.""" diff --git a/linkcheck/gui/logger.py b/linkcheck/gui/logger.py index 6bf18267..83b5a283 100644 --- a/linkcheck/gui/logger.py +++ b/linkcheck/gui/logger.py @@ -55,8 +55,9 @@ class SignalLogger (_Logger): """Emit URL data which gets logged in the main window.""" self.log_url_signal.emit(url_data) - def end_output (self): + def end_output (self, downloaded_bytes=None): """Emit statistic data which gets logged in the main window.""" + self.stats.downloaded_bytes = downloaded_bytes self.log_stats_signal.emit(self.stats) @@ -67,6 +68,8 @@ class StatusLogger (object): """Store given signal object.""" self.signal = signal - def log_status (self, checked, in_progress, queued, duration): + def log_status (self, checked, in_progress, queued, duration, + downloaded_bytes): """Emit signal with given status information.""" - self.signal.emit(checked, in_progress, queued, duration) + self.signal.emit(checked, in_progress, queued, duration, + downloaded_bytes) diff --git a/linkcheck/logger/__init__.py b/linkcheck/logger/__init__.py index 15e1e34e..f086201b 100644 --- a/linkcheck/logger/__init__.py +++ b/linkcheck/logger/__init__.py @@ -89,6 +89,8 @@ class LogStatistics (object): self.min_url_length = 0 self.avg_url_length = 0.0 self.avg_number = 0 + # overall downloaded bytes + self.downloaded_bytes = None def log_url (self, url_data, do_print): """Log URL statistics.""" @@ -399,7 +401,7 @@ class _Logger (object): pass @abc.abstractmethod - def end_output (self): + def end_output (self, downloaded_bytes=None): """ End of output, used for cleanup (eg output buffer flushing). """ diff --git a/linkcheck/logger/blacklist.py b/linkcheck/logger/blacklist.py index 055077ba..b1ec53d5 100644 --- a/linkcheck/logger/blacklist.py +++ b/linkcheck/logger/blacklist.py @@ -66,7 +66,7 @@ class BlacklistLogger (_Logger): if not url_data.valid: self.blacklist[key] = 1 - def end_output (self): + def end_output (self, downloaded_bytes=None): """ Write blacklist file. """ diff --git a/linkcheck/logger/csvlog.py b/linkcheck/logger/csvlog.py index 051d94bc..899c4576 100644 --- a/linkcheck/logger/csvlog.py +++ b/linkcheck/logger/csvlog.py @@ -130,7 +130,7 @@ class CSVLogger (_Logger): # empty queue self.queue.truncate(0) - def end_output (self): + def end_output (self, downloaded_bytes=None): """Write end of checking info as csv comment.""" if self.has_part("outro"): self.write_outro() diff --git a/linkcheck/logger/customxml.py b/linkcheck/logger/customxml.py index 10602723..4319f6b2 100644 --- a/linkcheck/logger/customxml.py +++ b/linkcheck/logger/customxml.py @@ -95,7 +95,7 @@ class CustomXMLLogger (xmllog._XMLLogger): self.xml_endtag(u'urldata') self.flush() - def end_output (self): + def end_output (self, downloaded_bytes=None): """ Write XML end tag. """ diff --git a/linkcheck/logger/graph.py b/linkcheck/logger/graph.py index f5aed857..fe948aa7 100644 --- a/linkcheck/logger/graph.py +++ b/linkcheck/logger/graph.py @@ -83,7 +83,7 @@ class _GraphLogger (_Logger): """Write end-of-graph marker.""" pass - def end_output (self): + def end_output (self, downloaded_bytes=None): """Write edges and end of checking info as gml comment.""" self.write_edges() self.end_graph() diff --git a/linkcheck/logger/gxml.py b/linkcheck/logger/gxml.py index 04e7cb72..b0976654 100644 --- a/linkcheck/logger/gxml.py +++ b/linkcheck/logger/gxml.py @@ -81,7 +81,7 @@ class GraphXMLLogger (_XMLLogger, _GraphLogger): self.xml_endtag(u"data") self.xml_endtag(u"edge") - def end_output (self): + def end_output (self, downloaded_bytes=None): """Finish graph output, and print end of checking info as xml comment.""" self.xml_endtag(u"graph") diff --git a/linkcheck/logger/html.py b/linkcheck/logger/html.py index 93300218..e5e32e1d 100644 --- a/linkcheck/logger/html.py +++ b/linkcheck/logger/html.py @@ -324,7 +324,7 @@ class HtmlLogger (_Logger): configuration.DonateUrl+u".")) self.writeln(u"") - def end_output (self): + def end_output (self, downloaded_bytes=None): """Write end of checking info as HTML.""" if self.has_part("stats"): self.write_stats() diff --git a/linkcheck/logger/none.py b/linkcheck/logger/none.py index 7f3308fe..720d613d 100644 --- a/linkcheck/logger/none.py +++ b/linkcheck/logger/none.py @@ -43,7 +43,7 @@ class NoneLogger (_Logger): """Do nothing.""" pass - def end_output (self): + def end_output (self, downloaded_bytes=None): """ Do nothing. """ diff --git a/linkcheck/logger/sitemapxml.py b/linkcheck/logger/sitemapxml.py index 5a21d36c..bd1e8776 100644 --- a/linkcheck/logger/sitemapxml.py +++ b/linkcheck/logger/sitemapxml.py @@ -113,7 +113,7 @@ class SitemapXmlLogger (xmllog._XMLLogger): self.xml_endtag(u'url') self.flush() - def end_output (self): + def end_output (self, downloaded_bytes=None): """Write XML end tag.""" self.xml_endtag(u"urlset") self.xml_end_output() diff --git a/linkcheck/logger/sql.py b/linkcheck/logger/sql.py index cca8fc31..42463daf 100644 --- a/linkcheck/logger/sql.py +++ b/linkcheck/logger/sql.py @@ -131,7 +131,7 @@ class SQLLogger (_Logger): }) self.flush() - def end_output (self): + def end_output (self, downloaded_bytes=None): """ Write end of checking info as sql comment. """ diff --git a/linkcheck/logger/text.py b/linkcheck/logger/text.py index c9a5f4a6..251113a9 100644 --- a/linkcheck/logger/text.py +++ b/linkcheck/logger/text.py @@ -260,6 +260,8 @@ class TextLogger (_Logger): """Write check statistic info.""" self.writeln() self.writeln(_("Statistics:")) + if self.stats.downloaded_bytes is not None: + self.writeln(_("Downloaded: %s.") % strformat.strsize(self.stats.downloaded_bytes)) if self.stats.number > 0: self.writeln(_( "Content types: %(image)d image, %(text)d text, %(video)d video, " @@ -272,9 +274,10 @@ class TextLogger (_Logger): else: self.writeln(_("No statistics available since no URLs were checked.")) - def end_output (self): + def end_output (self, downloaded_bytes=None): """Write end of output info, and flush all output buffers.""" if self.has_part('stats'): + self.stats.downloaded_bytes = downloaded_bytes self.write_stats() if self.has_part('outro'): self.write_outro()