Log number of checked unique URLs.

This commit is contained in:
Bastian Kleineidam 2014-03-14 23:46:17 +01:00
parent 91c6e1d29f
commit fc73c6ca6e
20 changed files with 43 additions and 33 deletions

View file

@ -113,15 +113,9 @@ class UrlQueue (object):
self._put(item)
self.not_empty.notify()
def put_denied(self, url_data):
"""Determine if put() will not append the item on the queue.
@return True (reliable) or False (unreliable)
"""
return self.shutdown or self.max_allowed_urls == 0
def _put (self, url_data):
"""Put URL in queue, increase number of unfished tasks."""
if self.put_denied(url_data):
if self.shutdown or self.max_allowed_urls == 0:
return
log.debug(LOG_CACHE, "queueing %s", url_data.url)
key = url_data.cache_url

View file

@ -124,7 +124,7 @@ def check_urls (aggregate):
aggregate.start_threads()
check_url(aggregate)
aggregate.finish()
aggregate.logger.end_log_output(aggregate.downloaded_bytes)
aggregate.end_log_output()
except LinkCheckerInterrupt:
raise
except KeyboardInterrupt:
@ -178,11 +178,11 @@ def abort (aggregate):
try:
aggregate.abort()
aggregate.finish()
aggregate.logger.end_log_output(aggregate.downloaded_bytes)
aggregate.end_log_output()
break
except KeyboardInterrupt:
log.warn(LOG_CHECK, _("user abort; force shutdown"))
aggregate.logger.end_log_output(aggregate.downloaded_bytes)
aggregate.end_log_output()
abort_now()

View file

@ -172,3 +172,11 @@ class Aggregate (object):
def add_downloaded_bytes(self, numbytes):
"""Add to number of downloaded bytes."""
self.downloaded_bytes += numbytes
def end_log_output(self):
"""Print ending output to log."""
kwargs = dict(
downloaded_bytes=self.downloaded_bytes,
num_urls = len(self.result_cache),
)
self.logger.end_log_output(**kwargs)

View file

@ -42,15 +42,17 @@ class StatusLogger (object):
"""Save file descriptor for logging."""
self.fd = fd
def log_status (self, checked, in_progress, queue, duration):
def log_status (self, checked, in_progress, queue, duration, num_urls):
"""Write status message to file descriptor."""
msg = _n("%2d thread active", "%2d threads active", in_progress) % \
in_progress
self.write(u"%s, " % msg)
msg = _n("%5d link queued", "%5d links queued", queue) % queue
self.write(u"%s, " % msg)
msg = _n("%4d link checked", "%4d links checked", checked) % checked
self.write(u"%s, " % msg)
msg = _n("%4d link", "%4d links", checked) % checked
self.write(u"%s" % msg)
msg = _n("%3d URL", "%3d URLs", num_urls) % num_urls
self.write(u" in %s checked, " % msg)
msg = _("runtime %s") % strformat.strduration_long(duration)
self.writeln(msg)
self.flush()

View file

@ -38,12 +38,12 @@ class Logger (object):
for logger in self.loggers:
logger.start_output()
def end_log_output (self, downloaded_bytes):
def end_log_output (self, **kwargs):
"""
End output of all configured loggers.
"""
for logger in self.loggers:
logger.end_output(downloaded_bytes)
logger.end_output(**kwargs)
def do_print (self, url_data):
"""Determine if URL entry should be logged or not."""

View file

@ -54,4 +54,5 @@ class Status (task.LoggedCheckedTask):
"""Log a status message."""
duration = time.time() - self.start_time
checked, in_progress, queue = self.aggregator.urlqueue.status()
self.logger.log_status(checked, in_progress, queue, duration)
num_urls = len(self.aggregator.result_cache)
self.logger.log_status(checked, in_progress, queue, duration, num_urls)

View file

@ -569,7 +569,7 @@ Version 2 or later.
"""Show given link in status bar."""
self.statusBar.showMessage(link)
def log_status (self, checked, in_progress, queued, duration):
def log_status (self, checked, in_progress, queued, duration, num_urls):
"""Update number of checked, active and queued links."""
self.label_checked.setText(u"%d" % checked)
self.label_active.setText(u"%d" % in_progress)

View file

@ -68,6 +68,6 @@ class StatusLogger (object):
"""Store given signal object."""
self.signal = signal
def log_status (self, checked, in_progress, queued, duration):
def log_status (self, checked, in_progress, queued, duration, num_urls):
"""Emit signal with given status information."""
self.signal.emit(checked, in_progress, queued, duration)
self.signal.emit(checked, in_progress, queued, duration, num_urls)

View file

@ -401,7 +401,7 @@ class _Logger (object):
pass
@abc.abstractmethod
def end_output (self, downloaded_bytes=None):
def end_output (self, downloaded_bytes=None, num_urls=None):
"""
End of output, used for cleanup (eg output buffer flushing).
"""

View file

@ -66,7 +66,7 @@ class BlacklistLogger (_Logger):
if not url_data.valid:
self.blacklist[key] = 1
def end_output (self, downloaded_bytes=None):
def end_output (self, downloaded_bytes=None, num_urls=None):
"""
Write blacklist file.
"""

View file

@ -130,7 +130,7 @@ class CSVLogger (_Logger):
# empty queue
self.queue.truncate(0)
def end_output (self, downloaded_bytes=None):
def end_output (self, downloaded_bytes=None, num_urls=None):
"""Write end of checking info as csv comment."""
if self.has_part("outro"):
self.write_outro()

View file

@ -95,7 +95,7 @@ class CustomXMLLogger (xmllog._XMLLogger):
self.xml_endtag(u'urldata')
self.flush()
def end_output (self, downloaded_bytes=None):
def end_output (self, downloaded_bytes=None, num_urls=None):
"""
Write XML end tag.
"""

View file

@ -83,7 +83,7 @@ class _GraphLogger (_Logger):
"""Write end-of-graph marker."""
pass
def end_output (self, downloaded_bytes=None):
def end_output (self, downloaded_bytes=None, num_urls=None):
"""Write edges and end of checking info as gml comment."""
self.write_edges()
self.end_graph()

View file

@ -81,7 +81,7 @@ class GraphXMLLogger (_XMLLogger, _GraphLogger):
self.xml_endtag(u"data")
self.xml_endtag(u"edge")
def end_output (self, downloaded_bytes=None):
def end_output (self, downloaded_bytes=None, num_urls=None):
"""Finish graph output, and print end of checking info as xml
comment."""
self.xml_endtag(u"graph")

View file

@ -324,7 +324,7 @@ class HtmlLogger (_Logger):
configuration.DonateUrl+u"</a>."))
self.writeln(u"</small></body></html>")
def end_output (self, downloaded_bytes=None):
def end_output (self, downloaded_bytes=None, num_urls=None):
"""Write end of checking info as HTML."""
if self.has_part("stats"):
self.write_stats()

View file

@ -43,7 +43,7 @@ class NoneLogger (_Logger):
"""Do nothing."""
pass
def end_output (self, downloaded_bytes=None):
def end_output (self, downloaded_bytes=None, num_urls=None):
"""
Do nothing.
"""

View file

@ -113,7 +113,7 @@ class SitemapXmlLogger (xmllog._XMLLogger):
self.xml_endtag(u'url')
self.flush()
def end_output (self, downloaded_bytes=None):
def end_output (self, downloaded_bytes=None, num_urls=None):
"""Write XML end tag."""
self.xml_endtag(u"urlset")
self.xml_end_output()

View file

@ -131,7 +131,7 @@ class SQLLogger (_Logger):
})
self.flush()
def end_output (self, downloaded_bytes=None):
def end_output (self, downloaded_bytes=None, num_urls=None):
"""
Write end of checking info as sql comment.
"""

View file

@ -221,9 +221,13 @@ class TextLogger (_Logger):
"""Write end of checking message."""
self.writeln()
self.write(_("That's it.") + " ")
self.write(_n("%d link checked.", "%d links checked.",
self.write(_n("%d link", "%d links",
self.stats.number) % self.stats.number)
self.write(u" ")
if self.stats.num_urls is not None:
self.write(_n("in %d URL", "in %d URLs",
self.stats.num_urls) % self.stats.num_urls)
self.write(u" checked. ")
warning_text = _n("%d warning found", "%d warnings found",
self.stats.warnings_printed) % self.stats.warnings_printed
if self.stats.warnings_printed:
@ -274,10 +278,11 @@ class TextLogger (_Logger):
else:
self.writeln(_("No statistics available since no URLs were checked."))
def end_output (self, downloaded_bytes=None):
def end_output (self, downloaded_bytes=None, num_urls=None):
"""Write end of output info, and flush all output buffers."""
self.stats.downloaded_bytes = downloaded_bytes
self.stats.num_urls = num_urls
if self.has_part('stats'):
self.stats.downloaded_bytes = downloaded_bytes
self.write_stats()
if self.has_part('outro'):
self.write_outro()

View file

@ -89,7 +89,7 @@ class TestLogger (linkcheck.logger._Logger):
# note: do not append url_data.result since this is
# platform dependent
def end_output (self, linknumber=-1):
def end_output (self, linknumber=-1, **kwargs):
"""
Stores differences between expected and result in self.diff.
"""