mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-04 21:04:41 +00:00
Display downloaded bytes.
This commit is contained in:
parent
c51caf1133
commit
6437f08277
20 changed files with 52 additions and 28 deletions
|
|
@ -14,6 +14,7 @@ Changes:
|
||||||
- checking: Disable URL length warning for data: URLs.
|
- checking: Disable URL length warning for data: URLs.
|
||||||
- installation: Check requirement for Python requests >= 2.2.0.
|
- installation: Check requirement for Python requests >= 2.2.0.
|
||||||
Closes: GH bug #478
|
Closes: GH bug #478
|
||||||
|
- logging: Display downloaded bytes.
|
||||||
|
|
||||||
Fixes:
|
Fixes:
|
||||||
- checking: Fix internal errors in debug output.
|
- checking: Fix internal errors in debug output.
|
||||||
|
|
|
||||||
|
|
@ -631,6 +631,8 @@ class UrlBase (object):
|
||||||
if self.size == 0:
|
if self.size == 0:
|
||||||
self.add_warning(_("Content size is zero."),
|
self.add_warning(_("Content size is zero."),
|
||||||
tag=WARN_URL_CONTENT_SIZE_ZERO)
|
tag=WARN_URL_CONTENT_SIZE_ZERO)
|
||||||
|
else:
|
||||||
|
self.aggregate.add_downloaded_bytes(self.size)
|
||||||
return self.data
|
return self.data
|
||||||
|
|
||||||
def read_content(self):
|
def read_content(self):
|
||||||
|
|
|
||||||
|
|
@ -124,7 +124,7 @@ def check_urls (aggregate):
|
||||||
aggregate.start_threads()
|
aggregate.start_threads()
|
||||||
check_url(aggregate)
|
check_url(aggregate)
|
||||||
aggregate.finish()
|
aggregate.finish()
|
||||||
aggregate.logger.end_log_output()
|
aggregate.logger.end_log_output(aggregate.downloaded_bytes)
|
||||||
except LinkCheckerInterrupt:
|
except LinkCheckerInterrupt:
|
||||||
raise
|
raise
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
|
@ -178,11 +178,11 @@ def abort (aggregate):
|
||||||
try:
|
try:
|
||||||
aggregate.abort()
|
aggregate.abort()
|
||||||
aggregate.finish()
|
aggregate.finish()
|
||||||
aggregate.logger.end_log_output()
|
aggregate.logger.end_log_output(aggregate.downloaded_bytes)
|
||||||
break
|
break
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
log.warn(LOG_CHECK, _("user abort; force shutdown"))
|
log.warn(LOG_CHECK, _("user abort; force shutdown"))
|
||||||
aggregate.logger.end_log_output()
|
aggregate.logger.end_log_output(aggregate.downloaded_bytes)
|
||||||
abort_now()
|
abort_now()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ from . import logger, status, checker, interrupt
|
||||||
|
|
||||||
_threads_lock = threading.RLock()
|
_threads_lock = threading.RLock()
|
||||||
_hosts_lock = threading.RLock()
|
_hosts_lock = threading.RLock()
|
||||||
|
_downloadedbytes_lock = threading.RLock()
|
||||||
|
|
||||||
def new_request_session(config):
|
def new_request_session(config):
|
||||||
"""Create a new request session."""
|
"""Create a new request session."""
|
||||||
|
|
@ -60,13 +61,13 @@ class Aggregate (object):
|
||||||
requests_per_second = config["maxrequestspersecond"]
|
requests_per_second = config["maxrequestspersecond"]
|
||||||
self.wait_time_min = 1.0 / requests_per_second
|
self.wait_time_min = 1.0 / requests_per_second
|
||||||
self.wait_time_max = max(self.wait_time_min + 0.5, 0.5)
|
self.wait_time_max = max(self.wait_time_min + 0.5, 0.5)
|
||||||
|
self.downloaded_bytes = 0
|
||||||
|
|
||||||
@synchronized(_threads_lock)
|
@synchronized(_threads_lock)
|
||||||
def start_threads (self):
|
def start_threads (self):
|
||||||
"""Spawn threads for URL checking and status printing."""
|
"""Spawn threads for URL checking and status printing."""
|
||||||
if self.config["status"]:
|
if self.config["status"]:
|
||||||
t = status.Status(self.urlqueue, self.config.status_logger,
|
t = status.Status(self, self.config["status_wait_seconds"])
|
||||||
self.config["status_wait_seconds"])
|
|
||||||
t.start()
|
t.start()
|
||||||
self.threads.append(t)
|
self.threads.append(t)
|
||||||
if self.config["maxrunseconds"]:
|
if self.config["maxrunseconds"]:
|
||||||
|
|
@ -166,3 +167,7 @@ class Aggregate (object):
|
||||||
"""Determine if checking is finished."""
|
"""Determine if checking is finished."""
|
||||||
self.remove_stopped_threads()
|
self.remove_stopped_threads()
|
||||||
return self.urlqueue.empty() and not self.threads
|
return self.urlqueue.empty() and not self.threads
|
||||||
|
|
||||||
|
@synchronized(_downloadedbytes_lock)
|
||||||
|
def add_downloaded_bytes(self, numbytes):
|
||||||
|
self.downloaded_bytes += numbytes
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,8 @@ class StatusLogger (object):
|
||||||
"""Save file descriptor for logging."""
|
"""Save file descriptor for logging."""
|
||||||
self.fd = fd
|
self.fd = fd
|
||||||
|
|
||||||
def log_status (self, checked, in_progress, queue, duration):
|
def log_status (self, checked, in_progress, queue, duration,
|
||||||
|
downloaded_bytes):
|
||||||
"""Write status message to file descriptor."""
|
"""Write status message to file descriptor."""
|
||||||
msg = _n("%2d thread active", "%2d threads active", in_progress) % \
|
msg = _n("%2d thread active", "%2d threads active", in_progress) % \
|
||||||
in_progress
|
in_progress
|
||||||
|
|
@ -52,6 +53,8 @@ class StatusLogger (object):
|
||||||
msg = _n("%4d link checked", "%4d links checked", checked) % checked
|
msg = _n("%4d link checked", "%4d links checked", checked) % checked
|
||||||
self.write(u"%s, " % msg)
|
self.write(u"%s, " % msg)
|
||||||
msg = _("runtime %s") % strformat.strduration_long(duration)
|
msg = _("runtime %s") % strformat.strduration_long(duration)
|
||||||
|
self.write(u"%s, " % msg)
|
||||||
|
msg = _("downloaded %s") % strformat.strsize(downloaded_bytes)
|
||||||
self.writeln(msg)
|
self.writeln(msg)
|
||||||
self.flush()
|
self.flush()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -38,12 +38,12 @@ class Logger (object):
|
||||||
for logger in self.loggers:
|
for logger in self.loggers:
|
||||||
logger.start_output()
|
logger.start_output()
|
||||||
|
|
||||||
def end_log_output (self):
|
def end_log_output (self, downloaded_bytes):
|
||||||
"""
|
"""
|
||||||
End output of all configured loggers.
|
End output of all configured loggers.
|
||||||
"""
|
"""
|
||||||
for logger in self.loggers:
|
for logger in self.loggers:
|
||||||
logger.end_output()
|
logger.end_output(downloaded_bytes)
|
||||||
|
|
||||||
def do_print (self, url_data):
|
def do_print (self, url_data):
|
||||||
"""Determine if URL entry should be logged or not."""
|
"""Determine if URL entry should be logged or not."""
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ from . import task
|
||||||
class Status (task.LoggedCheckedTask):
|
class Status (task.LoggedCheckedTask):
|
||||||
"""Thread that gathers and logs the status periodically."""
|
"""Thread that gathers and logs the status periodically."""
|
||||||
|
|
||||||
def __init__ (self, urlqueue, logger, wait_seconds):
|
def __init__ (self, aggregator, wait_seconds):
|
||||||
"""Initialize the status logger task.
|
"""Initialize the status logger task.
|
||||||
@param urlqueue: the URL queue
|
@param urlqueue: the URL queue
|
||||||
@ptype urlqueue: Urlqueue
|
@ptype urlqueue: Urlqueue
|
||||||
|
|
@ -31,8 +31,9 @@ class Status (task.LoggedCheckedTask):
|
||||||
@param wait_seconds: interval in seconds to report status
|
@param wait_seconds: interval in seconds to report status
|
||||||
@ptype wait_seconds: int
|
@ptype wait_seconds: int
|
||||||
"""
|
"""
|
||||||
|
logger = aggregator.config.status_logger
|
||||||
super(Status, self).__init__(logger)
|
super(Status, self).__init__(logger)
|
||||||
self.urlqueue = urlqueue
|
self.aggregator = aggregator
|
||||||
self.wait_seconds = wait_seconds
|
self.wait_seconds = wait_seconds
|
||||||
assert self.wait_seconds >= 1
|
assert self.wait_seconds >= 1
|
||||||
|
|
||||||
|
|
@ -52,5 +53,7 @@ class Status (task.LoggedCheckedTask):
|
||||||
def log_status (self):
|
def log_status (self):
|
||||||
"""Log a status message."""
|
"""Log a status message."""
|
||||||
duration = time.time() - self.start_time
|
duration = time.time() - self.start_time
|
||||||
checked, in_progress, queue = self.urlqueue.status()
|
checked, in_progress, queue = self.aggregator.urlqueue.status()
|
||||||
self.logger.log_status(checked, in_progress, queue, duration)
|
downloaded_bytes = self.aggregator.downloaded_bytes
|
||||||
|
self.logger.log_status(checked, in_progress, queue, duration,
|
||||||
|
downloaded_bytes)
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,7 @@ class LinkCheckerMain (QtGui.QMainWindow, Ui_MainWindow):
|
||||||
"""The main window displaying checked URLs."""
|
"""The main window displaying checked URLs."""
|
||||||
|
|
||||||
log_url_signal = QtCore.pyqtSignal(object)
|
log_url_signal = QtCore.pyqtSignal(object)
|
||||||
log_status_signal = QtCore.pyqtSignal(int, int, int, float)
|
log_status_signal = QtCore.pyqtSignal(int, int, int, float, int)
|
||||||
log_stats_signal = QtCore.pyqtSignal(object)
|
log_stats_signal = QtCore.pyqtSignal(object)
|
||||||
error_signal = QtCore.pyqtSignal(str)
|
error_signal = QtCore.pyqtSignal(str)
|
||||||
|
|
||||||
|
|
@ -569,11 +569,13 @@ Version 2 or later.
|
||||||
"""Show given link in status bar."""
|
"""Show given link in status bar."""
|
||||||
self.statusBar.showMessage(link)
|
self.statusBar.showMessage(link)
|
||||||
|
|
||||||
def log_status (self, checked, in_progress, queued, duration):
|
def log_status (self, checked, in_progress, queued, duration,
|
||||||
|
downloaded_bytes):
|
||||||
"""Update number of checked, active and queued links."""
|
"""Update number of checked, active and queued links."""
|
||||||
self.label_checked.setText(u"%d" % checked)
|
self.label_checked.setText(u"%d" % checked)
|
||||||
self.label_active.setText(u"%d" % in_progress)
|
self.label_active.setText(u"%d" % in_progress)
|
||||||
self.label_queued.setText(u"%d" % queued)
|
self.label_queued.setText(u"%d" % queued)
|
||||||
|
# XXX display downloaded bytes
|
||||||
|
|
||||||
def log_stats (self, statistics):
|
def log_stats (self, statistics):
|
||||||
"""Set statistic information for selected URL."""
|
"""Set statistic information for selected URL."""
|
||||||
|
|
|
||||||
|
|
@ -55,8 +55,9 @@ class SignalLogger (_Logger):
|
||||||
"""Emit URL data which gets logged in the main window."""
|
"""Emit URL data which gets logged in the main window."""
|
||||||
self.log_url_signal.emit(url_data)
|
self.log_url_signal.emit(url_data)
|
||||||
|
|
||||||
def end_output (self):
|
def end_output (self, downloaded_bytes=None):
|
||||||
"""Emit statistic data which gets logged in the main window."""
|
"""Emit statistic data which gets logged in the main window."""
|
||||||
|
self.stats.downloaded_bytes = downloaded_bytes
|
||||||
self.log_stats_signal.emit(self.stats)
|
self.log_stats_signal.emit(self.stats)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -67,6 +68,8 @@ class StatusLogger (object):
|
||||||
"""Store given signal object."""
|
"""Store given signal object."""
|
||||||
self.signal = signal
|
self.signal = signal
|
||||||
|
|
||||||
def log_status (self, checked, in_progress, queued, duration):
|
def log_status (self, checked, in_progress, queued, duration,
|
||||||
|
downloaded_bytes):
|
||||||
"""Emit signal with given status information."""
|
"""Emit signal with given status information."""
|
||||||
self.signal.emit(checked, in_progress, queued, duration)
|
self.signal.emit(checked, in_progress, queued, duration,
|
||||||
|
downloaded_bytes)
|
||||||
|
|
|
||||||
|
|
@ -89,6 +89,8 @@ class LogStatistics (object):
|
||||||
self.min_url_length = 0
|
self.min_url_length = 0
|
||||||
self.avg_url_length = 0.0
|
self.avg_url_length = 0.0
|
||||||
self.avg_number = 0
|
self.avg_number = 0
|
||||||
|
# overall downloaded bytes
|
||||||
|
self.downloaded_bytes = None
|
||||||
|
|
||||||
def log_url (self, url_data, do_print):
|
def log_url (self, url_data, do_print):
|
||||||
"""Log URL statistics."""
|
"""Log URL statistics."""
|
||||||
|
|
@ -399,7 +401,7 @@ class _Logger (object):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def end_output (self):
|
def end_output (self, downloaded_bytes=None):
|
||||||
"""
|
"""
|
||||||
End of output, used for cleanup (eg output buffer flushing).
|
End of output, used for cleanup (eg output buffer flushing).
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,7 @@ class BlacklistLogger (_Logger):
|
||||||
if not url_data.valid:
|
if not url_data.valid:
|
||||||
self.blacklist[key] = 1
|
self.blacklist[key] = 1
|
||||||
|
|
||||||
def end_output (self):
|
def end_output (self, downloaded_bytes=None):
|
||||||
"""
|
"""
|
||||||
Write blacklist file.
|
Write blacklist file.
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -130,7 +130,7 @@ class CSVLogger (_Logger):
|
||||||
# empty queue
|
# empty queue
|
||||||
self.queue.truncate(0)
|
self.queue.truncate(0)
|
||||||
|
|
||||||
def end_output (self):
|
def end_output (self, downloaded_bytes=None):
|
||||||
"""Write end of checking info as csv comment."""
|
"""Write end of checking info as csv comment."""
|
||||||
if self.has_part("outro"):
|
if self.has_part("outro"):
|
||||||
self.write_outro()
|
self.write_outro()
|
||||||
|
|
|
||||||
|
|
@ -95,7 +95,7 @@ class CustomXMLLogger (xmllog._XMLLogger):
|
||||||
self.xml_endtag(u'urldata')
|
self.xml_endtag(u'urldata')
|
||||||
self.flush()
|
self.flush()
|
||||||
|
|
||||||
def end_output (self):
|
def end_output (self, downloaded_bytes=None):
|
||||||
"""
|
"""
|
||||||
Write XML end tag.
|
Write XML end tag.
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -83,7 +83,7 @@ class _GraphLogger (_Logger):
|
||||||
"""Write end-of-graph marker."""
|
"""Write end-of-graph marker."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def end_output (self):
|
def end_output (self, downloaded_bytes=None):
|
||||||
"""Write edges and end of checking info as gml comment."""
|
"""Write edges and end of checking info as gml comment."""
|
||||||
self.write_edges()
|
self.write_edges()
|
||||||
self.end_graph()
|
self.end_graph()
|
||||||
|
|
|
||||||
|
|
@ -81,7 +81,7 @@ class GraphXMLLogger (_XMLLogger, _GraphLogger):
|
||||||
self.xml_endtag(u"data")
|
self.xml_endtag(u"data")
|
||||||
self.xml_endtag(u"edge")
|
self.xml_endtag(u"edge")
|
||||||
|
|
||||||
def end_output (self):
|
def end_output (self, downloaded_bytes=None):
|
||||||
"""Finish graph output, and print end of checking info as xml
|
"""Finish graph output, and print end of checking info as xml
|
||||||
comment."""
|
comment."""
|
||||||
self.xml_endtag(u"graph")
|
self.xml_endtag(u"graph")
|
||||||
|
|
|
||||||
|
|
@ -324,7 +324,7 @@ class HtmlLogger (_Logger):
|
||||||
configuration.DonateUrl+u"</a>."))
|
configuration.DonateUrl+u"</a>."))
|
||||||
self.writeln(u"</small></body></html>")
|
self.writeln(u"</small></body></html>")
|
||||||
|
|
||||||
def end_output (self):
|
def end_output (self, downloaded_bytes=None):
|
||||||
"""Write end of checking info as HTML."""
|
"""Write end of checking info as HTML."""
|
||||||
if self.has_part("stats"):
|
if self.has_part("stats"):
|
||||||
self.write_stats()
|
self.write_stats()
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ class NoneLogger (_Logger):
|
||||||
"""Do nothing."""
|
"""Do nothing."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def end_output (self):
|
def end_output (self, downloaded_bytes=None):
|
||||||
"""
|
"""
|
||||||
Do nothing.
|
Do nothing.
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -113,7 +113,7 @@ class SitemapXmlLogger (xmllog._XMLLogger):
|
||||||
self.xml_endtag(u'url')
|
self.xml_endtag(u'url')
|
||||||
self.flush()
|
self.flush()
|
||||||
|
|
||||||
def end_output (self):
|
def end_output (self, downloaded_bytes=None):
|
||||||
"""Write XML end tag."""
|
"""Write XML end tag."""
|
||||||
self.xml_endtag(u"urlset")
|
self.xml_endtag(u"urlset")
|
||||||
self.xml_end_output()
|
self.xml_end_output()
|
||||||
|
|
|
||||||
|
|
@ -131,7 +131,7 @@ class SQLLogger (_Logger):
|
||||||
})
|
})
|
||||||
self.flush()
|
self.flush()
|
||||||
|
|
||||||
def end_output (self):
|
def end_output (self, downloaded_bytes=None):
|
||||||
"""
|
"""
|
||||||
Write end of checking info as sql comment.
|
Write end of checking info as sql comment.
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -260,6 +260,8 @@ class TextLogger (_Logger):
|
||||||
"""Write check statistic info."""
|
"""Write check statistic info."""
|
||||||
self.writeln()
|
self.writeln()
|
||||||
self.writeln(_("Statistics:"))
|
self.writeln(_("Statistics:"))
|
||||||
|
if self.stats.downloaded_bytes is not None:
|
||||||
|
self.writeln(_("Downloaded: %s.") % strformat.strsize(self.stats.downloaded_bytes))
|
||||||
if self.stats.number > 0:
|
if self.stats.number > 0:
|
||||||
self.writeln(_(
|
self.writeln(_(
|
||||||
"Content types: %(image)d image, %(text)d text, %(video)d video, "
|
"Content types: %(image)d image, %(text)d text, %(video)d video, "
|
||||||
|
|
@ -272,9 +274,10 @@ class TextLogger (_Logger):
|
||||||
else:
|
else:
|
||||||
self.writeln(_("No statistics available since no URLs were checked."))
|
self.writeln(_("No statistics available since no URLs were checked."))
|
||||||
|
|
||||||
def end_output (self):
|
def end_output (self, downloaded_bytes=None):
|
||||||
"""Write end of output info, and flush all output buffers."""
|
"""Write end of output info, and flush all output buffers."""
|
||||||
if self.has_part('stats'):
|
if self.has_part('stats'):
|
||||||
|
self.stats.downloaded_bytes = downloaded_bytes
|
||||||
self.write_stats()
|
self.write_stats()
|
||||||
if self.has_part('outro'):
|
if self.has_part('outro'):
|
||||||
self.write_outro()
|
self.write_outro()
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue