mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-08 08:30:59 +00:00
Add configuration entry for maximum number of URLs.
This commit is contained in:
parent
a013a67358
commit
8750d55a73
8 changed files with 467 additions and 407 deletions
|
|
@ -182,6 +182,9 @@
|
|||
# Stop checking new URLs after the given number of seconds. Same as if the
|
||||
# user hits Ctrl-C after X seconds.
|
||||
#maxrunseconds=600
|
||||
# Maximum number of URLs to check. New URLs will not be queued after the
|
||||
# given number of URLs has been checked.
|
||||
#maxnumurls=153
|
||||
|
||||
##################### filtering options ##########################
|
||||
[filtering]
|
||||
|
|
|
|||
|
|
@ -152,6 +152,14 @@ GUI).
|
|||
Standard ist, nicht zu stoppen, bis alle URLs geprüft sind.
|
||||
.br
|
||||
Kommandozeilenoption: keine
|
||||
.TP
|
||||
\fBmaxnumurls=\fP\fINUMBER\fP
|
||||
Maximale Anzahl von URLs, die geprüft werden. Neue URLs werden nicht
|
||||
angenommen, nachdem die angegebene Anzahl von URLs geprüft wurde.
|
||||
.br
|
||||
Standard ist alle URLs anzunehmen und zu prüfen.
|
||||
.br
|
||||
Kommandozeilenoption: keine
|
||||
.SS [filtering]
|
||||
.TP
|
||||
\fBignore=\fP\fIREGEX\fP (MULTILINE)
|
||||
|
|
|
|||
|
|
@ -144,6 +144,14 @@ after the given number of seconds.
|
|||
The default is not to stop until all URLs are checked.
|
||||
.br
|
||||
Command line option: none
|
||||
.TP
|
||||
\fBmaxnumurls=\fP\fINUMBER\fP
|
||||
Maximum number of URLs to check. New URLs will not be queued after the
|
||||
given number of URLs has been checked.
|
||||
.br
|
||||
The default is to queue and check all URLs.
|
||||
.br
|
||||
Command line option: none
|
||||
.SS \fB[filtering]\fP
|
||||
.TP
|
||||
\fBignore=\fP\fIREGEX\fP (MULTILINE)
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
8
linkcheck/cache/urlqueue.py
vendored
8
linkcheck/cache/urlqueue.py
vendored
|
|
@ -40,7 +40,7 @@ class UrlQueue (object):
|
|||
"""A queue supporting several consumer tasks. The task_done() idea is
|
||||
from the Python 2.5 implementation of Queue.Queue()."""
|
||||
|
||||
def __init__ (self, allowed_puts=None):
|
||||
def __init__ (self, max_allowed_puts=None):
|
||||
"""Initialize the queue state and task counters."""
|
||||
# Note: don't put a maximum size on the queue since it would
|
||||
# lead to deadlocks when all worker threads called put().
|
||||
|
|
@ -61,9 +61,9 @@ class UrlQueue (object):
|
|||
self.shutdown = False
|
||||
# Each put() decreases the number of allowed puts.
|
||||
# This way we can restrict the number of URLs that are checked.
|
||||
if allowed_puts is not None and allowed_puts <= 0:
|
||||
raise ValueError("Non-positive number of allowed puts: %d" % allowed_puts)
|
||||
self.allowed_puts = allowed_puts
|
||||
if max_allowed_puts is not None and max_allowed_puts <= 0:
|
||||
raise ValueError("Non-positive number of allowed puts: %d" % max_allowed_puts)
|
||||
self.allowed_puts = max_allowed_puts
|
||||
|
||||
def qsize (self):
|
||||
"""Return the approximate size of the queue (not reliable!)."""
|
||||
|
|
|
|||
|
|
@ -228,6 +228,7 @@ class Configuration (dict):
|
|||
self["localwebroot"] = None
|
||||
self["warnsslcertdaysvalid"] = 14
|
||||
self["maxrunseconds"] = None
|
||||
self["maxnumurls"] = None
|
||||
from ..logger import Loggers
|
||||
self.loggers = dict(**Loggers)
|
||||
|
||||
|
|
|
|||
|
|
@ -213,7 +213,7 @@ def abort_now ():
|
|||
|
||||
def get_aggregate (config):
|
||||
"""Get an aggregator instance with given configuration."""
|
||||
_urlqueue = urlqueue.UrlQueue()
|
||||
_urlqueue = urlqueue.UrlQueue(max_allowed_puts=config["maxnumurls"])
|
||||
connections = connection.ConnectionPool(wait=config["wait"])
|
||||
cookies = cookie.CookieJar()
|
||||
_robots_txt = robots_txt.RobotsTxt()
|
||||
|
|
|
|||
Loading…
Reference in a new issue