Add configuration entry for maximum number of URLs.

This commit is contained in:
Bastian Kleineidam 2012-10-14 11:13:55 +02:00
parent a013a67358
commit 8750d55a73
8 changed files with 467 additions and 407 deletions

View file

@ -182,6 +182,9 @@
# Stop checking new URLs after the given number of seconds. Same as if the
# user hits Ctrl-C after X seconds.
#maxrunseconds=600
# Maximum number of URLs to check. New URLs will not be queued after the
# given number of URLs is checked.
#maxnumurls=153
##################### filtering options ##########################
[filtering]

442
doc/de.po

File diff suppressed because it is too large Load diff

View file

@ -152,6 +152,14 @@ GUI).
Standard ist, nicht zu stoppen, bis alle URLs geprüft sind.
.br
Kommandozeilenoption: keine
.TP
\fBmaxnumurls=\fP\fINUMBER\fP
Maximale Anzahl von URLs, die geprüft werden. Neue URLs werden nicht
angenommen, nachdem die angegebene Anzahl von URLs geprüft wurde.
.br
Standard ist, alle URLs anzunehmen und zu prüfen.
.br
Kommandozeilenoption: keine
.SS [filtering]
.TP
\fBignore=\fP\fIREGEX\fP (MULTILINE)

View file

@ -144,6 +144,14 @@ after the given number of seconds.
The default is not to stop until all URLs are checked.
.br
Command line option: none
.TP
\fBmaxnumurls=\fP\fINUMBER\fP
Maximum number of URLs to check. New URLs will not be queued after the
given number of URLs is checked.
.br
The default is to queue and check all URLs.
.br
Command line option: none
.SS \fB[filtering]\fP
.TP
\fBignore=\fP\fIREGEX\fP (MULTILINE)

File diff suppressed because it is too large Load diff

View file

@ -40,7 +40,7 @@ class UrlQueue (object):
"""A queue supporting several consumer tasks. The task_done() idea is
from the Python 2.5 implementation of Queue.Queue()."""
def __init__ (self, allowed_puts=None):
def __init__ (self, max_allowed_puts=None):
"""Initialize the queue state and task counters."""
# Note: don't put a maximum size on the queue since it would
# lead to deadlocks when all worker threads called put().
@ -61,9 +61,9 @@ class UrlQueue (object):
self.shutdown = False
# Each put() decreases the number of allowed puts.
# This way we can restrict the number of URLs that are checked.
if allowed_puts is not None and allowed_puts <= 0:
raise ValueError("Non-positive number of allowed puts: %d" % allowed_puts)
self.allowed_puts = allowed_puts
if max_allowed_puts is not None and max_allowed_puts <= 0:
raise ValueError("Non-positive number of allowed puts: %d" % max_allowed_puts)
self.allowed_puts = max_allowed_puts
def qsize (self):
"""Return the approximate size of the queue (not reliable!)."""

View file

@ -228,6 +228,7 @@ class Configuration (dict):
self["localwebroot"] = None
self["warnsslcertdaysvalid"] = 14
self["maxrunseconds"] = None
self["maxnumurls"] = None
from ..logger import Loggers
self.loggers = dict(**Loggers)

View file

@ -213,7 +213,7 @@ def abort_now ():
def get_aggregate (config):
"""Get an aggregator instance with given configuration."""
_urlqueue = urlqueue.UrlQueue()
_urlqueue = urlqueue.UrlQueue(max_allowed_puts=config["maxnumurls"])
connections = connection.ConnectionPool(wait=config["wait"])
cookies = cookie.CookieJar()
_robots_txt = robots_txt.RobotsTxt()