mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-16 22:10:26 +00:00
Make ResultCache max_size configurable (#544)
* Make ResultCache max_size configurable; fixes #463. * Add tests and docs. * Fix documentation: adapt the source, not the auto-generated man pages themselves, as requested in #544. * Fix typo.
This commit is contained in:
parent
c94e953dbd
commit
f395c74aac
12 changed files with 29 additions and 8 deletions
|
|
@ -173,6 +173,8 @@
|
|||
#robotstxt=1
|
||||
# Allowed URL schemes as a comma-separated list.
|
||||
#allowedschemes=http,https
|
||||
# Size of the result cache. Checking more URLs might increase memory usage during runtime.
|
||||
#resultcachesize=100000
|
||||
|
||||
##################### filtering options ##########################
|
||||
[filtering]
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
.\" Man page generated from reStructuredText.
|
||||
.
|
||||
.TH "LINKCHECKER" "1" "Januar 28, 2021" "2021-01-28" "LinkChecker"
|
||||
.TH "LINKCHECKER" "1" "Juni 20, 2021" "2021-06-20" "LinkChecker"
|
||||
.SH NAME
|
||||
linkchecker \- Kommandozeilenprogramm zum Prüfen von HTML Dokumenten und Webseiten auf ungültige Verknüpfungen
|
||||
.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
.\" Man page generated from reStructuredText.
|
||||
.
|
||||
.TH "LINKCHECKERRC" "5" "Januar 28, 2021" "2021-01-28" "LinkChecker"
|
||||
.TH "LINKCHECKERRC" "5" "Juni 20, 2021" "2021-06-20" "LinkChecker"
|
||||
.SH NAME
|
||||
linkcheckerrc \- Konfigurationsdatei für LinkChecker
|
||||
.
|
||||
|
|
@ -104,6 +104,11 @@ Command line option: \fB\-\-no\-robots\fP
|
|||
\fBallowedschemes=\fP\fINAME\fP[\fB,\fP\fINAME\fP\&...]
|
||||
Allowed URL schemes as comma\-separated list.
|
||||
Command line option: none
|
||||
.TP
|
||||
\fBresultcachesize=\fP\fINUMBER\fP
|
||||
Set the result cache size.
|
||||
The default is 100 000 URLs.
|
||||
Command line option: none
|
||||
.UNINDENT
|
||||
.SS filtering
|
||||
.INDENT 0.0
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
.\" Man page generated from reStructuredText.
|
||||
.
|
||||
.TH "LINKCHECKER" "1" "January 28, 2021" "2021-01-28" "LinkChecker"
|
||||
.TH "LINKCHECKER" "1" "June 20, 2021" "2021-06-20" "LinkChecker"
|
||||
.SH NAME
|
||||
linkchecker \- command line client to check HTML documents and websites for broken links
|
||||
.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
.\" Man page generated from reStructuredText.
|
||||
.
|
||||
.TH "LINKCHECKERRC" "5" "January 28, 2021" "2021-01-28" "LinkChecker"
|
||||
.TH "LINKCHECKERRC" "5" "June 20, 2021" "2021-06-20" "LinkChecker"
|
||||
.SH NAME
|
||||
linkcheckerrc \- configuration file for LinkChecker
|
||||
.
|
||||
|
|
@ -137,6 +137,11 @@ Command line option: \fB\-\-no\-robots\fP
|
|||
\fBallowedschemes=\fP\fINAME\fP[\fB,\fP\fINAME\fP\&...]
|
||||
Allowed URL schemes as comma\-separated list.
|
||||
Command line option: none
|
||||
.TP
|
||||
\fBresultcachesize=\fP\fINUMBER\fP
|
||||
Set the result cache size.
|
||||
The default is 100 000 URLs.
|
||||
Command line option: none
|
||||
.UNINDENT
|
||||
.SS filtering
|
||||
.INDENT 0.0
|
||||
|
|
|
|||
|
|
@ -98,6 +98,10 @@ checking
|
|||
**allowedschemes=**\ *NAME*\ [**,**\ *NAME*...]
|
||||
Allowed URL schemes as comma-separated list.
|
||||
Command line option: none
|
||||
**resultcachesize=**\ *NUMBER*
|
||||
Set the result cache size.
|
||||
The default is 100 000 URLs.
|
||||
Command line option: none
|
||||
|
||||
filtering
|
||||
^^^^^^^^^
|
||||
|
|
|
|||
4
linkcheck/cache/results.py
vendored
4
linkcheck/cache/results.py
vendored
|
|
@ -32,11 +32,11 @@ class ResultCache:
|
|||
format: {cache key (string) -> result (UrlData.towire())}
|
||||
"""
|
||||
|
||||
def __init__(self, max_size=100000):
|
||||
def __init__(self, result_cache_size):
|
||||
"""Initialize result cache."""
|
||||
# mapping {URL -> cached result}
|
||||
self.cache = {}
|
||||
self.max_size = max_size
|
||||
self.max_size = result_cache_size
|
||||
|
||||
@synchronized(cache_lock)
|
||||
def get_result(self, key):
|
||||
|
|
|
|||
|
|
@ -184,6 +184,7 @@ class Configuration(dict):
|
|||
self["aborttimeout"] = 300
|
||||
self["recursionlevel"] = -1
|
||||
self["useragent"] = UserAgent
|
||||
self["resultcachesize"] = 100000
|
||||
# authentication
|
||||
self["authentication"] = []
|
||||
self["loginurl"] = None
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ def get_aggregate(config):
|
|||
_urlqueue = urlqueue.UrlQueue(max_allowed_urls=config["maxnumurls"])
|
||||
_robots_txt = robots_txt.RobotsTxt(config["useragent"])
|
||||
plugin_manager = plugins.PluginManager(config)
|
||||
result_cache = results.ResultCache()
|
||||
result_cache = results.ResultCache(config["resultcachesize"])
|
||||
return aggregator.Aggregate(
|
||||
config, _urlqueue, _robots_txt, plugin_manager, result_cache
|
||||
)
|
||||
|
|
|
|||
4
tests/cache/test_urlqueue.py
vendored
4
tests/cache/test_urlqueue.py
vendored
|
|
@ -17,6 +17,7 @@
|
|||
import unittest
|
||||
from collections import namedtuple
|
||||
|
||||
import linkcheck.configuration
|
||||
from linkcheck.cache.results import ResultCache
|
||||
from linkcheck.cache.urlqueue import Empty, NUM_PUTS_CLEANUP, UrlQueue
|
||||
|
||||
|
|
@ -26,7 +27,8 @@ Aggregate = namedtuple("Aggregate", "result_cache")
|
|||
|
||||
class TestUrlQueue(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.result_cache = ResultCache()
|
||||
config = linkcheck.configuration.Configuration()
|
||||
self.result_cache = ResultCache(config["resultcachesize"])
|
||||
self.urlqueue = UrlQueue()
|
||||
self.urldata1 = UrlData(
|
||||
url="Foo",
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ maxnumurls=1000
|
|||
maxrunseconds=1
|
||||
maxfilesizeparse=100
|
||||
maxfilesizedownload=100
|
||||
resultcachesize=100000
|
||||
|
||||
[filtering]
|
||||
ignore=
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ class TestConfig(unittest.TestCase):
|
|||
self.assertEqual(config["maxrunseconds"], 1)
|
||||
self.assertEqual(config["maxfilesizeparse"], 100)
|
||||
self.assertEqual(config["maxfilesizedownload"], 100)
|
||||
self.assertEqual(config["resultcachesize"], 100000)
|
||||
# filtering section
|
||||
patterns = [x["pattern"].pattern for x in config["externlinks"]]
|
||||
for prefix in ("ignore_", "nofollow_"):
|
||||
|
|
|
|||
Loading…
Reference in a new issue