mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-29 10:34:42 +00:00
Increase checked cache in URL queue.
This commit is contained in:
parent
4c16d3e702
commit
7a6436f08f
9 changed files with 87 additions and 58 deletions
|
|
@ -7,6 +7,7 @@ Features:
|
|||
Closes: SF bug #3538365
|
||||
- checking: Support WML sites.
|
||||
Closes: SF bug #3553175
|
||||
- checking: Show number of parsed URLs in page content.
|
||||
- cmdline: Added Nagios plugin script.
|
||||
|
||||
Changes:
|
||||
|
|
|
|||
|
|
@ -84,8 +84,8 @@ def parse_bookmark_file (file):
|
|||
def parse_bookmark_json (data):
|
||||
"""Parse complete JSON data for Chromium Bookmarks."""
|
||||
for entry in data["roots"].values():
|
||||
for entry in parse_bookmark_node(entry):
|
||||
yield entry
|
||||
for url, name in parse_bookmark_node(entry):
|
||||
yield url, name
|
||||
|
||||
|
||||
def parse_bookmark_node (node):
|
||||
|
|
|
|||
2
linkcheck/cache/urlqueue.py
vendored
2
linkcheck/cache/urlqueue.py
vendored
|
|
@ -54,7 +54,7 @@ class UrlQueue (object):
|
|||
self.unfinished_tasks = 0
|
||||
self.finished_tasks = 0
|
||||
self.in_progress = {}
|
||||
self.checked = LFUCache(size=10000)
|
||||
self.checked = LFUCache(size=100000)
|
||||
self.shutdown = False
|
||||
self.unsorted = 0
|
||||
|
||||
|
|
|
|||
|
|
@ -262,6 +262,7 @@ class FileUrl (urlbase.UrlBase):
|
|||
mime = self.get_content_type()
|
||||
key = self.ContentMimetypes[mime]
|
||||
getattr(self, "parse_"+key)()
|
||||
self.add_num_url_info()
|
||||
|
||||
def parse_firefox (self):
|
||||
"""Parse a Firefox3 bookmark file."""
|
||||
|
|
|
|||
|
|
@ -198,6 +198,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
return
|
||||
key = self.ContentMimetypes[self.get_content_type(self.get_content)]
|
||||
getattr(self, "parse_"+key)()
|
||||
self.add_num_url_info()
|
||||
|
||||
def get_content_type (self, read=None):
|
||||
"""Return URL content type, or an empty string if content
|
||||
|
|
|
|||
|
|
@ -806,6 +806,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.parse_word()
|
||||
elif ctype == "text/vnd.wap.wml":
|
||||
self.parse_wml()
|
||||
self.add_num_url_info()
|
||||
|
||||
def get_robots_txt_url (self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -205,6 +205,8 @@ class UrlBase (object):
|
|||
self.do_check_content = True
|
||||
# MIME content type
|
||||
self.content_type = None
|
||||
# number of URLs in page content
|
||||
self.num_urls = 0
|
||||
|
||||
def set_result (self, msg, valid=True, overwrite=False):
|
||||
"""
|
||||
|
|
@ -941,6 +943,7 @@ class UrlBase (object):
|
|||
Default parse type is html.
|
||||
"""
|
||||
self.parse_html()
|
||||
self.add_num_url_info()
|
||||
|
||||
def get_user_password (self):
|
||||
"""Get tuple (user, password) from configured authentication.
|
||||
|
|
@ -960,6 +963,7 @@ class UrlBase (object):
|
|||
|
||||
def add_url (self, url, line=0, column=0, name=u"", base=None):
|
||||
"""Queue URL data for checking."""
|
||||
self.num_urls += 1
|
||||
if base:
|
||||
base_ref = urlutil.url_norm(base)[0]
|
||||
else:
|
||||
|
|
@ -971,6 +975,13 @@ class UrlBase (object):
|
|||
# Only queue URLs which have a result or are not strict extern.
|
||||
self.aggregate.urlqueue.put(url_data)
|
||||
|
||||
def add_num_url_info(self):
|
||||
"""Add number of URLs parsed to info."""
|
||||
if self.num_urls > 0:
|
||||
attrs = {"num": self.num_urls}
|
||||
msg = _n("%(num)d URL parsed.", "%(num)d URLs parsed.", self.num_urls)
|
||||
self.add_info(msg % attrs)
|
||||
|
||||
def parse_opera (self):
|
||||
"""Parse an opera bookmark file."""
|
||||
log.debug(LOG_CHECK, "Parsing Opera bookmarks %s", self)
|
||||
|
|
|
|||
65
po/de.po
65
po/de.po
|
|
@ -5,8 +5,8 @@ msgid ""
|
|||
msgstr ""
|
||||
"Project-Id-Version: $Id$\n"
|
||||
"Report-Msgid-Bugs-To: calvin@users.sourceforge.net\n"
|
||||
"POT-Creation-Date: 2012-08-26 10:48+0200\n"
|
||||
"PO-Revision-Date: 2012-08-26 10:48+0100\n"
|
||||
"POT-Creation-Date: 2012-09-02 19:55+0200\n"
|
||||
"PO-Revision-Date: 2012-09-02 19:56+0100\n"
|
||||
"Last-Translator: Bastian Kleineidam <calvin@users.sourceforge.net>\n"
|
||||
"Language-Team: de <de@li.org>\n"
|
||||
"Language: \n"
|
||||
|
|
@ -744,122 +744,129 @@ msgstr "URL besitzt einen nicht analysierbaren Rechnernamen: %(name)s"
|
|||
msgid "Leading or trailing whitespace in URL `%(url)s'."
|
||||
msgstr "Die URL %(url)s enthält Leerzeichen am Anfang oder Ende."
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:379
|
||||
#: ../linkcheck/checker/urlbase.py:381
|
||||
msgid "URL is missing"
|
||||
msgstr "URL fehlt"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:382
|
||||
#: ../linkcheck/checker/urlbase.py:384
|
||||
msgid "URL is empty"
|
||||
msgstr "URL ist leer"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:389
|
||||
#: ../linkcheck/checker/urlbase.py:391
|
||||
#, python-format
|
||||
msgid "Effective URL %(url)r."
|
||||
msgstr "Effektive URL %(url)r."
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:448
|
||||
#: ../linkcheck/checker/urlbase.py:450
|
||||
#, python-format
|
||||
msgid "URL has invalid port %(port)r"
|
||||
msgstr "URL hat eine ungültige Portnummer %(port)r"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:453
|
||||
#: ../linkcheck/checker/urlbase.py:455
|
||||
msgid "URL has empty hostname"
|
||||
msgstr "URL hat leeren Rechnernamen"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:464
|
||||
#: ../linkcheck/checker/urlbase.py:466
|
||||
#, python-format
|
||||
msgid "URL %(url)s has obfuscated IP address %(ip)s"
|
||||
msgstr "URL %(url)s besitzt die verschleierte IP-Adresse %(ip)s"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:491
|
||||
#: ../linkcheck/checker/urlbase.py:493
|
||||
#, python-format
|
||||
msgid "URL is located in %(country)s."
|
||||
msgstr "URL befindet sich in %(country)s."
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:516
|
||||
#: ../linkcheck/checker/urlbase.py:518
|
||||
msgid "Hostname not found"
|
||||
msgstr "Rechnername nicht gefunden"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:519
|
||||
#: ../linkcheck/checker/urlbase.py:521
|
||||
#, python-format
|
||||
msgid "Bad HTTP response %(line)r"
|
||||
msgstr "Ungültige HTTP Antwort %(line)r"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:532
|
||||
#: ../linkcheck/checker/urlbase.py:534
|
||||
#, python-format
|
||||
msgid "could not get content: %(msg)r"
|
||||
msgstr "konnte Inhalt nicht holen: %(msg)r"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:673
|
||||
#: ../linkcheck/checker/urlbase.py:675
|
||||
#, python-format
|
||||
msgid "Anchor `%(name)s' not found."
|
||||
msgstr "Anker `%(name)s' nicht gefunden."
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:674
|
||||
#: ../linkcheck/checker/urlbase.py:676
|
||||
#, python-format
|
||||
msgid "Available anchors: %(anchors)s."
|
||||
msgstr "Verfügbare Anker: %(anchors)s."
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:728
|
||||
#: ../linkcheck/checker/urlbase.py:730
|
||||
#: ../linkcheck/checker/fileurl.py:193
|
||||
#: ../linkcheck/checker/httpurl.py:685
|
||||
msgid "File size too large"
|
||||
msgstr "Dateigröße ist zu groß"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:770
|
||||
#: ../linkcheck/checker/urlbase.py:772
|
||||
#, python-format
|
||||
msgid "Found %(match)r at line %(line)d in link contents."
|
||||
msgstr "Habe %(match)r in Zeile %(line)d im Inhalt der Verknüpfung gefunden."
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:786
|
||||
#: ../linkcheck/checker/urlbase.py:788
|
||||
msgid "Content size is zero."
|
||||
msgstr "Größe des Inhalts ist Null."
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:792
|
||||
#: ../linkcheck/checker/urlbase.py:794
|
||||
#, python-format
|
||||
msgid "Content size %(dlsize)s is larger than %(maxbytes)s."
|
||||
msgstr "Inhalt %(dlsize)s ist größer als %(maxbytes)s."
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:797
|
||||
#: ../linkcheck/checker/urlbase.py:799
|
||||
#, python-format
|
||||
msgid "Download size (%(dlsize)d Byte) does not equal content size (%(size)d Byte)."
|
||||
msgstr "Download Größe (%(dlsize)d Byte) ist ungleich der Inhaltsgröße (%(size)d Byte)."
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:818
|
||||
#: ../linkcheck/checker/urlbase.py:883
|
||||
#: ../linkcheck/checker/urlbase.py:820
|
||||
#: ../linkcheck/checker/urlbase.py:885
|
||||
msgid "valid HTML syntax"
|
||||
msgstr "gültige HTML Syntax"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:824
|
||||
#: ../linkcheck/checker/urlbase.py:826
|
||||
#, python-format
|
||||
msgid "tidy HTML parsing caused error: %(msg)s "
|
||||
msgstr "tidy HTML Parser verursachte Fehler: %(msg)s"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:846
|
||||
#: ../linkcheck/checker/urlbase.py:919
|
||||
#: ../linkcheck/checker/urlbase.py:848
|
||||
#: ../linkcheck/checker/urlbase.py:921
|
||||
msgid "valid CSS syntax"
|
||||
msgstr "gültige CSS Syntax"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:852
|
||||
#: ../linkcheck/checker/urlbase.py:854
|
||||
#, python-format
|
||||
msgid "cssutils parsing caused error: %(msg)s"
|
||||
msgstr "cssutils Parser verursachte Fehler: %(msg)s"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:861
|
||||
#: ../linkcheck/checker/urlbase.py:863
|
||||
#, python-format
|
||||
msgid "%(w3type)s validation error at line %(line)s col %(column)s: %(msg)s"
|
||||
msgstr "%(w3type)s Validierungsfehler in Zeile %(line)s Spalte %(column)s: %(msg)s"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:892
|
||||
#: ../linkcheck/checker/urlbase.py:894
|
||||
#, python-format
|
||||
msgid "HTML W3C validation caused error: %(msg)s "
|
||||
msgstr "HTML W3C Validierung verursachte Fehler: %(msg)s"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:927
|
||||
#: ../linkcheck/checker/urlbase.py:929
|
||||
#, python-format
|
||||
msgid "CSS W3C validation caused error: %(msg)s "
|
||||
msgstr "CSS W3C Validierung verursachte Fehler: %(msg)s"
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:982
|
||||
#, python-format
|
||||
msgid "%(num)d URL parsed."
|
||||
msgid_plural "%(num)d URLs parsed."
|
||||
msgstr[0] "%(num)d URL geparst."
|
||||
msgstr[1] "%(num)d URLs geparst."
|
||||
|
||||
#: ../linkcheck/checker/proxysupport.py:43
|
||||
#, python-format
|
||||
msgid "Proxy value `%(proxy)s' must start with 'http:' or 'https:'."
|
||||
|
|
@ -925,7 +932,7 @@ msgstr "Keine Antwort vom FTP Server"
|
|||
msgid "Missing trailing directory slash in ftp url."
|
||||
msgstr "Fehlender / am Ende der FTP url."
|
||||
|
||||
#: ../linkcheck/checker/ftpurl.py:224
|
||||
#: ../linkcheck/checker/ftpurl.py:225
|
||||
msgid "FTP file size too large"
|
||||
msgstr "FTP Dateigröße ist zu groß"
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ msgid ""
|
|||
msgstr ""
|
||||
"Project-Id-Version: PACKAGE VERSION\n"
|
||||
"Report-Msgid-Bugs-To: calvin@users.sourceforge.net\n"
|
||||
"POT-Creation-Date: 2012-08-26 10:48+0200\n"
|
||||
"POT-Creation-Date: 2012-09-02 19:55+0200\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||
|
|
@ -713,120 +713,127 @@ msgstr ""
|
|||
msgid "Leading or trailing whitespace in URL `%(url)s'."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:379
|
||||
#: ../linkcheck/checker/urlbase.py:381
|
||||
msgid "URL is missing"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:382
|
||||
#: ../linkcheck/checker/urlbase.py:384
|
||||
msgid "URL is empty"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:389
|
||||
#: ../linkcheck/checker/urlbase.py:391
|
||||
#, python-format
|
||||
msgid "Effective URL %(url)r."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:448
|
||||
#: ../linkcheck/checker/urlbase.py:450
|
||||
#, python-format
|
||||
msgid "URL has invalid port %(port)r"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:453
|
||||
#: ../linkcheck/checker/urlbase.py:455
|
||||
msgid "URL has empty hostname"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:464
|
||||
#: ../linkcheck/checker/urlbase.py:466
|
||||
#, python-format
|
||||
msgid "URL %(url)s has obfuscated IP address %(ip)s"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:491
|
||||
#: ../linkcheck/checker/urlbase.py:493
|
||||
#, python-format
|
||||
msgid "URL is located in %(country)s."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:516
|
||||
#: ../linkcheck/checker/urlbase.py:518
|
||||
msgid "Hostname not found"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:519
|
||||
#: ../linkcheck/checker/urlbase.py:521
|
||||
#, python-format
|
||||
msgid "Bad HTTP response %(line)r"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:532
|
||||
#: ../linkcheck/checker/urlbase.py:534
|
||||
#, python-format
|
||||
msgid "could not get content: %(msg)r"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:673
|
||||
#: ../linkcheck/checker/urlbase.py:675
|
||||
#, python-format
|
||||
msgid "Anchor `%(name)s' not found."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:674
|
||||
#: ../linkcheck/checker/urlbase.py:676
|
||||
#, python-format
|
||||
msgid "Available anchors: %(anchors)s."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:728 ../linkcheck/checker/fileurl.py:193
|
||||
#: ../linkcheck/checker/urlbase.py:730 ../linkcheck/checker/fileurl.py:193
|
||||
#: ../linkcheck/checker/httpurl.py:685
|
||||
msgid "File size too large"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:770
|
||||
#: ../linkcheck/checker/urlbase.py:772
|
||||
#, python-format
|
||||
msgid "Found %(match)r at line %(line)d in link contents."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:786
|
||||
#: ../linkcheck/checker/urlbase.py:788
|
||||
msgid "Content size is zero."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:792
|
||||
#: ../linkcheck/checker/urlbase.py:794
|
||||
#, python-format
|
||||
msgid "Content size %(dlsize)s is larger than %(maxbytes)s."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:797
|
||||
#: ../linkcheck/checker/urlbase.py:799
|
||||
#, python-format
|
||||
msgid ""
|
||||
"Download size (%(dlsize)d Byte) does not equal content size (%(size)d Byte)."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:818 ../linkcheck/checker/urlbase.py:883
|
||||
#: ../linkcheck/checker/urlbase.py:820 ../linkcheck/checker/urlbase.py:885
|
||||
msgid "valid HTML syntax"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:824
|
||||
#: ../linkcheck/checker/urlbase.py:826
|
||||
#, python-format
|
||||
msgid "tidy HTML parsing caused error: %(msg)s "
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:846 ../linkcheck/checker/urlbase.py:919
|
||||
#: ../linkcheck/checker/urlbase.py:848 ../linkcheck/checker/urlbase.py:921
|
||||
msgid "valid CSS syntax"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:852
|
||||
#: ../linkcheck/checker/urlbase.py:854
|
||||
#, python-format
|
||||
msgid "cssutils parsing caused error: %(msg)s"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:861
|
||||
#: ../linkcheck/checker/urlbase.py:863
|
||||
#, python-format
|
||||
msgid "%(w3type)s validation error at line %(line)s col %(column)s: %(msg)s"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:892
|
||||
#: ../linkcheck/checker/urlbase.py:894
|
||||
#, python-format
|
||||
msgid "HTML W3C validation caused error: %(msg)s "
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:927
|
||||
#: ../linkcheck/checker/urlbase.py:929
|
||||
#, python-format
|
||||
msgid "CSS W3C validation caused error: %(msg)s "
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:982
|
||||
#, python-format
|
||||
msgid "%(num)d URL parsed."
|
||||
msgid_plural "%(num)d URLs parsed."
|
||||
msgstr[0] ""
|
||||
msgstr[1] ""
|
||||
|
||||
#: ../linkcheck/checker/proxysupport.py:43
|
||||
#, python-format
|
||||
msgid "Proxy value `%(proxy)s' must start with 'http:' or 'https:'."
|
||||
|
|
@ -892,7 +899,7 @@ msgstr ""
|
|||
msgid "Missing trailing directory slash in ftp url."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/ftpurl.py:224
|
||||
#: ../linkcheck/checker/ftpurl.py:225
|
||||
msgid "FTP file size too large"
|
||||
msgstr ""
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue