mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-17 14:30:30 +00:00
add option to ignore specific errors for specific URLs
This commit is contained in:
parent
36a45b0f96
commit
8c959589c3
8 changed files with 138 additions and 0 deletions
|
|
@ -188,6 +188,24 @@ URL checking results
|
|||
**warnings=**\ [**0**\ \|\ **1**]
|
||||
If set, log warnings. Default is to log warnings.
|
||||
Command line option: :option:`--no-warnings`
|
||||
**ignoreerrors=**\ *URL_REGEX* [*MESSAGE_REGEX*] (`MULTILINE`_)
|
||||
Specify regular expressions to ignore errors for matching URLs, one
|
||||
per line. A second regular expression can be specified per line to
|
||||
only ignore matching error messages per corresponding URL. If the
|
||||
second expression is omitted, all errors are ignored. In contrast
|
||||
to filtering_, this happens *after* checking, which allows checking
|
||||
URLs despite certain expected and tolerable errors. Default is to
|
||||
not ignore any errors.
|
||||
|
||||
::
|
||||
[output]
|
||||
|
||||
ignoreerrors=
|
||||
|
||||
^https://deprecated\.example\.com ^410 Gone
|
||||
|
||||
# ignore all errors (no second expression), including syntax check errors:
|
||||
^mailto:.*@example\.com$
|
||||
|
||||
Progress updates
|
||||
""""""""""""""""
|
||||
|
|
|
|||
|
|
@ -216,6 +216,7 @@ class UrlBase:
|
|||
% {"url": base_url},
|
||||
tag=WARN_URL_WHITESPACE,
|
||||
)
|
||||
self.ignore_errors = self.aggregate.config['ignoreerrors']
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
|
|
@ -270,6 +271,8 @@ class UrlBase:
|
|||
self.content_type = ""
|
||||
# URLs seen through redirections
|
||||
self.aliases = []
|
||||
# (URL regex, error message regex) pairs describing errors to ignore
|
||||
self.ignore_errors = []
|
||||
|
||||
def set_result(self, msg, valid=True, overwrite=False):
|
||||
"""
|
||||
|
|
@ -289,6 +292,16 @@ class UrlBase:
|
|||
log.warn(LOG_CHECK, "Empty result for %s", self)
|
||||
self.result = msg
|
||||
self.valid = valid
|
||||
|
||||
if not self.valid:
|
||||
for url_regex, msg_regex in self.ignore_errors:
|
||||
if not url_regex.search(self.url):
|
||||
continue
|
||||
if not msg_regex.search(self.result):
|
||||
continue
|
||||
self.valid = True
|
||||
self.result = f"Ignored: {self.result}"
|
||||
|
||||
# free content data
|
||||
self.data = None
|
||||
|
||||
|
|
|
|||
|
|
@ -165,6 +165,7 @@ class Configuration(dict):
|
|||
self["loginextrafields"] = {}
|
||||
# filtering
|
||||
self["externlinks"] = []
|
||||
self["ignoreerrors"] = []
|
||||
self["ignorewarnings"] = []
|
||||
self["internlinks"] = []
|
||||
self["checkextern"] = False
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
"""Parse configuration files"""
|
||||
|
||||
from configparser import RawConfigParser
|
||||
from re import compile as re_compile
|
||||
import os
|
||||
|
||||
from .. import (
|
||||
|
|
@ -158,6 +159,14 @@ class LCConfigParser(RawConfigParser):
|
|||
for val in loggers:
|
||||
output = self.config.logger_new(val, fileoutput=1)
|
||||
self.config['fileoutput'].append(output)
|
||||
if self.has_option(section, "ignoreerrors"):
|
||||
for line in read_multiline(self.get(section, "ignoreerrors")):
|
||||
parts = line.split(maxsplit=1)
|
||||
if len(parts) == 1:
|
||||
parts.append('')
|
||||
self.config["ignoreerrors"].append(tuple(
|
||||
re_compile(part) for part in parts
|
||||
))
|
||||
|
||||
def read_checking_config(self):
|
||||
"""Read configuration options in section "checking"."""
|
||||
|
|
|
|||
|
|
@ -18,6 +18,12 @@
|
|||
#quiet=1
|
||||
# additional file output
|
||||
#fileoutput = text, html, gml, sql
|
||||
# errors to ignore (URL regular expression, message regular expression)
|
||||
#ignoreerrors=
|
||||
# ignore all errors for broken.example.com:
|
||||
# ^https?://broken\.example\.com/
|
||||
# ignore SSL errors for dev.example.com:
|
||||
# ^https://dev\.example\.com/ ^SSLError .*
|
||||
|
||||
|
||||
##################### logger configuration ##########################
|
||||
|
|
|
|||
76
tests/checker/test_ignoreerrors.py
Normal file
76
tests/checker/test_ignoreerrors.py
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
# Copyright (C) 2004-2014 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Test ignoring of errors.
|
||||
"""
|
||||
|
||||
from re import compile as re_compile
|
||||
|
||||
from tests import need_network
|
||||
from . import LinkCheckTest
|
||||
|
||||
|
||||
class TestFile(LinkCheckTest):
    """
    Test whether ignoring of errors per URL works.

    Every case installs exactly one ``ignoreerrors`` rule, i.e. a single
    (URL regex, message regex) pair, and checks one URL against it.
    """

    def _test(self, url, url_regex, msg_regex, valid):
        """
        Check ``url`` with a single ignore rule and compare the outcome.

        url: the URL to check
        url_regex: pattern source for the URL half of the rule
        msg_regex: pattern source for the error message half of the rule
        valid: True if the URL is expected to be reported as valid,
               False if it is expected to be reported as an error
        """
        confargs = {
            "ignoreerrors": [
                (re_compile(url_regex), re_compile(msg_regex)),
            ],
        }
        resultlines = [
            "url %s" % url,
            "cache key %s" % url,
            "real url %s" % url,
            "valid" if valid else "error",
        ]
        self.direct(url, resultlines, confargs=confargs)

    def test_no_error(self):
        """ Test that a URL without errors stays valid for any rule. """
        # The rule may match the URL, the message, both or neither; the
        # expected result is "valid" in every combination.
        self._test("mailto:good@example.com", "", "", True)
        self._test("mailto:good@example.com", "^$", "", True)
        self._test("mailto:good@example.com", "^$", "^no-match$", True)
        self._test("mailto:good@example.com",
                   r"^mailto:good@example\.com$", "", True)
        self._test("mailto:good@example.com",
                   r"^mailto:good@example\.com$", "^$", True)

    def test_url_regex(self):
        """ Test that URLs are properly matched. """
        # "^$" only matches an empty URL, so the error must be kept.
        self._test("mailto:foo", r"^$", "", False)
        # The empty pattern matches every URL.
        self._test("mailto:foo", r"", "", True)
        self._test("mailto:foo", r"^mailto:foo$", "", True)
        # The pattern is searched, not fully matched: a prefix suffices.
        self._test("mailto:foobar", r"^mailto:foo", "", True)

    def test_msg_regex(self):
        """ Test that error messages are properly matched. """
        # "^$" only matches an empty message, so the error must be kept.
        self._test("mailto:foo", r"^mailto:foo$", "^$", False)
        # The empty pattern matches every message.
        self._test("mailto:foo", r"^mailto:foo$", "", True)
        self._test("mailto:foo", r"^mailto:foo$",
                   r"^Missing `@' in mail address `foo'.$", True)

    @need_network
    def test_internet(self):
        """ Test a few well-known Internet URLs. """
        # Dots in host names are escaped so they only match a literal ".".
        self._test("http://example.com/does-not-exist",
                   r"^http://example\.com/.+$", "^404", True)
        self._test("http://does-not-exist.example.com",
                   r"example\.com", "^ConnectionError", True)
|
||||
|
|
@ -51,6 +51,9 @@ verbose=1
|
|||
warnings=1
|
||||
quiet=0
|
||||
fileoutput = Text, html, Gml, sql,csv, xml, gxml, dot
|
||||
ignoreerrors=
|
||||
^https://example.com/does-not-exist ^404
|
||||
^mailto:foo
|
||||
|
||||
[text]
|
||||
filename=imadoofus.txt
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ Test config parsing.
|
|||
|
||||
import unittest
|
||||
import os
|
||||
from re import Pattern
|
||||
import linkcheck.configuration
|
||||
|
||||
|
||||
|
|
@ -66,6 +67,17 @@ class TestConfig(unittest.TestCase):
|
|||
self.assertTrue(key in patterns)
|
||||
for key in ("url-unicode-domain",):
|
||||
self.assertTrue(key in config["ignorewarnings"])
|
||||
self.assertEqual(len(config["ignoreerrors"]), 2)
|
||||
for parts in config["ignoreerrors"]:
|
||||
self.assertEqual(len(parts), 2)
|
||||
for part in parts:
|
||||
self.assertTrue(isinstance(part, Pattern))
|
||||
self.assertTrue(config["ignoreerrors"][0][1].search(
|
||||
"404 Not Found"
|
||||
))
|
||||
self.assertTrue(config["ignoreerrors"][1][0].search(
|
||||
"mailto:foo"
|
||||
))
|
||||
self.assertTrue(config["checkextern"])
|
||||
# authentication section
|
||||
patterns = [x["pattern"].pattern for x in config["authentication"]]
|
||||
|
|
|
|||
Loading…
Reference in a new issue