Check regular expressions from users for errors.

This commit is contained in:
Bastian Kleineidam 2014-03-01 19:15:48 +01:00
parent c20005a031
commit 78a99717fe
4 changed files with 35 additions and 12 deletions

View file

@@ -6,18 +6,20 @@ Features:
checks into plugins (see upgrading.txt for more info).
- checking: Add options to limit the number of requests per second,
allowed URL schemes and maximum file or download size.
Closes: GH bug #397, #465
Closes: GH bug #397, #465, #420
- gui: UI language can be changed dynamically.
Closes: GH bug #391
Changes:
- checking: Use the Python requests module for HTTP and HTTPS requests.
Closes: GH bug #393, #463
Closes: GH bug #393, #463, #417
- logging: Removed download, domains and robots.txt statistics.
- logging: HTML output is now in HTML5.
- checking: Removed 301 warning since 301 redirects are used
a lot without updating the old URL links.
Closes: GH bug #444
Also, recursive redirection is not checked any more since there
is a maximum redirection limit anyway.
Closes: GH bug #444, #419
- checking: Disallowed access by robots.txt is an info now, not
a warning. Otherwise it produces a lot of warnings which
is counter-productive.
@@ -46,6 +48,8 @@ Fixes:
- checking: For login pages, use twill form field counter if
the field has neither name nor id.
Closes: GH bug #428
- configuration: Check regular expressions for errors.
Closes: GH bug #410
8.6 "About Time" (released 8.1.2014)

View file

@@ -121,6 +121,7 @@ def get_link_pat (arg, strict=False):
@type strict: bool
@return: dictionary with keys 'pattern', 'negate' and 'strict'
@rtype: dict
@raises: re.error on invalid regular expressions
"""
log.debug(LOG_CHECK, "Link pattern %r strict=%s", arg, strict)
if arg.startswith('!'):
@@ -129,8 +130,13 @@ def get_link_pat (arg, strict=False):
else:
pattern = arg
negate = False
try:
regex = re.compile(pattern)
except re.error as msg:
log.warn(LOG_CHECK, "invalid regular expression %r: %s" % (pattern, msg))
raise
return {
"pattern": re.compile(pattern),
"pattern": regex,
"negate": negate,
"strict": strict,
}

View file

@@ -261,16 +261,26 @@ class LinkCheckerMain (QtGui.QMainWindow, Ui_MainWindow):
self.config.reset_loglevel()
if data["warninglines"]:
lines = data["warninglines"].splitlines()
ro = re.compile(warninglines2regex(lines))
self.backup_config("warningregex", ro)
pattern = warninglines2regex(lines)
try:
ro = re.compile(pattern)
self.backup_config("warningregex", ro)
except re.error as err:
msg = _("Invalid regular expression %r: %s" % (pattern, err))
self.set_statusmsg(msg)
# set ignore patterns
ignorepats = data["ignorelines"].strip()
if ignorepats:
self.backup_config("externlinks")
lines = ignorepats.splitlines()
for line in lines:
pat = get_link_pat(line, strict=1)
self.config["externlinks"].append(pat)
try:
pat = get_link_pat(line, strict=1)
self.config["externlinks"].append(pat)
except re.error as err:
msg = _("Invalid regular expression %r: %s" % (pat, err))
self.set_statusmsg(msg)
def backup_config (self, key, value=None):
"""Backup config key if not already done and set given value."""

View file

@@ -37,10 +37,13 @@ class RegexCheck(_ContentPlugin):
def __init__(self, config):
"""Set warning regex from config."""
super(RegexCheck, self).__init__(config)
if config["warningregex"]:
self.warningregex = re.compile(config["warningregex"])
else:
self.warningregex = None
self.warningregex = None
pattern = config["warningregex"]
if pattern:
try:
self.warningregex = re.compile(pattern)
except re.error as msg:
log.warn(LOG_PLUGIN, "Invalid regex pattern %r: %s" % (pattern, msg))
def check(self, url_data):
"""Check content."""