mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-20 07:50:24 +00:00
249 lines
11 KiB
Python
249 lines
11 KiB
Python
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2009 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
"""Parse configuration files"""
|
|
|
|
import ConfigParser
|
|
import re
|
|
from .. import log, LOG_CHECK, LinkCheckerError, get_link_pat
|
|
|
|
|
|
def read_multiline (value):
    """Helper function reading multiline values.

    Generator yielding every line of ``value`` with surrounding whitespace
    stripped. Lines that are empty after stripping, or that start with
    ``#`` after stripping, are skipped.

    @param value: text of a (possibly multiline) option value
    @return: iterator over the remaining stripped lines
    """
    for raw_line in value.splitlines():
        entry = raw_line.strip()
        # Skip blank lines and comment lines.
        if entry and not entry.startswith('#'):
            yield entry
|
|
|
|
|
|
class LCConfigParser (ConfigParser.RawConfigParser, object):
    """
    Parse a LinkChecker configuration file.

    The parsed option values are copied into ``self.config``, the object
    given to the constructor. It is used like a mapping (item assignment,
    nested item access) and must additionally provide ``set_debug()`` and
    ``logger_new()``, which are called while reading the "output" section.

    Error messages are passed through ``_``, which is not defined in this
    module and is expected to be available as a translation function when
    the messages are produced.
    """

    def __init__ (self, config):
        """Store the configuration object that read() fills in."""
        super(LCConfigParser, self).__init__()
        self.config = config

    def read (self, files):
        """Read settings from given config files.

        @raises: LinkCheckerError on syntax errors in the config file(s)
        """
        try:
            super(LCConfigParser, self).read(files)
            # Read all the configuration parameters from the given files.
            self.read_output_config()
            self.read_checking_config()
            self.read_authentication_config()
            self.read_filtering_config()
        except Exception as msg:
            # Interpolate the message here; passing the format string and
            # the value as two exception arguments leaves "%s" unformatted.
            raise LinkCheckerError(
                "Error parsing configuration: %s" % str(msg))

    @staticmethod
    def _split_list (value):
        """Split a comma separated value into a list of stripped items."""
        return [item.strip() for item in value.split(',')]

    def _read_boolean_options (self, section, names):
        """Copy each boolean option of section that is set into the
        configuration under the same key."""
        for name in names:
            if self.has_option(section, name):
                self.config[name] = self.getboolean(section, name)

    def _numbered_options (self, section, prefix):
        """Yield (number, value) for the deprecated numbered options
        prefix1, prefix2, ... of section, stopping at the first number
        that is not set."""
        num = 1
        while True:
            key = "%s%d" % (prefix, num)
            if not self.has_option(section, key):
                break
            yield num, self.get(section, key)
            num += 1

    def _add_auth_entry (self, val):
        """Parse one "pattern user password" entry and put it at the
        front of the authentication list.

        @raises: LinkCheckerError if val does not consist of exactly
          three whitespace separated parts
        """
        auth = val.split()
        if len(auth) != 3:
            raise LinkCheckerError(
                _("missing auth part in entry %(val)r") % {"val": val})
        self.config["authentication"].insert(0,
            {'pattern': re.compile(auth[0]),
             'user': auth[1],
             'password': auth[2]})

    def read_output_config (self):
        """Read configuration options in section "output"."""
        section = "output"
        from ..logger import Loggers
        # Every logger may have a section of its own, named after it.
        for key in Loggers:
            if self.has_section(key):
                for opt in self.options(key):
                    self.config[key][opt] = self.get(key, opt)
                # 'parts' is stored as a list instead of the raw string.
                if self.has_option(key, 'parts'):
                    parts = self._split_list(self.get(key, 'parts'))
                    self.config[key]['parts'] = parts
        self._read_boolean_options(section, ("warnings",))
        # verbose implies warnings
        if self.has_option(section, "verbose") and \
           self.getboolean(section, "verbose"):
            self.config["verbose"] = True
            self.config["warnings"] = True
        # complete implies verbose and warnings
        if self.has_option(section, "complete") and \
           self.getboolean(section, "complete"):
            self.config["complete"] = True
            self.config["verbose"] = True
            self.config["warnings"] = True
        if self.has_option(section, "quiet") and \
           self.getboolean(section, "quiet"):
            self.config['output'] = 'none'
            self.config['quiet'] = True
        if self.has_option(section, "debug"):
            self.config.set_debug(
                self._split_list(self.get(section, "debug")))
        self._read_boolean_options(section, ("status",))
        # An explicit "log" value is read after "quiet" and therefore
        # replaces the 'none' output selected by it.
        if self.has_option(section, "log"):
            self.config['output'] = self.get(section, "log").strip()
        if self.has_option(section, "fileoutput"):
            for name in self._split_list(self.get(section, "fileoutput")):
                # no file output for the blacklist and none Logger
                if name in Loggers and name not in ("blacklist", "none"):
                    output = self.config.logger_new(name, fileoutput=1)
                    self.config['fileoutput'].append(output)
        self._read_boolean_options(section, ("interactive",))

    def read_checking_config (self):
        """Read configuration options in section "checking".

        @raises: LinkCheckerError on a negative timeout value
        """
        section = "checking"
        if self.has_option(section, "threads"):
            # negative thread counts are clamped to zero
            self.config['threads'] = max(0, self.getint(section, "threads"))
        if self.has_option(section, "timeout"):
            num = self.getint(section, "timeout")
            if num < 0:
                raise LinkCheckerError(
                    _("invalid negative value for timeout: %d\n") % num)
            self.config['timeout'] = num
        self._read_boolean_options(section, ("anchors",))
        if self.has_option(section, "recursionlevel"):
            self.config["recursionlevel"] = \
                self.getint(section, "recursionlevel")
        if self.has_option(section, "warningregex"):
            val = self.get(section, "warningregex")
            # an empty value leaves the configured regex untouched
            if val:
                self.config["warningregex"] = re.compile(val)
        if self.has_option(section, "warnsizebytes"):
            self.config["warnsizebytes"] = \
                self.getint(section, "warnsizebytes")
        if self.has_option(section, "nntpserver"):
            self.config["nntpserver"] = self.get(section, "nntpserver")
        self._read_boolean_options(section, ("anchorcaching",))
        self.read_check_options(section)

    def read_check_options (self, section):
        """Read check* options."""
        self._read_boolean_options(section,
            ("checkhtml", "checkcss", "checkhtmlw3", "checkcssw3",
             "scanvirus"))
        if self.has_option(section, "clamavconf"):
            # NOTE(review): read as a plain string on the assumption that
            # clamavconf names a ClamAV configuration file; reading it
            # with getboolean() rejected every non-boolean value. Confirm
            # against the default stored in the configuration object.
            self.config["clamavconf"] = self.get(section, "clamavconf")

    def read_authentication_config (self):
        """Read configuration options in section "authentication".

        @raises: LinkCheckerError if an entry does not have exactly
          three parts
        """
        section = "authentication"
        if self.has_option(section, "entry"):
            for val in read_multiline(self.get(section, "entry")):
                self._add_auth_entry(val)
        # backward compatibility
        for num, val in self._numbered_options(section, "entry"):
            log.warn(LOG_CHECK,
              _("the entry%(num)d syntax is deprecated; use " \
                "the new multiline configuration syntax") % {"num": num})
            self._add_auth_entry(val)

    def read_filtering_config (self):
        """
        Read configuration options in section "filtering".
        """
        section = "filtering"
        if self.has_option(section, "nofollow"):
            for line in read_multiline(self.get(section, "nofollow")):
                pat = get_link_pat(line, strict=0)
                self.config["externlinks"].append(pat)
        # backward compatibility
        for num, val in self._numbered_options(section, "nofollow"):
            log.warn(LOG_CHECK,
              _("the nofollow%(num)d syntax is deprecated; use " \
                "the new multiline configuration syntax") % {"num": num})
            pat = get_link_pat(val, strict=0)
            self.config["externlinks"].append(pat)
        if self.has_option(section, "noproxyfor"):
            for val in read_multiline(self.get(section, "noproxyfor")):
                self.config["noproxyfor"].append(re.compile(val))
        # backward compatibility
        for num, val in self._numbered_options(section, "noproxyfor"):
            log.warn(LOG_CHECK,
              _("the noproxyfor%(num)d syntax is deprecated; use " \
                "the new multiline configuration syntax") % {"num": num})
            self.config["noproxyfor"].append(re.compile(val))
        if self.has_option(section, "ignorewarnings"):
            self.config['ignorewarnings'] = \
                self._split_list(self.get(section, 'ignorewarnings'))
        if self.has_option(section, "ignore"):
            for line in read_multiline(self.get(section, "ignore")):
                pat = get_link_pat(line, strict=1)
                self.config["externlinks"].append(pat)
        # backward compatibility
        for num, val in self._numbered_options(section, "ignore"):
            # backwards compatibility: split and ignore second part
            val = val.split()[0]
            log.warn(LOG_CHECK,
              _("the ignore%(num)d syntax is deprecated; use " \
                "the new multiline configuration syntax") % {"num": num})
            pat = get_link_pat(val, strict=1)
            self.config["externlinks"].append(pat)
        if self.has_option(section, "internlinks"):
            pat = get_link_pat(self.get(section, "internlinks"))
            self.config["internlinks"].append(pat)
|