linkchecker/linkcheck/configuration/confparse.py

206 lines
9.2 KiB
Python

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2010 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""Parse configuration files"""
import ConfigParser
import re
from .. import log, LOG_CHECK, LinkCheckerError, get_link_pat
def read_multiline (value):
    """Yield the meaningful lines of a multiline option value.

    Every line of value is stripped of surrounding whitespace. Lines that
    are empty after stripping, or that then start with '#', are skipped.
    """
    for raw_line in value.splitlines():
        stripped = raw_line.strip()
        if stripped and not stripped.startswith('#'):
            yield stripped
class LCConfigParser (ConfigParser.RawConfigParser, object):
    """
    Parse a LinkChecker configuration file.

    The values found in the sections "output", "checking",
    "authentication" and "filtering" (plus one section per known logger)
    are copied into the configuration object given to the constructor.
    """

    def __init__ (self, config):
        """Initialize the parser.

        @param config: configuration object receiving the parsed values;
          it is indexed by option name and is also asked for set_debug(),
          logger_new() and add_auth()
        """
        super(LCConfigParser, self).__init__()
        self.config = config

    def read (self, files):
        """Read settings from given config files.

        @param files: handed unchanged to RawConfigParser.read()
        @raises: LinkCheckerError on syntax errors in the config file(s)
        """
        try:
            super(LCConfigParser, self).read(files)
            # Read all the configuration parameters from the given files.
            self.read_output_config()
            self.read_checking_config()
            self.read_authentication_config()
            self.read_filtering_config()
        except Exception as msg:
            # Format the message here; passing the format string and its
            # argument separately would leave the "%s" uninterpolated.
            raise LinkCheckerError(
                "Error parsing configuration: %s" % str(msg))

    def read_output_config (self):
        """Read configuration options in section "output".

        Also copies every option of a section named like a known logger
        into the matching per-logger configuration entry.
        """
        section = "output"
        # NOTE(review): imported locally in the original as well,
        # presumably to avoid a circular import -- confirm before moving.
        from ..logger import Loggers
        for key in Loggers:
            if self.has_section(key):
                for opt in self.options(key):
                    self.config[key][opt] = self.get(key, opt)
                if self.has_option(key, 'parts'):
                    # "parts" is a comma separated list, store it as list
                    val = self.get(key, 'parts')
                    parts = [f.strip() for f in val.split(',')]
                    self.config[key]['parts'] = parts
        if self.has_option(section, "warnings"):
            self.config["warnings"] = self.getboolean(section, "warnings")
        if self.has_option(section, "verbose"):
            # verbose implies warnings
            if self.getboolean(section, "verbose"):
                self.config["verbose"] = True
                self.config["warnings"] = True
        if self.has_option(section, "complete"):
            # complete implies verbose and warnings
            if self.getboolean(section, "complete"):
                self.config["complete"] = True
                self.config["verbose"] = True
                self.config["warnings"] = True
        if self.has_option(section, "quiet"):
            if self.getboolean(section, "quiet"):
                self.config['output'] = 'none'
                self.config['quiet'] = True
        if self.has_option(section, "debug"):
            val = self.get(section, "debug")
            parts = [f.strip() for f in val.split(',')]
            self.config.set_debug(parts)
        if self.has_option(section, "status"):
            self.config["status"] = self.getboolean(section, "status")
        if self.has_option(section, "log"):
            # read after "quiet", so an explicit "log" value overrides
            # the 'none' output selected there
            val = self.get(section, "log").strip()
            self.config['output'] = val
        if self.has_option(section, "fileoutput"):
            for name in self.get(section, "fileoutput").split(","):
                # strip names from whitespace
                name = name.strip()
                # no file output for the blacklist and none Logger
                if name in Loggers and name not in ("blacklist", "none"):
                    output = self.config.logger_new(name, fileoutput=1)
                    self.config['fileoutput'].append(output)
        if self.has_option(section, "interactive"):
            self.config["interactive"] = \
                self.getboolean(section, "interactive")

    def read_checking_config (self):
        """Read configuration options in section "checking".

        @raises: LinkCheckerError if the timeout value is negative
        """
        section = "checking"
        if self.has_option(section, "threads"):
            num = self.getint(section, "threads")
            # negative thread counts are clamped to zero
            self.config['threads'] = max(0, num)
        if self.has_option(section, "timeout"):
            num = self.getint(section, "timeout")
            if num < 0:
                raise LinkCheckerError(
                    _("invalid negative value for timeout: %d\n") % num)
            self.config['timeout'] = num
        if self.has_option(section, "anchors"):
            self.config["anchors"] = self.getboolean(section, "anchors")
        if self.has_option(section, "recursionlevel"):
            num = self.getint(section, "recursionlevel")
            self.config["recursionlevel"] = num
        if self.has_option(section, "warningregex"):
            val = self.get(section, "warningregex")
            # an empty value leaves the configured regex untouched
            if val:
                self.config["warningregex"] = re.compile(val)
        if self.has_option(section, "warnsizebytes"):
            val = self.get(section, "warnsizebytes")
            self.config["warnsizebytes"] = int(val)
        if self.has_option(section, "nntpserver"):
            self.config["nntpserver"] = self.get(section, "nntpserver")
        self.read_check_options(section)

    def read_check_options (self, section):
        """Read check* options.

        @param section: name of the section to read the options from
        """
        if self.has_option(section, "checkhtml"):
            self.config["checkhtml"] = self.getboolean(section, "checkhtml")
        if self.has_option(section, "checkcss"):
            self.config["checkcss"] = self.getboolean(section, "checkcss")
        if self.has_option(section, "checkhtmlw3"):
            self.config["checkhtmlw3"] = \
                self.getboolean(section, "checkhtmlw3")
        if self.has_option(section, "checkcssw3"):
            self.config["checkcssw3"] = self.getboolean(section, "checkcssw3")
        if self.has_option(section, "scanvirus"):
            self.config["scanvirus"] = self.getboolean(section, "scanvirus")
        if self.has_option(section, "clamavconf"):
            # Read as a plain string: getboolean() raises ValueError for
            # anything but a boolean word, e.g. for a clamd.conf file name.
            self.config["clamavconf"] = self.get(section, "clamavconf")
        if self.has_option(section, "cookies"):
            # a single option switches both sending and storing cookies
            self.config["sendcookies"] = self.config["storecookies"] = \
                self.getboolean(section, "cookies")

    def read_authentication_config (self):
        """Read configuration options in section "authentication".

        @raises: LinkCheckerError if an "entry" line does not consist of
          two or three whitespace separated parts, or if the login URL
          does not start with "http:" or "https:"
        """
        section = "authentication"
        if self.has_option(section, "entry"):
            for val in read_multiline(self.get(section, "entry")):
                # each line is "pattern user [password]"
                auth = val.split()
                if len(auth) == 3:
                    self.config.add_auth(pattern=auth[0], user=auth[1],
                                         password=auth[2])
                elif len(auth) == 2:
                    self.config.add_auth(pattern=auth[0], user=auth[1])
                else:
                    raise LinkCheckerError(
                        _("missing auth part in entry %(val)r") % \
                        {"val": val})
        # read login URL and field names
        if self.has_option(section, "loginurl"):
            val = self.get(section, "loginurl").strip()
            if not val.lower().startswith(("http:", "https:")):
                raise LinkCheckerError(
                    _("Invalid login URL `%s'. Only " \
                      "HTTP and HTTPS URLs are supported.") % val)
            self.config["loginurl"] = val
            # a login URL switches on storing and sending cookies
            self.config["storecookies"] = self.config["sendcookies"] = True
        for key in ("loginuserfield", "loginpasswordfield"):
            if self.has_option(section, key):
                self.config[key] = self.get(section, key)
        # read login extra fields
        if self.has_option(section, "loginextrafields"):
            for val in read_multiline(self.get(section, "loginextrafields")):
                # "name:value"; only the first colon separates, so the
                # value itself may contain colons
                name, value = val.split(":", 1)
                self.config["loginextrafields"][name] = value

    def read_filtering_config (self):
        """
        Read configuration options in section "filtering".

        Both "nofollow" and "ignore" lines are appended to the
        "externlinks" list; they differ in the strict flag given to
        get_link_pat().
        """
        section = "filtering"
        if self.has_option(section, "nofollow"):
            for line in read_multiline(self.get(section, "nofollow")):
                pat = get_link_pat(line, strict=0)
                self.config["externlinks"].append(pat)
        if self.has_option(section, "ignorewarnings"):
            # comma separated list of names
            self.config['ignorewarnings'] = [f.strip() for f in \
                self.get(section, 'ignorewarnings').split(',')]
        if self.has_option(section, "ignore"):
            for line in read_multiline(self.get(section, "ignore")):
                pat = get_link_pat(line, strict=1)
                self.config["externlinks"].append(pat)
        if self.has_option(section, "internlinks"):
            pat = get_link_pat(self.get(section, "internlinks"))
            self.config["internlinks"].append(pat)