linkchecker/linkcheck/configuration/confparse.py
2006-09-24 10:24:41 +00:00

319 lines
14 KiB
Python

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2000-2006 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""Parse configuration files"""
import ConfigParser
import re
import linkcheck.log
class LCConfigParser (ConfigParser.RawConfigParser, object):
"""
Parse a LinkChecker configuration file.
"""
def __init__ (self, config):
super(LCConfigParser, self).__init__()
self.config = config
def read (self, files):
"""
Read settings from given config files.
@raises: LinkCheckerError on syntax errors in the config file(s)
"""
try:
super(LCConfigParser, self).read(files)
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
return
# Read all the configuration parameters from the given files.
self.read_output_config()
self.read_checking_config()
self.read_authentication_config()
self.read_filtering_config()
# re-init logger
self.config['logger'] = self.config.logger_new('text')
def read_output_config (self):
"""
Read configuration options in section "output".
"""
section = "output"
for key in linkcheck.Loggers.iterkeys():
if self.has_section(key):
for opt in self.options(key):
try:
self.config[key][opt] = self.get(key, opt)
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
self.config[key]['parts'] = [f.strip() for f in \
self.get(key, 'parts').split(',')]
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
self.config["warnings"] = self.getboolean(section, "warnings")
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
if self.getboolean(section, "verbose"):
self.config["verbose"] = True
self.config["warnings"] = True
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
if self.getboolean(section, "quiet"):
self.config['logger'] = self.config.logger_new('none')
self.config['quiet'] = True
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
self.config["status"] = self.getboolean(section, "status")
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
logger = self.get(section, "log")
if linkcheck.Loggers.has_key(logger):
self.config['logger'] = self.config.logger_new(logger)
else:
linkcheck.log.warn(_("invalid log option %r"), logger)
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
filelist = self.get(section, "fileoutput").split(",")
for arg in filelist:
arg = arg.strip()
# no file output for the blacklist and none Logger
if linkcheck.Loggers.has_key(arg) and \
arg not in ["blacklist", "none"]:
self.config['fileoutput'].append(
self.config.logger_new(arg, fileoutput=1))
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
self.config["interactive"] = self.getboolean(section, "interactive")
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
def read_checking_config (self):
"""
Read configuration options in section "checking".
"""
section = "checking"
try:
num = self.getint(section, "threads")
self.config['threads'] = max(0, num)
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
num = self.getint(section, "timeout")
if num < 0:
raise linkcheck.LinkCheckerError(linkcheck.LOG_CHECK,
_("invalid negative value for timeout: %d\n"), num)
self.config['threads'] = num
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
self.config["anchors"] = self.getboolean(section, "anchors")
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
self.config["debug"] = self.get(section, "debug")
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
num = self.getint(section, "recursionlevel")
self.config["recursionlevel"] = num
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
arg = self.get(section, "warningregex")
if arg:
try:
self.config["warningregex"] = re.compile(arg)
except re.error, msg:
raise linkcheck.LinkCheckerError(linkcheck.LOG_CHECK,
_("syntax error in warningregex %(regex)r: %(msg)s\n") % \
{"regex": arg, "msg": msg})
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
self.config["warnsizebytes"] = int(self.get(section,
"warnsizebytes"))
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
self.config["nntpserver"] = self.get(section, "nntpserver")
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
self.config["anchorcaching"] = self.getboolean(section,
"anchorcaching")
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
value = self.get(section, "noproxyfor")
for line in value.splitlines():
line = line.strip()
if not line or line.startswith('#'):
continue
try:
arg = re.compile(arg)
except re.error, msg:
raise linkcheck.LinkCheckerError(linkcheck.LOG_CHECK,
_("syntax error in noproxyfor %(line)r: %(msg)s") % \
{"line": line, "msg": msg})
self.config["noproxyfor"].append(arg)
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
# XXX backward compatibility
i = 1
while 1:
arg = self.get(section, "noproxyfor%d" % i)
linkcheck.log.warn(linkcheck.LOG_CHECK,
_("the noproxyfor%(num)d syntax is deprecated; use " \
"the new multiline configuration syntax") % {"num": i})
try:
arg = re.compile(arg)
except re.error, msg:
raise linkcheck.LinkCheckerError(linkcheck.LOG_CHECK,
_("syntax error in noproxyfor%(num)d %(arg)r: %(msg)s") % \
{"num": i, "arg": arg, "msg": msg})
self.config["noproxyfor"].append(arg)
i += 1
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
num = self.getint(section, "maxqueuesize")
self.config["maxqueuesize"] = num
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
def read_authentication_config (self):
"""
Read configuration options in section "authentication".
"""
section = "authentication"
try:
arg = self.get(section, "entry")
for line in arg.splitlines():
line = line.strip()
if not line or line.startswith('#'):
continue
auth = line.split()
if len(auth) != 3:
raise linkcheck.LinkCheckerError(linkcheck.LOG_CHECK,
_("missing auth part in entry %(line)r") % \
{"line": line})
try:
auth[0] = re.compile(auth[0])
except re.error, msg:
raise linkcheck.LinkCheckerError(linkcheck.LOG_CHECK,
_("syntax error in entry %(line)r: %(msg)s") % \
{"line": line, "msg": msg})
self.config["authentication"].insert(0, {'pattern': auth[0],
'user': auth[1],
'password': auth[2]})
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
# XXX backward compatibility
i = 1
while 1:
auth = self.get(section, "entry%d" % i).split()
linkcheck.log.warn(linkcheck.LOG_CHECK,
_("the entry%(num)d syntax is deprecated; use " \
"the new multiline configuration syntax") % {"num": i})
if len(auth) != 3:
break
try:
auth[0] = re.compile(auth[0])
except re.error, msg:
raise linkcheck.LinkCheckerError(linkcheck.LOG_CHECK,
_("syntax error in entry%(num)d %(arg)r: %(msg)s") % \
{"num": i, "arg": auth[0], "msg": msg})
self.config["authentication"].insert(0, {'pattern': auth[0],
'user': auth[1],
'password': auth[2]})
i += 1
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
def read_filtering_config (self):
"""
Read configuration options in section "filtering".
"""
section = "filtering"
try:
arg = self.get(section, "nofollow")
for line in arg.splitlines():
line = line.strip()
if not line or line.startswith('#'):
continue
pat = linkcheck.get_link_pat(line, strict=0)
self.config["externlinks"].append(pat)
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
# XXX backward compatibility
i = 1
while 1:
val = self.get(section, "nofollow%d" % i)
linkcheck.log.warn(linkcheck.LOG_CHECK,
_("the nofollow%(num)d syntax is deprecated; use " \
"the new multiline configuration syntax") % {"num": i})
pat = linkcheck.get_link_pat(val, strict=0)
self.config["externlinks"].append(pat)
i += 1
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
self.config['ignorewarnings'] = [f.strip() for f in \
self.get(section, 'ignorewarnings').split(',')]
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
arg = self.get(section, "ignore")
for line in arg.splitlines():
line = line.strip()
if not line or line.startswith('#'):
continue
pat = linkcheck.get_link_pat(line, strict=1)
self.config["externlinks"].append(pat)
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
# XXX backward compatibility
i = 1
while 1:
# XXX backwards compatibility: split and ignore second part
val = self.get(section, "ignore%d" % i).split()[0]
linkcheck.log.warn(linkcheck.LOG_CHECK,
_("the ignore%(num)d syntax is deprecated; use " \
"the new multiline configuration syntax") % {"num": i})
pat = linkcheck.get_link_pat(val, strict=1)
self.config["externlinks"].append(pat)
i += 1
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
try:
self.config["internlinks"].append(
linkcheck.get_link_pat(self.get(section, "internlinks")))
except ConfigParser.Error, msg:
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK, msg)