mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-20 16:00:26 +00:00
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2163 e7d03fd6-7b0d-0410-9947-9c21f3af8025
370 lines
14 KiB
Python
370 lines
14 KiB
Python
# -*- coding: iso-8859-1 -*-
|
|
# Copyright (C) 2000-2005 Bastian Kleineidam
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
"""
|
|
Store metadata and options.
|
|
"""
|
|
|
|
import ConfigParser
|
|
import sys
|
|
import os
|
|
import logging
|
|
import logging.config
|
|
import re
|
|
import urllib
|
|
|
|
import _linkchecker_configdata
|
|
import linkcheck
|
|
import linkcheck.log
|
|
import linkcheck.containers
|
|
|
|
# Application metadata. Version, author, URL and e-mail address are taken
# from the _linkchecker_configdata module; everything else is derived here.
Version = _linkchecker_configdata.version
AppName = u"LinkChecker"
App = AppName+u" "+Version
Author = _linkchecker_configdata.author
# HTML variant of the author name: spaces become non-breaking space
# entities so the name is never wrapped in the middle.
HtmlAuthor = Author.replace(u' ', u'&nbsp;')
# \xa9 is the copyright sign; written as an escape so the value does not
# depend on the encoding this source file is stored in.
Copyright = u"Copyright \xa9 2000-2005 "+Author
# HTML variant uses the character entity instead of the raw character.
HtmlCopyright = u"Copyright &copy; 2000-2005 "+HtmlAuthor
AppInfo = App+u" "+Copyright
HtmlAppInfo = App+u", "+HtmlCopyright
Url = _linkchecker_configdata.url
Email = _linkchecker_configdata.author_email
# Identification string of the form "Name/Version (url; email)".
UserAgent = u"%s/%s (%s; %s)" % (AppName, Version, Url, Email)
Freeware = AppName+u""" comes with ABSOLUTELY NO WARRANTY!
This is free software, and you are welcome to redistribute it
under certain conditions. Look at the file `LICENSE' within this
distribution."""
|
|
|
|
|
|
def norm (path):
    """
    Return a normalized form of the given system path.

    The path is passed through os.path.expanduser, then
    os.path.normpath, and finally os.path.normcase.
    """
    expanded = os.path.expanduser(path)
    collapsed = os.path.normpath(expanded)
    return os.path.normcase(collapsed)
|
|
|
|
|
|
# dynamic options
|
|
class Configuration (dict):
|
|
"""
|
|
Storage for configuration options. Options can both be given from
|
|
the command line as well as from configuration files.
|
|
"""
|
|
|
|
def __init__ (self):
|
|
"""
|
|
Initialize the default options.
|
|
"""
|
|
super(Configuration, self).__init__()
|
|
self["verbose"] = False
|
|
self["warnings"] = False
|
|
self["anchors"] = False
|
|
self["anchorcaching"] = True
|
|
self["externstrictall"] = False
|
|
self["externlinks"] = []
|
|
self["internlinks"] = []
|
|
self["denyallow"] = False
|
|
self["interactive"] = False
|
|
# on ftp, password is set by Pythons ftplib
|
|
self["authentication"] = []
|
|
self["proxy"] = urllib.getproxies()
|
|
self["recursionlevel"] = -1
|
|
self["wait"] = 0
|
|
self['cookies'] = False
|
|
self["status"] = False
|
|
self["fileoutput"] = []
|
|
# Logger configurations
|
|
self["text"] = {
|
|
"filename": "linkchecker-out.txt",
|
|
'colorparent': "default",
|
|
'colorurl': "default",
|
|
'colorname': "default",
|
|
'colorreal': "default",
|
|
'colorbase': "default",
|
|
'colorvalid': "default",
|
|
'colorinvalid': "default",
|
|
'colorinfo': "default",
|
|
'colorwarning': "default",
|
|
'colordltime': "default",
|
|
'colordlsize': "default",
|
|
'colorreset': "default",
|
|
}
|
|
self['html'] = {
|
|
"filename": "linkchecker-out.html",
|
|
'colorbackground': '#fff7e5',
|
|
'colorurl': '#dcd5cf',
|
|
'colorborder': '#000000',
|
|
'colorlink': '#191c83',
|
|
'colorwarning': '#e0954e',
|
|
'colorerror': '#db4930',
|
|
'colorok': '#3ba557',
|
|
}
|
|
self['gml'] = {
|
|
"filename": "linkchecker-out.gml",
|
|
}
|
|
self['sql'] = {
|
|
"filename": "linkchecker-out.sql",
|
|
'separator': ';',
|
|
'dbname': 'linksdb',
|
|
}
|
|
self['csv'] = {
|
|
"filename": "linkchecker-out.csv",
|
|
'separator': ';',
|
|
}
|
|
self['blacklist'] = {
|
|
"filename": "~/.linkchecker_blacklist",
|
|
}
|
|
self['xml'] = {
|
|
"filename": "linkchecker-out.xml",
|
|
}
|
|
self['none'] = {}
|
|
self['logger'] = self.logger_new('text')
|
|
self["warningregex"] = None
|
|
self["warnsizebytes"] = None
|
|
self["nntpserver"] = os.environ.get("NNTP_SERVER", None)
|
|
self["threads"] = 10
|
|
|
|
def init_logging (self, debug=None):
|
|
"""
|
|
Load logging.conf file settings to set up the
|
|
application logging (not to be confused with check loggers).
|
|
When debug is not None it is expected to be a list of
|
|
logger names for which debugging will be enabled.
|
|
|
|
Activating debugging disables threading.
|
|
"""
|
|
config_dir = _linkchecker_configdata.config_dir
|
|
filename = norm(os.path.join(config_dir, "logging.conf"))
|
|
logging.config.fileConfig(filename)
|
|
handler = linkcheck.ansicolor.ColoredStreamHandler(strm=sys.stderr)
|
|
handler.setFormatter(logging.Formatter("%(levelname)s %(message)s"))
|
|
logging.getLogger(linkcheck.LOG).addHandler(handler)
|
|
if debug is not None:
|
|
self['debug'] = True
|
|
# debugging disables threading
|
|
self['threads'] = 0
|
|
# set debugging on given logger names
|
|
if 'all' in debug:
|
|
debug = linkcheck.lognames.values()
|
|
for name in debug:
|
|
logging.getLogger(name).setLevel(logging.DEBUG)
|
|
|
|
def logger_new (self, loggertype, **kwargs):
|
|
"""
|
|
Instantiate new logger and return it.
|
|
"""
|
|
args = {}
|
|
args.update(self[loggertype])
|
|
args.update(kwargs)
|
|
return linkcheck.Loggers[loggertype](**args)
|
|
|
|
def logger_add (self, loggertype, loggerclass, loggerargs=None):
|
|
"""
|
|
Add a new logger type to the known loggers.
|
|
"""
|
|
if loggerargs is None:
|
|
loggerargs = {}
|
|
linkcheck.Loggers[loggertype] = loggerclass
|
|
self[loggertype] = loggerargs
|
|
|
|
def read (self, files=None):
|
|
"""
|
|
Read settings from given config files.
|
|
"""
|
|
if files is None:
|
|
cfiles = []
|
|
else:
|
|
cfiles = files[:]
|
|
if not cfiles:
|
|
# system wide config settings
|
|
config_dir = _linkchecker_configdata.config_dir
|
|
cfiles.append(norm(os.path.join(config_dir, "linkcheckerrc")))
|
|
# per user config settings
|
|
cfiles.append(norm("~/.linkcheckerrc"))
|
|
self.read_config(cfiles)
|
|
# re-init logger
|
|
self['logger'] = self.logger_new('text')
|
|
|
|
def read_config (self, files):
|
|
"""
|
|
Read all the configuration parameters from the given files.
|
|
"""
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK,
|
|
"reading configuration from %s", files)
|
|
try:
|
|
cfgparser = ConfigParser.ConfigParser()
|
|
cfgparser.read(files)
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
return
|
|
self.read_output_config(cfgparser)
|
|
self.read_checking_config(cfgparser)
|
|
self.read_authentication_config(cfgparser)
|
|
self.read_filtering_config(cfgparser)
|
|
|
|
def read_output_config (self, cfgparser):
|
|
"""
|
|
Read configuration options in section "output".
|
|
"""
|
|
section = "output"
|
|
for key in linkcheck.Loggers.keys():
|
|
if cfgparser.has_section(key):
|
|
for opt in cfgparser.options(key):
|
|
try:
|
|
self[key][opt] = cfgparser.get(key, opt)
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
self[key]['fields'] = [f.strip() \
|
|
for f in cfgparser.get(key, 'fields').split(',')]
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
logger = cfgparser.get(section, "log")
|
|
if linkcheck.Loggers.has_key(logger):
|
|
self['logger'] = self.logger_new(logger)
|
|
else:
|
|
linkcheck.log.warn(_("invalid log option %r"), logger)
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
if cfgparser.getboolean(section, "verbose"):
|
|
self["verbose"] = True
|
|
self["warnings"] = True
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
if cfgparser.getboolean(section, "quiet"):
|
|
self['logger'] = self.logger_new('none')
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
self["status"] = cfgparser.getboolean(section, "status")
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
self["warnings"] = cfgparser.getboolean(section, "warnings")
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
filelist = cfgparser.get(section, "fileoutput").split(",")
|
|
for arg in filelist:
|
|
arg = arg.strip()
|
|
# no file output for the blacklist and none Logger
|
|
if linkcheck.Loggers.has_key(arg) and \
|
|
arg not in ["blacklist", "none"]:
|
|
self['fileoutput'].append(
|
|
self.logger_new(arg, fileoutput=1))
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
self["interactive"] = cfgparser.getboolean(section, "interactive")
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
|
|
def read_checking_config (self, cfgparser):
|
|
"""
|
|
Read configuration options in section "checking".
|
|
"""
|
|
section = "checking"
|
|
try:
|
|
num = cfgparser.getint(section, "threads")
|
|
self['threads'] = num
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
self["anchors"] = cfgparser.getboolean(section, "anchors")
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
self["debug"] = cfgparser.get(section, "debug")
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
num = cfgparser.getint(section, "recursionlevel")
|
|
self["recursionlevel"] = num
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
self["externstrictall"] = cfgparser.getboolean(section, "externstrictall")
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
wr = cfgparser.get(section, "warningregex")
|
|
if wr:
|
|
self["warningregex"] = re.compile(wr)
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
self["warnsizebytes"] = int(cfgparser.get(section,
|
|
"warnsizebytes"))
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
self["nntpserver"] = cfgparser.get(section, "nntpserver")
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
self["anchorcaching"] = cfgparser.getboolean(section,
|
|
"anchorcaching")
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
|
|
def read_authentication_config (self, cfgparser):
|
|
"""
|
|
Read configuration options in section "authentication".
|
|
"""
|
|
section = "authentication"
|
|
try:
|
|
i = 1
|
|
while 1:
|
|
auth = cfgparser.get(section, "entry%d" % i).split()
|
|
if len(auth)!=3:
|
|
break
|
|
auth[0] = re.compile(auth[0])
|
|
self["authentication"].insert(0, {'pattern': auth[0],
|
|
'user': auth[1],
|
|
'password': auth[2]})
|
|
i += 1
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
|
|
def read_filtering_config (self, cfgparser):
|
|
"""
|
|
Read configuration options in section "filtering".
|
|
"""
|
|
section = "filtering"
|
|
try:
|
|
i = 1
|
|
while 1:
|
|
ctuple = cfgparser.get(section, "extern%d" % i).split()
|
|
if len(ctuple)!=2:
|
|
linkcheck.log.error(
|
|
_("extern%d: syntax error %s\n")%(i, ctuple))
|
|
break
|
|
self["externlinks"].append(
|
|
linkcheck.get_link_pat(ctuple[0], strict=int(ctuple[1])))
|
|
i += 1
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
self["internlinks"].append(
|
|
linkcheck.get_link_pat(cfgparser.get(section, "internlinks")))
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
|
try:
|
|
self["denyallow"] = cfgparser.getboolean(section, "denyallow")
|
|
except ConfigParser.Error, msg:
|
|
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|