2006-05-03 18:24:46 +00:00
# -*- coding: iso-8859-1 -*-
2014-01-08 21:33:04 +00:00
# Copyright (C) 2000-2014 Bastian Kleineidam
2006-05-03 18:24:46 +00:00
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
2009-07-24 21:58:20 +00:00
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
2006-05-03 18:24:46 +00:00
""" Parse configuration files """
import ConfigParser
2012-10-15 12:36:10 +00:00
import os
2014-02-28 23:12:34 +00:00
from . . import LinkCheckerError , get_link_pat , LOG_CHECK , log , fileutil , plugins
2006-05-03 18:24:46 +00:00
2007-12-01 15:50:33 +00:00
def read_multiline(value):
    """Helper function reading multiline values.

    Splits the given string into lines, strips surrounding whitespace
    from each line and yields every line that is neither empty nor a
    comment (a line starting with '#').

    @param value: the raw, possibly multi-line option value
    @return: generator of stripped, non-empty, non-comment lines
    """
    for line in value.splitlines():
        line = line.strip()
        # The line is already stripped, so the comment marker has to be
        # compared without any surrounding whitespace.
        if not line or line.startswith('#'):
            continue
        yield line
2006-08-02 20:12:40 +00:00
class LCConfigParser(ConfigParser.RawConfigParser, object):
    """
    Parse a LinkChecker configuration file.

    The parsed option values are copied into the configuration object
    that is passed to the constructor and stored as self.config.
    """

    def __init__(self, config):
        """Initialize configuration.

        @param config: object receiving the parsed values; it is accessed
          with item syntax (config[key]) and, for some options, through
          the methods add_auth(), set_debug() and logger_new()
        """
        super(LCConfigParser, self).__init__()
        self.config = config

    def read(self, files):
        """Read settings from given config files.

        The names of the files that could be parsed are stored in
        self.read_ok; a warning is logged for the ones that could not.

        @param files: list of configuration file names
        @raises: LinkCheckerError on syntax errors in the config file(s)
        """
        assert isinstance(files, list), "Invalid file list %r" % files
        try:
            self.read_ok = super(LCConfigParser, self).read(files)
            if len(self.read_ok) < len(files):
                failed_files = set(files) - set(self.read_ok)
                log.warn(LOG_CHECK,
                    "Could not read configuration files %s.", failed_files)
            # Read all the configuration parameters from the given files.
            self.read_checking_config()
            self.read_authentication_config()
            self.read_filtering_config()
            self.read_output_config()
            self.read_plugin_config()
        except Exception as msg:
            # Any failure while parsing or validating is reported as a
            # single configuration error type.
            raise LinkCheckerError(
                _("Error parsing configuration: %s") % unicode(msg))

    def read_string_option(self, section, option, allowempty=False):
        """Read a string option.

        Nothing happens if the option is not present in the section.

        @param section: name of the configuration section
        @param option: name of the option; also used as key in self.config
        @param allowempty: if False, an empty value is rejected
        @raises: LinkCheckerError if the value is empty and allowempty
          is False
        """
        if self.has_option(section, option):
            value = self.get(section, option)
            if not allowempty and not value:
                raise LinkCheckerError(
                    _("invalid empty value for %s: %s\n") % (option, value))
            self.config[option] = value

    def read_boolean_option(self, section, option):
        """Read a boolean option.

        Nothing happens if the option is not present in the section.

        @param section: name of the configuration section
        @param option: name of the option; also used as key in self.config
        """
        if self.has_option(section, option):
            self.config[option] = self.getboolean(section, option)

    def read_int_option(self, section, option, key=None, min=None, max=None):
        """Read an integer option.

        Nothing happens if the option is not present in the section.

        @param section: name of the configuration section
        @param option: name of the option
        @param key: key in self.config to store the value under; defaults
          to the option name
        @param min: smallest allowed value, or None for no lower bound
        @param max: largest allowed value, or None for no upper bound
        @raises: LinkCheckerError if the value is outside of [min, max]
        """
        if self.has_option(section, option):
            num = self.getint(section, option)
            if min is not None and num < min:
                raise LinkCheckerError(
                    _("invalid value for %s: %d must not be less than %d") %
                    (option, num, min))
            # The upper bound is violated only by values above max.
            if max is not None and num > max:
                raise LinkCheckerError(
                    _("invalid value for %s: %d must not be greater than %d") %
                    (option, num, max))
            if key is None:
                key = option
            self.config[key] = num

    def read_output_config(self):
        """Read configuration options in section "output".

        Additionally, for every logger class a section named after the
        logger is copied option by option into self.config[<logger name>].
        """
        section = "output"
        from ..logger import LoggerClasses
        for c in LoggerClasses:
            key = c.LoggerName
            if self.has_section(key):
                for opt in self.options(key):
                    self.config[key][opt] = self.get(key, opt)
                if self.has_option(key, 'parts'):
                    # 'parts' is a comma-separated list; replace the raw
                    # string stored above with the normalized list.
                    val = self.get(key, 'parts')
                    parts = [f.strip().lower() for f in val.split(',')]
                    self.config[key]['parts'] = parts
        self.read_boolean_option(section, "warnings")
        if self.has_option(section, "verbose"):
            if self.getboolean(section, "verbose"):
                # verbose output also turns on warnings
                self.config["verbose"] = True
                self.config["warnings"] = True
        if self.has_option(section, "quiet"):
            if self.getboolean(section, "quiet"):
                self.config['output'] = 'none'
                self.config['quiet'] = True
        if self.has_option(section, "debug"):
            val = self.get(section, "debug")
            parts = [f.strip().lower() for f in val.split(',')]
            self.config.set_debug(parts)
        self.read_boolean_option(section, "status")
        if self.has_option(section, "log"):
            # Read after "quiet", so an explicit "log" value replaces the
            # 'none' output set above.
            val = self.get(section, "log").strip().lower()
            self.config['output'] = val
        if self.has_option(section, "fileoutput"):
            loggers = self.get(section, "fileoutput").split(",")
            # strip names from whitespace
            loggers = (x.strip().lower() for x in loggers)
            # no file output for the blacklist and none Logger
            from ..logger import LoggerNames
            loggers = (x for x in loggers if x in LoggerNames and
                       x not in ("blacklist", "none"))
            for val in loggers:
                output = self.config.logger_new(val, fileoutput=1)
                self.config['fileoutput'].append(output)

    def read_checking_config(self):
        """Read configuration options in section "checking".

        @raises: LinkCheckerError on empty string values or integer
          values below their allowed minimum
        """
        section = "checking"
        self.read_int_option(section, "threads", min=-1)
        # A negative thread count is normalized to zero. This runs even
        # when the option is absent, so self.config['threads'] must
        # already hold a value.
        self.config['threads'] = max(0, self.config['threads'])
        self.read_int_option(section, "timeout", min=1)
        self.read_int_option(section, "aborttimeout", min=1)
        self.read_int_option(section, "recursionlevel", min=-1)
        self.read_string_option(section, "nntpserver")
        self.read_string_option(section, "useragent")
        self.read_int_option(section, "maxrequestspersecond", min=1)
        self.read_int_option(section, "maxnumurls", min=0)
        self.read_int_option(section, "maxfilesizeparse", min=1)
        self.read_int_option(section, "maxfilesizedownload", min=1)
        if self.has_option(section, "allowedschemes"):
            # comma-separated list, normalized to lowercase
            self.config['allowedschemes'] = [
                x.strip().lower()
                for x in self.get(section, 'allowedschemes').split(',')]
        self.read_boolean_option(section, "debugmemory")
        self.read_string_option(section, "cookiefile")
        self.read_string_option(section, "localwebroot")
        try:
            self.read_boolean_option(section, "sslverify")
        except ValueError:
            # Not a boolean: keep the raw string value instead.
            # NOTE(review): presumably a certificate file name - confirm.
            self.read_string_option(section, "sslverify")
        self.read_int_option(section, "maxrunseconds", min=0)

    def read_authentication_config(self):
        """Read configuration options in section "authentication".

        @raises: LinkCheckerError on malformed "entry" lines or a login
          URL that is neither HTTP nor HTTPS
        """
        section = "authentication"
        # names of the entries that carry a password, for the file
        # permission warning at the end
        password_fields = []
        if self.has_option(section, "entry"):
            for val in read_multiline(self.get(section, "entry")):
                # each line is "pattern user [password]"
                auth = val.split()
                if len(auth) == 3:
                    self.config.add_auth(pattern=auth[0], user=auth[1],
                                         password=auth[2])
                    password_fields.append(
                        "entry/%s/%s" % (auth[0], auth[1]))
                elif len(auth) == 2:
                    self.config.add_auth(pattern=auth[0], user=auth[1])
                else:
                    raise LinkCheckerError(
                        _("missing auth part in entry %(val)r") %
                        {"val": val})
        # read login URL and field names
        if self.has_option(section, "loginurl"):
            val = self.get(section, "loginurl").strip()
            if not (val.lower().startswith("http:") or
                    val.lower().startswith("https:")):
                raise LinkCheckerError(_("invalid login URL `%s'. Only " \
                    "HTTP and HTTPS URLs are supported.") % val)
            self.config["loginurl"] = val
        self.read_string_option(section, "loginuserfield")
        self.read_string_option(section, "loginpasswordfield")
        # read login extra fields
        if self.has_option(section, "loginextrafields"):
            for val in read_multiline(self.get(section, "loginextrafields")):
                # each line is "name:value"; only the first colon separates
                name, value = val.split(":", 1)
                self.config["loginextrafields"][name] = value
        self.check_password_readable(section, password_fields)

    def check_password_readable(self, section, fields):
        """Check if there is a readable configuration file and print a warning.

        @param section: name of the section holding the password options;
          only used in the warning text
        @param fields: names of the options that contain passwords; nothing
          is checked if this is empty
        """
        if not fields:
            return
        # The information which of the configuration files
        # included which option is not available. To avoid false positives,
        # a warning is only printed if exactly one file has been read.
        if len(self.read_ok) != 1:
            return
        fn = self.read_ok[0]
        if fileutil.is_accessable_by_others(fn):
            log.warn(LOG_CHECK, "The configuration file %s contains password information (in section [%s] and options %s) and the file is readable by others. Please make the file only readable by you.", fn, section, fields)
            # add a platform specific hint on how to fix the permissions
            if os.name == 'posix':
                log.warn(LOG_CHECK, _("For example execute 'chmod go-rw %s'.") % fn)
            elif os.name == 'nt':
                log.warn(LOG_CHECK, _("See http://support.microsoft.com/kb/308419 for more info on setting file permissions."))

    def read_filtering_config(self):
        """
        Read configuration options in section "filtering".
        """
        section = "filtering"
        if self.has_option(section, "ignorewarnings"):
            # comma-separated list, normalized to lowercase
            self.config['ignorewarnings'] = [
                f.strip().lower()
                for f in self.get(section, 'ignorewarnings').split(',')]
        # Both "ignore" and "nofollow" patterns end up in "externlinks";
        # they differ only in the strict flag of the pattern.
        if self.has_option(section, "ignore"):
            for line in read_multiline(self.get(section, "ignore")):
                pat = get_link_pat(line, strict=1)
                self.config["externlinks"].append(pat)
        if self.has_option(section, "nofollow"):
            for line in read_multiline(self.get(section, "nofollow")):
                pat = get_link_pat(line, strict=0)
                self.config["externlinks"].append(pat)
        if self.has_option(section, "internlinks"):
            pat = get_link_pat(self.get(section, "internlinks"))
            self.config["internlinks"].append(pat)
        self.read_boolean_option(section, "checkextern")

    def read_plugin_config(self):
        """Read plugin-specific configuration values.

        A plugin is enabled when the configuration has a section named
        like the plugin class; the class itself parses that section.
        """
        folders = self.config["pluginfolders"]
        modules = plugins.get_plugin_modules(folders)
        for pluginclass in plugins.get_plugin_classes(modules):
            section = pluginclass.__name__
            if self.has_section(section):
                self.config["enabledplugins"].append(section)
                self.config[section] = pluginclass.read_config(self)