2014-01-08 21:33:04 +00:00
|
|
|
# Copyright (C) 2000-2014 Bastian Kleineidam
|
2006-05-03 18:24:46 +00:00
|
|
|
#
|
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
# (at your option) any later version.
|
|
|
|
|
#
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
#
|
2009-07-24 21:58:20 +00:00
|
|
|
# You should have received a copy of the GNU General Public License along
|
|
|
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
2006-05-03 18:24:46 +00:00
|
|
|
"""
|
|
|
|
|
Store metadata and options.
|
|
|
|
|
"""
|
|
|
|
|
|
2020-03-31 18:46:31 +00:00
|
|
|
from functools import lru_cache
import importlib
import os
import re
import shutil
import socket
import urllib.parse
import urllib.request

import _LinkChecker_configdata as configdata
from xdg.BaseDirectory import xdg_config_home, xdg_data_home

from .. import (log, LOG_CHECK, get_install_data, fileutil)
from . import confparse
|
2006-05-03 18:24:46 +00:00
|
|
|
|
2009-01-23 08:34:24 +00:00
|
|
|
Version = configdata.version
|
2011-05-05 16:30:58 +00:00
|
|
|
ReleaseDate = configdata.release_date
|
2012-05-23 19:15:30 +00:00
|
|
|
AppName = configdata.name
|
2020-04-30 19:11:59 +00:00
|
|
|
App = AppName+" "+Version
|
2010-03-13 07:47:12 +00:00
|
|
|
Author = configdata.author
|
2020-04-30 19:11:59 +00:00
|
|
|
HtmlAuthor = Author.replace(' ', ' ')
|
|
|
|
|
Copyright = "Copyright (C) 2000-2014 "+Author
|
|
|
|
|
HtmlCopyright = "Copyright © 2000-2014 "+HtmlAuthor
|
|
|
|
|
AppInfo = App+" "+Copyright
|
|
|
|
|
HtmlAppInfo = App+", "+HtmlCopyright
|
2009-01-23 08:34:24 +00:00
|
|
|
Url = configdata.url
|
2020-04-30 19:11:59 +00:00
|
|
|
SupportUrl = "https://github.com/linkchecker/linkchecker/issues"
|
2009-01-23 08:34:24 +00:00
|
|
|
Email = configdata.author_email
|
2020-04-30 19:11:59 +00:00
|
|
|
UserAgent = "Mozilla/5.0 (compatible; %s/%s; +%s)" % (AppName, Version, Url)
|
|
|
|
|
Freeware = AppName+""" comes with ABSOLUTELY NO WARRANTY!
|
2006-05-03 18:24:46 +00:00
|
|
|
This is free software, and you are welcome to redistribute it
|
|
|
|
|
under certain conditions. Look at the file `LICENSE' within this
|
|
|
|
|
distribution."""
|
2012-01-23 21:24:51 +00:00
|
|
|
Portable = configdata.portable
|
2006-05-03 18:24:46 +00:00
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def normpath(path):
|
2011-11-05 11:03:55 +00:00
|
|
|
"""Norm given system path with all available norm or expand functions
|
|
|
|
|
in os.path."""
|
|
|
|
|
expanded = os.path.expanduser(os.path.expandvars(path))
|
|
|
|
|
return os.path.normcase(os.path.normpath(expanded))
|
2006-05-03 18:24:46 +00:00
|
|
|
|
|
|
|
|
|
2016-01-23 12:28:15 +00:00
|
|
|
# List Python modules in the form (module, name, version attribute)
|
2011-04-14 10:20:56 +00:00
|
|
|
Modules = (
|
2016-01-23 12:28:15 +00:00
|
|
|
# required modules
|
|
|
|
|
("requests", "Requests", "__version__"),
|
|
|
|
|
# optional modules
|
2020-04-30 19:11:59 +00:00
|
|
|
("argcomplete", "Argcomplete", None),
|
|
|
|
|
("GeoIP", "GeoIP", 'lib_version'), # on Unix systems
|
|
|
|
|
("pygeoip", "GeoIP", 'lib_version'), # on Windows systems
|
|
|
|
|
("sqlite3", "Pysqlite", 'version'),
|
|
|
|
|
("sqlite3", "Sqlite", 'sqlite_version'),
|
|
|
|
|
("gconf", "Gconf", '__version__'),
|
|
|
|
|
("meliae", "Meliae", '__version__'),
|
2011-04-14 10:20:56 +00:00
|
|
|
)
|
|
|
|
|
|
2016-01-23 12:28:15 +00:00
|
|
|
def get_modules_info():
|
|
|
|
|
"""Return unicode string with detected module info."""
|
|
|
|
|
module_infos = []
|
|
|
|
|
for (mod, name, version_attr) in Modules:
|
|
|
|
|
if not fileutil.has_module(mod):
|
|
|
|
|
continue
|
2017-02-01 14:02:35 +00:00
|
|
|
if version_attr and hasattr(mod, version_attr):
|
2016-01-23 12:28:15 +00:00
|
|
|
attr = getattr(mod, version_attr)
|
|
|
|
|
version = attr() if callable(attr) else attr
|
|
|
|
|
module_infos.append("%s %s" % (name, version))
|
|
|
|
|
else:
|
|
|
|
|
# ignore attribute errors in case library developers
|
|
|
|
|
# change the version information attribute
|
|
|
|
|
module_infos.append(name)
|
2020-04-30 19:11:59 +00:00
|
|
|
return "Modules: %s" % (", ".join(module_infos))
|
2011-04-14 10:20:56 +00:00
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def get_share_dir():
|
2014-03-11 19:23:49 +00:00
|
|
|
"""Return absolute path of LinkChecker example configuration."""
|
|
|
|
|
return os.path.join(get_install_data(), "share", "linkchecker")
|
|
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def get_share_file(filename, devel_dir=None):
|
2012-06-18 21:05:44 +00:00
|
|
|
"""Return a filename in the share directory.
|
|
|
|
|
@param devel_dir: directory to search when developing
|
|
|
|
|
@ptype devel_dir: string
|
|
|
|
|
@param filename: filename to search for
|
|
|
|
|
@ptype filename: string
|
|
|
|
|
@return: the found filename or None
|
2013-12-10 19:56:26 +00:00
|
|
|
@rtype: string
|
|
|
|
|
@raises: ValueError if not found
|
2012-06-18 21:05:44 +00:00
|
|
|
"""
|
2014-03-11 19:23:49 +00:00
|
|
|
paths = [get_share_dir()]
|
2014-03-10 21:34:37 +00:00
|
|
|
if devel_dir is not None:
|
|
|
|
|
# when developing
|
|
|
|
|
paths.insert(0, devel_dir)
|
2012-06-18 21:05:44 +00:00
|
|
|
for path in paths:
|
|
|
|
|
fullpath = os.path.join(path, filename)
|
|
|
|
|
if os.path.isfile(fullpath):
|
|
|
|
|
return fullpath
|
|
|
|
|
# not found
|
2013-12-10 19:56:26 +00:00
|
|
|
msg = "%s not found in %s; check your installation" % (filename, paths)
|
|
|
|
|
raise ValueError(msg)
|
2012-06-18 21:05:44 +00:00
|
|
|
|
|
|
|
|
|
2014-09-11 19:19:49 +00:00
|
|
|
def get_system_cert_file():
|
|
|
|
|
"""Try to find a system-wide SSL certificate file.
|
|
|
|
|
@return: the filename to the cert file
|
|
|
|
|
@raises: ValueError when no system cert file could be found
|
|
|
|
|
"""
|
|
|
|
|
if os.name == 'posix':
|
|
|
|
|
filename = "/etc/ssl/certs/ca-certificates.crt"
|
|
|
|
|
if os.path.isfile(filename):
|
|
|
|
|
return filename
|
|
|
|
|
msg = "no system certificate file found"
|
|
|
|
|
raise ValueError(msg)
|
|
|
|
|
|
|
|
|
|
|
2014-09-05 18:00:30 +00:00
|
|
|
def get_certifi_file():
|
|
|
|
|
"""Get the SSL certifications installed by the certifi package.
|
|
|
|
|
@return: the filename to the cert file
|
|
|
|
|
@rtype: string
|
|
|
|
|
@raises: ImportError when certifi is not installed or ValueError when
|
|
|
|
|
the file is not found
|
|
|
|
|
"""
|
|
|
|
|
import certifi
|
|
|
|
|
filename = certifi.where()
|
|
|
|
|
if os.path.isfile(filename):
|
|
|
|
|
return filename
|
|
|
|
|
msg = "%s not found; check your certifi installation" % filename
|
|
|
|
|
raise ValueError(msg)
|
|
|
|
|
|
|
|
|
|
|
2006-05-03 18:24:46 +00:00
|
|
|
# dynamic options
|
2020-05-16 19:19:42 +00:00
|
|
|
class Configuration(dict):
|
2006-05-03 18:24:46 +00:00
|
|
|
"""
|
|
|
|
|
Storage for configuration options. Options can both be given from
|
|
|
|
|
the command line as well as from configuration files.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
def __init__ (self):
|
|
|
|
|
"""
|
|
|
|
|
Initialize the default options.
|
|
|
|
|
"""
|
|
|
|
|
super(Configuration, self).__init__()
|
2014-02-28 23:12:34 +00:00
|
|
|
## checking options
|
|
|
|
|
self["allowedschemes"] = []
|
|
|
|
|
self['cookiefile'] = None
|
add --no-robots commandline flag
While this flag can be abused, it seems to me like a legitimate use
case that you want to check a fairly small document for mistakes,
which includes references to a website which has a robots.txt that
denies all robots. It turns out that most websites do *not* add a
permission for LinkCheck to use their site, and some sites, like the
Debian BTS for example, are very hostile with bots in general.
Between me using linkcheck and me using my web browser to check those
links one by one, there is not a big difference. In fact, using
linkcheck may be *better* for the website because it will use HEAD
requests instead of a GET, and will not fetch all page elements
(javascript, images, etc) which can often be fairly big.
Besides, hostile users will patch the software themselves: it took me
only a few minutes to disable the check, and a few more to make that
into a proper patch.
By forcing robots.txt without any other option, we are hurting our
good users and not keeping hostile users from doing harm.
The patch is still incomplete, but works. It lacks: documentation and
unit tests.
Closes: #508
2016-05-19 18:43:58 +00:00
|
|
|
self['robotstxt'] = True
|
2014-02-28 23:12:34 +00:00
|
|
|
self["debugmemory"] = False
|
|
|
|
|
self["localwebroot"] = None
|
|
|
|
|
self["maxfilesizeparse"] = 1*1024*1024
|
|
|
|
|
self["maxfilesizedownload"] = 5*1024*1024
|
|
|
|
|
self["maxnumurls"] = None
|
|
|
|
|
self["maxrunseconds"] = None
|
|
|
|
|
self["maxrequestspersecond"] = 10
|
2014-03-06 20:58:35 +00:00
|
|
|
self["maxhttpredirects"] = 10
|
2014-02-28 23:12:34 +00:00
|
|
|
self["nntpserver"] = os.environ.get("NNTP_SERVER", None)
|
2020-05-14 19:15:28 +00:00
|
|
|
self["proxy"] = urllib.request.getproxies()
|
2014-02-28 23:12:34 +00:00
|
|
|
self["sslverify"] = True
|
2014-03-01 19:49:06 +00:00
|
|
|
self["threads"] = 10
|
2014-02-28 23:12:34 +00:00
|
|
|
self["timeout"] = 60
|
|
|
|
|
self["aborttimeout"] = 300
|
|
|
|
|
self["recursionlevel"] = -1
|
|
|
|
|
self["useragent"] = UserAgent
|
|
|
|
|
## authentication
|
2006-05-03 18:24:46 +00:00
|
|
|
self["authentication"] = []
|
2010-10-14 16:36:11 +00:00
|
|
|
self["loginurl"] = None
|
|
|
|
|
self["loginuserfield"] = "login"
|
|
|
|
|
self["loginpasswordfield"] = "password"
|
|
|
|
|
self["loginextrafields"] = {}
|
2014-02-28 23:12:34 +00:00
|
|
|
## filtering
|
|
|
|
|
self["externlinks"] = []
|
|
|
|
|
self["ignorewarnings"] = []
|
|
|
|
|
self["internlinks"] = []
|
|
|
|
|
self["checkextern"] = False
|
|
|
|
|
## plugins
|
|
|
|
|
self["pluginfolders"] = get_plugin_folders()
|
|
|
|
|
self["enabledplugins"] = []
|
|
|
|
|
## output
|
|
|
|
|
self['trace'] = False
|
|
|
|
|
self['quiet'] = False
|
|
|
|
|
self["verbose"] = False
|
|
|
|
|
self["warnings"] = True
|
2006-05-03 18:24:46 +00:00
|
|
|
self["fileoutput"] = []
|
2007-12-01 15:50:33 +00:00
|
|
|
self['output'] = 'text'
|
2014-02-28 23:12:34 +00:00
|
|
|
self["status"] = False
|
|
|
|
|
self["status_wait_seconds"] = 5
|
2007-12-01 15:50:33 +00:00
|
|
|
self['logger'] = None
|
2013-12-11 17:41:55 +00:00
|
|
|
self.loggers = {}
|
|
|
|
|
from ..logger import LoggerClasses
|
|
|
|
|
for c in LoggerClasses:
|
|
|
|
|
key = c.LoggerName
|
|
|
|
|
self[key] = {}
|
|
|
|
|
self.loggers[key] = c
|
2006-05-03 18:24:46 +00:00
|
|
|
|
2014-05-10 19:23:06 +00:00
|
|
|
def set_status_logger(self, status_logger):
|
|
|
|
|
"""Set the status logger."""
|
2008-06-11 13:02:22 +00:00
|
|
|
self.status_logger = status_logger
|
2010-10-23 22:25:27 +00:00
|
|
|
|
2013-12-11 17:41:55 +00:00
|
|
|
def logger_new (self, loggername, **kwargs):
|
|
|
|
|
"""Instantiate new logger and return it."""
|
2013-12-13 06:37:21 +00:00
|
|
|
args = self[loggername]
|
|
|
|
|
args.update(kwargs)
|
|
|
|
|
return self.loggers[loggername](**args)
|
2006-05-03 18:24:46 +00:00
|
|
|
|
2013-12-11 17:41:55 +00:00
|
|
|
def logger_add (self, loggerclass):
|
|
|
|
|
"""Add a new logger type to the known loggers."""
|
|
|
|
|
self.loggers[loggerclass.LoggerName] = loggerclass
|
2013-12-13 06:39:59 +00:00
|
|
|
self[loggerclass.LoggerName] = {}
|
2006-05-03 18:24:46 +00:00
|
|
|
|
|
|
|
|
def read (self, files=None):
|
|
|
|
|
"""
|
|
|
|
|
Read settings from given config files.
|
|
|
|
|
|
|
|
|
|
@raises: LinkCheckerError on syntax errors in the config file(s)
|
|
|
|
|
"""
|
|
|
|
|
if files is None:
|
|
|
|
|
cfiles = []
|
|
|
|
|
else:
|
|
|
|
|
cfiles = files[:]
|
|
|
|
|
if not cfiles:
|
2012-10-15 12:36:10 +00:00
|
|
|
userconf = get_user_config()
|
|
|
|
|
if os.path.isfile(userconf):
|
|
|
|
|
cfiles.append(userconf)
|
|
|
|
|
# filter invalid files
|
|
|
|
|
filtered_cfiles = []
|
|
|
|
|
for cfile in cfiles:
|
|
|
|
|
if not os.path.isfile(cfile):
|
|
|
|
|
log.warn(LOG_CHECK, _("Configuration file %r does not exist."), cfile)
|
|
|
|
|
elif not fileutil.is_readable(cfile):
|
|
|
|
|
log.warn(LOG_CHECK, _("Configuration file %r is not readable."), cfile)
|
|
|
|
|
else:
|
|
|
|
|
filtered_cfiles.append(cfile)
|
|
|
|
|
log.debug(LOG_CHECK, "reading configuration from %s", filtered_cfiles)
|
|
|
|
|
confparse.LCConfigParser(self).read(filtered_cfiles)
|
2007-02-21 22:17:02 +00:00
|
|
|
|
2010-10-25 20:41:31 +00:00
|
|
|
def add_auth (self, user=None, password=None, pattern=None):
|
2011-02-14 20:06:34 +00:00
|
|
|
"""Add given authentication data."""
|
2010-10-25 20:07:16 +00:00
|
|
|
if not user or not pattern:
|
|
|
|
|
log.warn(LOG_CHECK,
|
2012-01-20 23:25:02 +00:00
|
|
|
_("missing user or URL pattern in authentication data."))
|
2010-10-25 20:07:16 +00:00
|
|
|
return
|
|
|
|
|
entry = dict(
|
|
|
|
|
user=user,
|
|
|
|
|
password=password,
|
|
|
|
|
pattern=re.compile(pattern),
|
|
|
|
|
)
|
|
|
|
|
self["authentication"].append(entry)
|
|
|
|
|
|
2010-10-25 20:41:31 +00:00
|
|
|
def get_user_password (self, url):
|
2010-10-11 18:11:15 +00:00
|
|
|
"""Get tuple (user, password) from configured authentication
|
|
|
|
|
that matches the given URL.
|
|
|
|
|
Both user and password can be None if not specified, or no
|
|
|
|
|
authentication matches the given URL.
|
|
|
|
|
"""
|
|
|
|
|
for auth in self["authentication"]:
|
|
|
|
|
if auth['pattern'].match(url):
|
|
|
|
|
return (auth['user'], auth['password'])
|
|
|
|
|
return (None, None)
|
|
|
|
|
|
2013-02-27 18:37:28 +00:00
|
|
|
def get_connectionlimits(self):
|
|
|
|
|
"""Get dict with limit per connection type."""
|
|
|
|
|
return {key: self['maxconnections%s' % key] for key in ('http', 'https', 'ftp')}
|
|
|
|
|
|
2007-02-21 22:17:02 +00:00
|
|
|
def sanitize (self):
|
|
|
|
|
"Make sure the configuration is consistent."
|
2010-10-11 21:50:59 +00:00
|
|
|
if self['logger'] is None:
|
|
|
|
|
self.sanitize_logger()
|
2010-10-14 16:36:11 +00:00
|
|
|
if self['loginurl']:
|
|
|
|
|
self.sanitize_loginurl()
|
2011-03-09 09:49:18 +00:00
|
|
|
self.sanitize_proxies()
|
2014-02-28 23:12:34 +00:00
|
|
|
self.sanitize_plugins()
|
2014-03-10 21:34:37 +00:00
|
|
|
self.sanitize_ssl()
|
2012-10-10 08:53:52 +00:00
|
|
|
# set default socket timeout
|
|
|
|
|
socket.setdefaulttimeout(self['timeout'])
|
2010-10-11 21:50:59 +00:00
|
|
|
|
|
|
|
|
def sanitize_logger (self):
|
2011-02-14 20:06:34 +00:00
|
|
|
"""Make logger configuration consistent."""
|
2010-10-11 21:50:59 +00:00
|
|
|
if not self['output']:
|
2012-01-20 23:25:02 +00:00
|
|
|
log.warn(LOG_CHECK, _("activating text logger output."))
|
2010-10-11 21:50:59 +00:00
|
|
|
self['output'] = 'text'
|
|
|
|
|
self['logger'] = self.logger_new(self['output'])
|
|
|
|
|
|
2010-10-14 16:36:11 +00:00
|
|
|
def sanitize_loginurl (self):
|
2011-02-14 20:06:34 +00:00
|
|
|
"""Make login configuration consistent."""
|
2010-10-14 16:36:11 +00:00
|
|
|
url = self["loginurl"]
|
|
|
|
|
disable = False
|
|
|
|
|
if not self["loginpasswordfield"]:
|
|
|
|
|
log.warn(LOG_CHECK,
|
2012-01-20 23:25:02 +00:00
|
|
|
_("no CGI password fieldname given for login URL."))
|
2010-10-14 16:36:11 +00:00
|
|
|
disable = True
|
|
|
|
|
if not self["loginuserfield"]:
|
|
|
|
|
log.warn(LOG_CHECK,
|
2012-01-20 23:25:02 +00:00
|
|
|
_("no CGI user fieldname given for login URL."))
|
2010-10-14 16:36:11 +00:00
|
|
|
disable = True
|
|
|
|
|
if self.get_user_password(url) == (None, None):
|
|
|
|
|
log.warn(LOG_CHECK,
|
2012-01-20 23:25:02 +00:00
|
|
|
_("no user/password authentication data found for login URL."))
|
2010-10-14 16:36:11 +00:00
|
|
|
disable = True
|
|
|
|
|
if not url.lower().startswith(("http:", "https:")):
|
2012-01-20 23:25:02 +00:00
|
|
|
log.warn(LOG_CHECK, _("login URL is not a HTTP URL."))
|
2010-10-14 16:36:11 +00:00
|
|
|
disable = True
|
2020-05-14 19:15:28 +00:00
|
|
|
urlparts = urllib.parse.urlsplit(url)
|
2010-10-14 16:36:11 +00:00
|
|
|
if not urlparts[0] or not urlparts[1] or not urlparts[2]:
|
2012-01-20 23:25:02 +00:00
|
|
|
log.warn(LOG_CHECK, _("login URL is incomplete."))
|
2010-10-14 16:36:11 +00:00
|
|
|
disable = True
|
|
|
|
|
if disable:
|
|
|
|
|
log.warn(LOG_CHECK,
|
2012-01-20 23:25:02 +00:00
|
|
|
_("disabling login URL %(url)s.") % {"url": url})
|
2010-10-14 16:36:11 +00:00
|
|
|
self["loginurl"] = None
|
|
|
|
|
|
2011-03-09 09:49:18 +00:00
|
|
|
def sanitize_proxies (self):
|
|
|
|
|
"""Try to read additional proxy settings which urllib does not
|
|
|
|
|
support."""
|
|
|
|
|
if os.name != 'posix':
|
|
|
|
|
return
|
|
|
|
|
if "http" not in self["proxy"]:
|
|
|
|
|
http_proxy = get_gconf_http_proxy() or get_kde_http_proxy()
|
|
|
|
|
if http_proxy:
|
|
|
|
|
self["proxy"]["http"] = http_proxy
|
|
|
|
|
if "ftp" not in self["proxy"]:
|
|
|
|
|
ftp_proxy = get_gconf_ftp_proxy() or get_kde_ftp_proxy()
|
|
|
|
|
if ftp_proxy:
|
|
|
|
|
self["proxy"]["ftp"] = ftp_proxy
|
|
|
|
|
|
2014-02-28 23:12:34 +00:00
|
|
|
def sanitize_plugins(self):
|
|
|
|
|
"""Ensure each plugin is configurable."""
|
|
|
|
|
for plugin in self["enabledplugins"]:
|
|
|
|
|
if plugin not in self:
|
|
|
|
|
self[plugin] = {}
|
|
|
|
|
|
2014-03-10 21:34:37 +00:00
|
|
|
def sanitize_ssl(self):
|
2014-09-11 19:19:49 +00:00
|
|
|
"""Use local installed certificate file if available.
|
|
|
|
|
Tries to get system, then certifi, then the own
|
|
|
|
|
installed certificate file."""
|
2014-03-10 21:34:37 +00:00
|
|
|
if self["sslverify"] is True:
|
|
|
|
|
try:
|
2014-09-11 19:19:49 +00:00
|
|
|
self["sslverify"] = get_system_cert_file()
|
2014-03-10 21:34:37 +00:00
|
|
|
except ValueError:
|
2014-09-05 18:00:30 +00:00
|
|
|
try:
|
|
|
|
|
self["sslverify"] = get_certifi_file()
|
2014-09-11 19:19:49 +00:00
|
|
|
except (ValueError, ImportError):
|
|
|
|
|
try:
|
|
|
|
|
self["sslverify"] = get_share_file('cacert.pem')
|
|
|
|
|
except ValueError:
|
|
|
|
|
pass
|
2014-03-10 21:34:37 +00:00
|
|
|
|
2014-02-28 23:12:34 +00:00
|
|
|
|
2017-10-18 13:55:31 +00:00
|
|
|
def get_user_data():
|
|
|
|
|
"""Get the user data folder.
|
|
|
|
|
Returns "~/.linkchecker/" if this folder exists, \
|
|
|
|
|
"$XDG_DATA_HOME/linkchecker" if it does not.
|
|
|
|
|
@rtype string
|
|
|
|
|
"""
|
|
|
|
|
homedotdir = normpath("~/.linkchecker/")
|
|
|
|
|
userdata = homedotdir if os.path.isdir(homedotdir) \
|
|
|
|
|
else os.path.join(xdg_data_home, "linkchecker")
|
|
|
|
|
return userdata
|
|
|
|
|
|
2014-02-28 23:12:34 +00:00
|
|
|
def get_plugin_folders():
|
2017-10-17 16:26:08 +00:00
|
|
|
"""Get linkchecker plugin folders. Default is
|
2017-10-18 13:58:18 +00:00
|
|
|
"$XDG_DATA_HOME/linkchecker/plugins/". "~/.linkchecker/plugins/" is also
|
|
|
|
|
supported for backwards compatibility, and is used if both directories
|
|
|
|
|
exist."""
|
2014-02-28 23:12:34 +00:00
|
|
|
folders = []
|
2017-10-18 13:55:31 +00:00
|
|
|
defaultfolder = os.path.join(get_user_data(), "plugins")
|
2014-02-28 23:12:34 +00:00
|
|
|
if not os.path.exists(defaultfolder) and not Portable:
|
|
|
|
|
try:
|
|
|
|
|
make_userdir(defaultfolder)
|
2014-09-12 17:36:30 +00:00
|
|
|
except Exception as errmsg:
|
2014-02-28 23:12:34 +00:00
|
|
|
msg = _("could not create plugin directory %(dirname)r: %(errmsg)r")
|
|
|
|
|
args = dict(dirname=defaultfolder, errmsg=errmsg)
|
|
|
|
|
log.warn(LOG_CHECK, msg % args)
|
|
|
|
|
if os.path.exists(defaultfolder):
|
|
|
|
|
folders.append(defaultfolder)
|
|
|
|
|
return folders
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def make_userdir(child):
|
|
|
|
|
"""Create a child directory."""
|
|
|
|
|
userdir = os.path.dirname(child)
|
|
|
|
|
if not os.path.isdir(userdir):
|
|
|
|
|
if os.name == 'nt':
|
|
|
|
|
# Windows forbids filenames with leading dot unless
|
|
|
|
|
# a trailing dot is added.
|
|
|
|
|
userdir += "."
|
2019-04-13 19:37:39 +00:00
|
|
|
os.makedirs(userdir, 0o700)
|
2014-02-28 23:12:34 +00:00
|
|
|
|
2009-07-24 21:16:12 +00:00
|
|
|
|
2011-05-20 19:10:31 +00:00
|
|
|
def get_user_config():
|
2012-04-22 18:42:39 +00:00
|
|
|
"""Get the user configuration filename.
|
|
|
|
|
If the user configuration file does not exist, copy it from the initial
|
|
|
|
|
configuration file, but only if this is not a portable installation.
|
2012-10-15 12:36:10 +00:00
|
|
|
Returns path to user config file (which might not exist due to copy
|
|
|
|
|
failures or on portable systems).
|
|
|
|
|
@return configuration filename
|
|
|
|
|
@rtype string
|
|
|
|
|
"""
|
2012-04-22 18:42:39 +00:00
|
|
|
# initial config (with all options explained)
|
2014-03-11 19:23:49 +00:00
|
|
|
initialconf = normpath(os.path.join(get_share_dir(), "linkcheckerrc"))
|
2010-11-05 00:09:13 +00:00
|
|
|
# per user config settings
|
2017-10-17 16:26:08 +00:00
|
|
|
homedotfile = normpath("~/.linkchecker/linkcheckerrc")
|
|
|
|
|
userconf = homedotfile if os.path.isfile(homedotfile) \
|
|
|
|
|
else os.path.join(xdg_config_home, "linkchecker", "linkcheckerrc")
|
2012-04-22 18:42:39 +00:00
|
|
|
if os.path.isfile(initialconf) and not os.path.exists(userconf) and \
|
2012-01-23 21:24:51 +00:00
|
|
|
not Portable:
|
2012-04-22 18:42:39 +00:00
|
|
|
# copy the initial configuration to the user configuration
|
2009-07-24 21:16:12 +00:00
|
|
|
try:
|
2014-02-28 23:12:34 +00:00
|
|
|
make_userdir(userconf)
|
2012-04-22 18:42:39 +00:00
|
|
|
shutil.copy(initialconf, userconf)
|
2014-09-12 17:36:30 +00:00
|
|
|
except Exception as errmsg:
|
2012-04-22 18:42:39 +00:00
|
|
|
msg = _("could not copy initial configuration file %(src)r to %(dst)r: %(errmsg)r")
|
|
|
|
|
args = dict(src=initialconf, dst=userconf, errmsg=errmsg)
|
|
|
|
|
log.warn(LOG_CHECK, msg % args)
|
2011-05-20 19:10:31 +00:00
|
|
|
return userconf
|
2011-03-09 09:49:18 +00:00
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def get_gconf_http_proxy():
|
2011-03-09 09:49:18 +00:00
|
|
|
"""Return host:port for GConf HTTP proxy if found, else None."""
|
|
|
|
|
try:
|
|
|
|
|
import gconf
|
|
|
|
|
except ImportError:
|
|
|
|
|
return None
|
2011-03-10 08:34:42 +00:00
|
|
|
try:
|
|
|
|
|
client = gconf.client_get_default()
|
|
|
|
|
if client.get_bool("/system/http_proxy/use_http_proxy"):
|
|
|
|
|
host = client.get_string("/system/http_proxy/host")
|
|
|
|
|
port = client.get_int("/system/http_proxy/port")
|
|
|
|
|
if host:
|
|
|
|
|
if not port:
|
|
|
|
|
port = 8080
|
|
|
|
|
return "%s:%d" % (host, port)
|
2014-09-12 17:36:30 +00:00
|
|
|
except Exception as msg:
|
2011-03-25 12:34:48 +00:00
|
|
|
log.debug(LOG_CHECK, "error getting HTTP proxy from gconf: %s", msg)
|
2014-02-28 23:12:34 +00:00
|
|
|
pass
|
2011-03-09 09:49:18 +00:00
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def get_gconf_ftp_proxy():
|
2011-03-09 09:49:18 +00:00
|
|
|
"""Return host:port for GConf FTP proxy if found, else None."""
|
|
|
|
|
try:
|
|
|
|
|
import gconf
|
|
|
|
|
except ImportError:
|
|
|
|
|
return None
|
2011-03-10 08:34:42 +00:00
|
|
|
try:
|
|
|
|
|
client = gconf.client_get_default()
|
|
|
|
|
host = client.get_string("/system/proxy/ftp_host")
|
|
|
|
|
port = client.get_int("/system/proxy/ftp_port")
|
|
|
|
|
if host:
|
|
|
|
|
if not port:
|
|
|
|
|
port = 8080
|
|
|
|
|
return "%s:%d" % (host, port)
|
2014-09-12 17:36:30 +00:00
|
|
|
except Exception as msg:
|
2011-03-25 12:34:48 +00:00
|
|
|
log.debug(LOG_CHECK, "error getting FTP proxy from gconf: %s", msg)
|
2014-02-28 23:12:34 +00:00
|
|
|
pass
|
2011-03-09 09:49:18 +00:00
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def get_kde_http_proxy():
|
2011-03-09 09:49:18 +00:00
|
|
|
"""Return host:port for KDE HTTP proxy if found, else None."""
|
|
|
|
|
config_dir = get_kde_config_dir()
|
|
|
|
|
if not config_dir:
|
|
|
|
|
# could not find any KDE configuration directory
|
|
|
|
|
return
|
|
|
|
|
try:
|
|
|
|
|
data = read_kioslaverc(config_dir)
|
|
|
|
|
return data.get("http_proxy")
|
2014-09-12 17:36:30 +00:00
|
|
|
except Exception as msg:
|
2011-03-25 12:34:48 +00:00
|
|
|
log.debug(LOG_CHECK, "error getting HTTP proxy from KDE: %s", msg)
|
2014-02-28 23:12:34 +00:00
|
|
|
pass
|
2011-03-09 09:49:18 +00:00
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def get_kde_ftp_proxy():
|
2011-03-09 09:49:18 +00:00
|
|
|
"""Return host:port for KDE HTTP proxy if found, else None."""
|
|
|
|
|
config_dir = get_kde_config_dir()
|
|
|
|
|
if not config_dir:
|
|
|
|
|
# could not find any KDE configuration directory
|
|
|
|
|
return
|
|
|
|
|
try:
|
|
|
|
|
data = read_kioslaverc(config_dir)
|
|
|
|
|
return data.get("ftp_proxy")
|
2014-09-12 17:36:30 +00:00
|
|
|
except Exception as msg:
|
2011-03-25 12:34:48 +00:00
|
|
|
log.debug(LOG_CHECK, "error getting FTP proxy from KDE: %s", msg)
|
2014-02-28 23:12:34 +00:00
|
|
|
pass
|
2011-03-09 09:49:18 +00:00
|
|
|
|
2011-03-11 17:03:14 +00:00
|
|
|
# The following KDE functions are largely ported and ajusted from
|
|
|
|
|
# Google Chromium:
|
|
|
|
|
# http://src.chromium.org/viewvc/chrome/trunk/src/net/proxy/proxy_config_service_linux.cc?revision=HEAD&view=markup
|
|
|
|
|
# Copyright (c) 2010 The Chromium Authors. All rights reserved.
|
|
|
|
|
#
|
|
|
|
|
# Redistribution and use in source and binary forms, with or without
|
|
|
|
|
# modification, are permitted provided that the following conditions are
|
|
|
|
|
# met:
|
|
|
|
|
#
|
|
|
|
|
# * Redistributions of source code must retain the above copyright
|
|
|
|
|
# notice, this list of conditions and the following disclaimer.
|
|
|
|
|
# * Redistributions in binary form must reproduce the above
|
|
|
|
|
# copyright notice, this list of conditions and the following disclaimer
|
|
|
|
|
# in the documentation and/or other materials provided with the
|
|
|
|
|
# distribution.
|
|
|
|
|
# * Neither the name of Google Inc. nor the names of its
|
|
|
|
|
# contributors may be used to endorse or promote products derived from
|
|
|
|
|
# this software without specific prior written permission.
|
|
|
|
|
#
|
|
|
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
2011-03-09 09:49:18 +00:00
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def get_kde_config_dir():
|
2011-03-09 09:49:18 +00:00
|
|
|
"""Return KDE configuration directory or None if not found."""
|
|
|
|
|
kde_home = get_kde_home_dir()
|
|
|
|
|
if not kde_home:
|
|
|
|
|
# could not determine the KDE home directory
|
|
|
|
|
return
|
|
|
|
|
return kde_home_to_config(kde_home)
|
|
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def kde_home_to_config(kde_home):
|
2011-03-09 09:49:18 +00:00
|
|
|
"""Add subdirectories for config path to KDE home directory."""
|
|
|
|
|
return os.path.join(kde_home, "share", "config")
|
|
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def get_kde_home_dir():
|
2011-03-09 09:49:18 +00:00
|
|
|
"""Return KDE home directory or None if not found."""
|
|
|
|
|
if os.environ.get("KDEHOME"):
|
|
|
|
|
kde_home = os.path.abspath(os.environ["KDEHOME"])
|
|
|
|
|
else:
|
|
|
|
|
home = os.environ.get("HOME")
|
|
|
|
|
if not home:
|
|
|
|
|
# $HOME is not set
|
|
|
|
|
return
|
|
|
|
|
kde3_home = os.path.join(home, ".kde")
|
|
|
|
|
kde4_home = os.path.join(home, ".kde4")
|
|
|
|
|
if fileutil.find_executable("kde4-config"):
|
|
|
|
|
# kde4
|
|
|
|
|
kde3_file = kde_home_to_config(kde3_home)
|
|
|
|
|
kde4_file = kde_home_to_config(kde4_home)
|
|
|
|
|
if os.path.exists(kde4_file) and os.path.exists(kde3_file):
|
|
|
|
|
if fileutil.get_mtime(kde4_file) >= fileutil.get_mtime(kde3_file):
|
|
|
|
|
kde_home = kde4_home
|
|
|
|
|
else:
|
|
|
|
|
kde_home = kde3_home
|
|
|
|
|
else:
|
|
|
|
|
kde_home = kde4_home
|
|
|
|
|
else:
|
|
|
|
|
# kde3
|
|
|
|
|
kde_home = kde3_home
|
|
|
|
|
return kde_home if os.path.exists(kde_home) else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
loc_ro = re.compile(r"\[.*\]$")
|
|
|
|
|
|
2020-03-31 18:46:31 +00:00
|
|
|
@lru_cache(1)
|
2020-05-16 19:19:42 +00:00
|
|
|
def read_kioslaverc(kde_config_dir):
|
2011-03-09 09:49:18 +00:00
|
|
|
"""Read kioslaverc into data dictionary."""
|
|
|
|
|
data = {}
|
|
|
|
|
filename = os.path.join(kde_config_dir, "kioslaverc")
|
|
|
|
|
with open(filename) as fd:
|
|
|
|
|
# First read all lines into dictionary since they can occur
|
|
|
|
|
# in any order.
|
|
|
|
|
for line in fd:
|
|
|
|
|
line = line.rstrip()
|
|
|
|
|
if line.startswith('['):
|
|
|
|
|
in_proxy_settings = line.startswith("[Proxy Settings]")
|
|
|
|
|
elif in_proxy_settings:
|
|
|
|
|
if '=' not in line:
|
|
|
|
|
continue
|
|
|
|
|
key, value = line.split('=', 1)
|
|
|
|
|
key = key.strip()
|
|
|
|
|
value = value.strip()
|
|
|
|
|
if not key:
|
|
|
|
|
continue
|
|
|
|
|
# trim optional localization
|
|
|
|
|
key = loc_ro.sub("", key).strip()
|
|
|
|
|
if not key:
|
|
|
|
|
continue
|
|
|
|
|
add_kde_setting(key, value, data)
|
|
|
|
|
resolve_kde_settings(data)
|
|
|
|
|
return data
|
|
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def add_kde_proxy(key, value, data):
|
2011-03-09 09:49:18 +00:00
|
|
|
"""Add a proxy value to data dictionary after sanity checks."""
|
|
|
|
|
if not value or value[:3] == "//:":
|
|
|
|
|
return
|
|
|
|
|
data[key] = value
|
|
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def add_kde_setting(key, value, data):
|
2011-03-09 09:49:18 +00:00
|
|
|
"""Add a KDE proxy setting value to data dictionary."""
|
|
|
|
|
if key == "ProxyType":
|
|
|
|
|
mode = None
|
|
|
|
|
int_value = int(value)
|
|
|
|
|
if int_value == 1:
|
|
|
|
|
mode = "manual"
|
|
|
|
|
elif int_value == 2:
|
|
|
|
|
# PAC URL
|
|
|
|
|
mode = "pac"
|
|
|
|
|
elif int_value == 3:
|
|
|
|
|
# WPAD.
|
|
|
|
|
mode = "wpad"
|
|
|
|
|
elif int_value == 4:
|
|
|
|
|
# Indirect manual via environment variables.
|
|
|
|
|
mode = "indirect"
|
|
|
|
|
data["mode"] = mode
|
|
|
|
|
elif key == "Proxy Config Script":
|
|
|
|
|
data["autoconfig_url"] = value
|
|
|
|
|
elif key == "httpProxy":
|
2011-05-15 16:36:30 +00:00
|
|
|
add_kde_proxy("http_proxy", value, data)
|
2011-03-09 09:49:18 +00:00
|
|
|
elif key == "httpsProxy":
|
|
|
|
|
add_kde_proxy("https_proxy", value, data)
|
|
|
|
|
elif key == "ftpProxy":
|
|
|
|
|
add_kde_proxy("ftp_proxy", value, data)
|
|
|
|
|
elif key == "ReversedException":
|
|
|
|
|
data["reversed_bypass"] = bool(value == "true" or int(value))
|
|
|
|
|
elif key == "NoProxyFor":
|
|
|
|
|
data["ignore_hosts"] = split_hosts(value)
|
|
|
|
|
elif key == "AuthMode":
|
|
|
|
|
mode = int(value)
|
|
|
|
|
# XXX todo
|
|
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def split_hosts(value):
|
2011-03-09 09:49:18 +00:00
|
|
|
"""Split comma-separated host list."""
|
|
|
|
|
return [host for host in value.split(", ") if host]
|
|
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def resolve_indirect(data, key, splithosts=False):
|
2011-03-09 09:49:18 +00:00
|
|
|
"""Replace name of environment variable with its value."""
|
|
|
|
|
value = data[key]
|
|
|
|
|
env_value = os.environ.get(value)
|
|
|
|
|
if env_value:
|
|
|
|
|
if splithosts:
|
|
|
|
|
data[key] = split_hosts(env_value)
|
|
|
|
|
else:
|
|
|
|
|
data[key] = env_value
|
|
|
|
|
else:
|
|
|
|
|
del data[key]
|
|
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def resolve_kde_settings(data):
|
2011-03-09 09:49:18 +00:00
|
|
|
"""Write final proxy configuration values in data dictionary."""
|
|
|
|
|
if "mode" not in data:
|
|
|
|
|
return
|
|
|
|
|
if data["mode"] == "indirect":
|
|
|
|
|
for key in ("http_proxy", "https_proxy", "ftp_proxy"):
|
|
|
|
|
if key in data:
|
|
|
|
|
resolve_indirect(data, key)
|
|
|
|
|
if "ignore_hosts" in data:
|
|
|
|
|
resolve_indirect(data, "ignore_hosts", splithosts=True)
|
|
|
|
|
elif data["mode"] != "manual":
|
|
|
|
|
# unsupported config
|
|
|
|
|
for key in ("http_proxy", "https_proxy", "ftp_proxy"):
|
|
|
|
|
if key in data:
|
|
|
|
|
del data[key]
|