diff --git a/linkcheck/setup_config.py b/linkcheck/setup_config.py
new file mode 100644
index 00000000..01f84af2
--- /dev/null
+++ b/linkcheck/setup_config.py
@@ -0,0 +1,213 @@
+# Copyright (C) 2000-2016 Bastian Kleineidam
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+"""
+Configure linkchecker using command-line options and configuration.
+"""
+
+import codecs
+import getpass
+
+from .cmdline import print_version, print_usage, print_plugins
+from .director import console
+
+from . import LOG_CMDLINE
+from . import log
+import linkcheck
+
+
+def has_encoding(encoding):
+    """Return True if Python has a codec registered under the name *encoding*."""
+    try:
+        codecs.lookup(encoding)
+        return True
+    except LookupError:
+        return False
+
+
+def setup_config(config, options):
+    """Apply parsed command-line options to the linkchecker configuration.
+
+    *config* is updated in place.  *options* is the parsed command-line
+    namespace; ``options.threads`` is reset to 0 when it is below 1.
+    Invalid option values are reported through print_usage(), and
+    passwords that are requested but still missing are read with getpass.
+    """
+    _username = None
+    _password = None
+
+    # test if running with -O
+    if options.debug and not __debug__:
+        log.warn(LOG_CMDLINE, _("Running with python -O disables debugging."))
+    # apply commandline options and arguments to configuration
+    constructauth = False
+    if options.version:
+        print_version()
+    if not options.warnings:
+        config["warnings"] = options.warnings
+    if options.externstrict:
+        pats = [linkcheck.get_link_pat(arg, strict=True) for arg in options.externstrict]
+        config["externlinks"].extend(pats)
+    if options.extern:
+        pats = [linkcheck.get_link_pat(arg) for arg in options.extern]
+        config["externlinks"].extend(pats)
+    if options.norobotstxt is not None:
+        config["robotstxt"] = options.norobotstxt
+    if options.checkextern:
+        config["checkextern"] = True
+    elif not config["checkextern"]:
+        log.info(
+            LOG_CMDLINE,
+            "Checking intern URLs only; use --check-extern to check extern URLs.",
+        )
+
+    if options.output:
+        # the option has the form TYPE[/ENCODING]
+        if "/" in options.output:
+            logtype, encoding = options.output.split("/", 1)
+        else:
+            logtype, encoding = options.output, linkcheck.i18n.default_encoding
+        logtype = logtype.lower()
+        if logtype == "blacklist":
+            log.warn(
+                LOG_CMDLINE,
+                _("blacklist is deprecated for option %(option)s, "
+                  "using failures instead") % {"option": "'-o, --output'"}
+            )
+            logtype = "failures"
+        if logtype not in linkcheck.logger.LoggerNames:
+            print_usage(
+                _("Unknown logger type %(type)r in %(output)r for option %(option)s")
+                % {"type": logtype, "output": options.output, "option": "'-o, --output'"}
+            )
+        if logtype != "none" and not has_encoding(encoding):
+            print_usage(
+                _("Unknown encoding %(encoding)r in %(output)r for option %(option)s")
+                % {
+                    "encoding": encoding,
+                    "output": options.output,
+                    "option": "'-o, --output'",
+                }
+            )
+        config["output"] = logtype
+        config["logger"] = config.logger_new(logtype, encoding=encoding)
+    if options.fileoutput:
+        for arg in options.fileoutput:
+            # fresh keyword arguments per logger, so an encoding or filename
+            # given for one -F argument does not leak into the following ones
+            ns = {"fileoutput": 1}
+            ftype = arg
+            # look for (optional) filename and encoding
+            if "/" in ftype:
+                ftype, suffix = ftype.split("/", 1)
+                if suffix:
+                    if has_encoding(suffix):
+                        # it was an encoding
+                        ns["encoding"] = suffix
+                    elif "/" in suffix:
+                        # look for (optional) encoding
+                        encoding, filename = suffix.split("/", 1)
+                        if has_encoding(encoding):
+                            ns["encoding"] = encoding
+                            ns["filename"] = filename
+                        else:
+                            ns["filename"] = suffix
+                    else:
+                        ns["filename"] = suffix
+            if ftype == "blacklist":
+                log.warn(
+                    LOG_CMDLINE,
+                    _("blacklist logger is deprecated for option %(option)s, "
+                      "using failures instead") % {"option": "'-F, --file-output'"}
+                )
+                ftype = "failures"
+            if ftype not in linkcheck.logger.LoggerNames:
+                print_usage(
+                    _("Unknown logger type %(type)r in %(output)r for option %(option)s")
+                    % {
+                        "type": ftype,
+                        "output": options.fileoutput,
+                        "option": "'-F, --file-output'",
+                    }
+                )
+            if ftype != "none" and "encoding" in ns and not has_encoding(ns["encoding"]):
+                print_usage(
+                    _("Unknown encoding %(encoding)r in %(output)r for option %(option)s")
+                    % {
+                        "encoding": ns["encoding"],
+                        "output": options.fileoutput,
+                        "option": "'-F, --file-output'",
+                    }
+                )
+            logger = config.logger_new(ftype, **ns)
+            config["fileoutput"].append(logger)
+    if options.nntpserver:
+        config["nntpserver"] = options.nntpserver
+    if options.username:
+        _username = options.username
+        constructauth = True
+    if options.password:
+        if _username:
+            msg = _("Enter LinkChecker HTTP/FTP password for user %(user)s:") % {
+                "user": _username
+            }
+        else:
+            msg = _("Enter LinkChecker HTTP/FTP password:")
+        _password = getpass.getpass(console.encode(msg))
+        constructauth = True
+    if options.quiet:
+        config["logger"] = config.logger_new("none")
+    if options.recursionlevel is not None:
+        config["recursionlevel"] = options.recursionlevel
+    if options.status is not None:
+        config["status"] = options.status
+    if options.threads is not None:
+        if options.threads < 1:
+            options.threads = 0
+        config["threads"] = options.threads
+    if options.timeout is not None:
+        if options.timeout > 0:
+            config["timeout"] = options.timeout
+        else:
+            print_usage(
+                _("Illegal argument %(arg)r for option %(option)s")
+                % {"arg": options.timeout, "option": "'--timeout'"}
+            )
+    if options.listplugins:
+        print_plugins(config["pluginfolders"])
+    if options.verbose:
+        config["verbose"] = True
+        config["warnings"] = True
+    if constructauth:
+        config.add_auth(pattern=".+", user=_username, password=_password)
+    # read missing passwords
+    for entry in config["authentication"]:
+        if entry["password"] is None:
+            attrs = entry.copy()
+            attrs["strpattern"] = attrs["pattern"].pattern
+            msg = (
+                _("Enter LinkChecker password for user %(user)s at %(strpattern)s:")
+                % attrs
+            )
+            entry["password"] = getpass.getpass(msg)
+    if options.useragent is not None:
+        config["useragent"] = options.useragent
+    # only configure a cookie file that can actually be read
+    if options.cookiefile is not None:
+        if linkcheck.fileutil.is_readable(options.cookiefile):
+            config["cookiefile"] = options.cookiefile
+        else:
+            msg = _("Could not read cookie file %s") % options.cookiefile
+            log.error(LOG_CMDLINE, msg)
diff --git a/linkchecker b/linkchecker
index 547e18ae..8ebf95de 100755
--- a/linkchecker
+++ b/linkchecker
@@ -20,10 +20,8 @@ client. Run this file with the -h option to see how it's done.
 """
 
 import sys
-import codecs
 import os
 import pprint
-import getpass
 
 # installs _() and _n() gettext functions into global namespace
 import linkcheck
@@ -34,15 +32,11 @@ logconf.init_log_config()
 # override argparse gettext method with the one from linkcheck.init_i18n()
 # argparse._ = _
 # now import the rest of the linkchecker gang
-from linkcheck.cmdline import (
-    print_version,
-    print_usage,
-    aggregate_url,
-    print_plugins,
-)
 from linkcheck.arg_parser import ArgParser
+from linkcheck.setup_config import setup_config
 import linkcheck.configuration
 import linkcheck.fileutil
+from linkcheck.cmdline import aggregate_url, print_usage
 from linkcheck.director import console, check_urls, get_aggregate
 from linkcheck.strformat import stripurl
 
@@ -57,15 +51,6 @@ _username = None
 _password = None
 
 
-def has_encoding(encoding):
-    """Detect if Python can encode in a certain encoding."""
-    try:
-        codecs.lookup(encoding)
-        return True
-    except LookupError:
-        return False
-
-
-
 # instantiate option parser and configure options
 argparser = ArgParser()
 
@@ -97,7 +82,6 @@ def read_stdin_urls():
 
 # read and parse command line options and arguments
 options = argparser.parse_args()
-
 # initialize logging
 if options.debug:
     allowed_debugs = logconf.lognames.keys()
@@ -126,171 +110,9 @@ except linkcheck.LinkCheckerError as msg:
     # config error
     print_usage(str(msg))
 linkcheck.drop_privileges()
-# test if running with -O
-if options.debug and not __debug__:
-    log.warn(LOG_CMDLINE, _("Running with python -O disables debugging."))
-# apply commandline options and arguments to configuration
-constructauth = False
-do_profile = False
-if options.version:
-    print_version()
-if not options.warnings:
-    config["warnings"] = options.warnings
-if options.externstrict:
-    pats = [linkcheck.get_link_pat(arg, strict=True) for arg in options.externstrict]
-    config["externlinks"].extend(pats)
-if options.extern:
-    pats = [linkcheck.get_link_pat(arg) for arg in options.extern]
-    config["externlinks"].extend(pats)
-if options.norobotstxt is not None:
-    config["robotstxt"] = options.norobotstxt
-if options.checkextern:
-    config["checkextern"] = True
-elif not config["checkextern"]:
-    log.info(
-        LOG_CMDLINE,
-        "Checking intern URLs only; use --check-extern to check extern URLs.",
-    )
+# set up config object using options
+setup_config(config, options)
 
-if options.output:
-    if "/" in options.output:
-        logtype, encoding = options.output.split("/", 1)
-    else:
-        logtype, encoding = options.output, linkcheck.i18n.default_encoding
-    logtype = logtype.lower()
-    if logtype == "blacklist":
-        log.warn(
-            LOG_CMDLINE,
-            _("blacklist is deprecated for option %(option)s, "
-              "using failures instead") % {"option": "'-o, --output'"}
-        )
-        logtype = "failures"
-    if logtype not in linkcheck.logger.LoggerNames:
-        print_usage(
-            _("Unknown logger type %(type)r in %(output)r for option %(option)s")
-            % {"type": logtype, "output": options.output, "option": "'-o, --output'"}
-        )
-    if logtype != "none" and not has_encoding(encoding):
-        print_usage(
-            _("Unknown encoding %(encoding)r in %(output)r for option %(option)s")
-            % {
-                "encoding": encoding,
-                "output": options.output,
-                "option": "'-o, --output'",
-            }
-        )
-    config["output"] = logtype
-    config["logger"] = config.logger_new(logtype, encoding=encoding)
-if options.fileoutput:
-    ns = {"fileoutput": 1}
-    for arg in options.fileoutput:
-        ftype = arg
-        # look for (optional) filename and encoding
-        if "/" in ftype:
-            ftype, suffix = ftype.split("/", 1)
-            if suffix:
-                if has_encoding(suffix):
-                    # it was an encoding
-                    ns["encoding"] = suffix
-                elif "/" in suffix:
-                    # look for (optional) encoding
-                    encoding, filename = suffix.split("/", 1)
-                    if has_encoding(encoding):
-                        ns["encoding"] = encoding
-                        ns["filename"] = filename
-                    else:
-                        ns["filename"] = suffix
-                else:
-                    ns["filename"] = suffix
-        if ftype == "blacklist":
-            log.warn(
-                LOG_CMDLINE,
-                _("blacklist logger is deprecated for option %(option)s, "
-                  "using failures instead") % {"option": "'-F, --file-output'"}
-            )
-            ftype = "failures"
-        if ftype not in linkcheck.logger.LoggerNames:
-            print_usage(
-                _("Unknown logger type %(type)r in %(output)r for option %(option)s")
-                % {
-                    "type": ftype,
-                    "output": options.fileoutput,
-                    "option": "'-F, --file-output'",
-                }
-            )
-        if ftype != "none" and "encoding" in ns and not has_encoding(ns["encoding"]):
-            print_usage(
-                _("Unknown encoding %(encoding)r in %(output)r for option %(option)s")
-                % {
-                    "encoding": ns["encoding"],
-                    "output": options.fileoutput,
-                    "option": "'-F, --file-output'",
-                }
-            )
-        logger = config.logger_new(ftype, **ns)
-        config["fileoutput"].append(logger)
-if options.nntpserver:
-    config["nntpserver"] = options.nntpserver
-if options.username:
-    _username = options.username
-    constructauth = True
-if options.password:
-    if _username:
-        msg = _("Enter LinkChecker HTTP/FTP password for user %(user)s:") % {
-            "user": _username
-        }
-    else:
-        msg = _("Enter LinkChecker HTTP/FTP password:")
-    _password = getpass.getpass(console.encode(msg))
-    constructauth = True
-if options.profile:
-    do_profile = options.profile
-if options.quiet:
-    config["logger"] = config.logger_new("none")
-if options.recursionlevel is not None:
-    config["recursionlevel"] = options.recursionlevel
-if options.status is not None:
-    config["status"] = options.status
-if options.threads is not None:
-    if options.threads < 1:
-        options.threads = 0
-    config["threads"] = options.threads
-if options.timeout is not None:
-    if options.timeout > 0:
-        config["timeout"] = options.timeout
-    else:
-        print_usage(
-            _("Illegal argument %(arg)r for option %(option)s")
-            % {"arg": options.timeout, "option": "'--timeout'"}
-        )
-if options.listplugins:
-    print_plugins(config["pluginfolders"])
-if options.verbose:
-    if options.verbose:
-        config["verbose"] = True
-        config["warnings"] = True
-if options.cookiefile is not None:
-    config["cookiefile"] = options.cookiefile
-if constructauth:
-    config.add_auth(pattern=".+", user=_username, password=_password)
-# read missing passwords
-for entry in config["authentication"]:
-    if entry["password"] is None:
-        attrs = entry.copy()
-        attrs["strpattern"] = attrs["pattern"].pattern
-        msg = (
-            _("Enter LinkChecker password for user %(user)s at %(strpattern)s:")
-            % attrs
-        )
-        entry["password"] = getpass.getpass(msg)
-if options.useragent is not None:
-    config["useragent"] = options.useragent
-if options.cookiefile is not None:
-    if linkcheck.fileutil.is_readable(options.cookiefile):
-        config["cookiefile"] = options.cookiefile
-    else:
-        msg = _("Could not read cookie file %s") % options.cookiefile
-        log.error(LOG_CMDLINE, msg)
 # now sanitize the configuration
 config.sanitize()
 
@@ -316,7 +138,8 @@ elif options.url:
 else:
     log.warn(LOG_CMDLINE, _("no files or URLs given"))
 # set up profiling
-if do_profile:
+do_profile = False
+if options.profile:
     if has_profile:
         if os.path.exists(_profile):
             print(
@@ -331,6 +154,7 @@ if do_profile:
             except KeyboardInterrupt:
                 print("", _("Canceled."), file=sys.stderr, sep="\n")
                 sys.exit(1)
+        do_profile = True
     else:
         log.warn(
             LOG_CMDLINE,
@@ -339,7 +163,6 @@ if do_profile:
                 " therefore the --profile option is disabled."
             ),
         )
-        do_profile = False
 
 # finally, start checking
 if do_profile: