diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index 1999e7b5..2bb24c7b 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -30,8 +30,6 @@ if sys.version_info < (3, 6, 0, 'final', 0): import os import re -import signal -import traceback from . import i18n, log from .logconf import ( @@ -127,40 +125,3 @@ def init_i18n(loc=None): # initialize i18n, puts _() and _n() function into global namespace init_i18n() - - -def drop_privileges(): - """Make sure to drop root privileges on POSIX systems.""" - if os.name != 'posix': - return - if os.geteuid() == 0: - log.warn( - LOG_CHECK, - _( - "Running as root user; " - "dropping privileges by changing user to nobody." - ), - ) - import pwd - - os.seteuid(pwd.getpwnam('nobody')[3]) - - -if hasattr(signal, "SIGUSR1"): - # install SIGUSR1 handler - from .decorators import signal_handler - - @signal_handler(signal.SIGUSR1) - def print_threadstacks(sig, frame): - """Print stack traces of all running threads.""" - log.warn(LOG_THREAD, "*** STACKTRACE START ***") - for threadId, stack in sys._current_frames().items(): - log.warn(LOG_THREAD, "# ThreadID: %s" % threadId) - for filename, lineno, name, line in traceback.extract_stack(stack): - log.warn( - LOG_THREAD, 'File: "%s", line %d, in %s' % (filename, lineno, name) - ) - line = line.strip() - if line: - log.warn(LOG_THREAD, " %s" % line) - log.warn(LOG_THREAD, "*** STACKTRACE END ***") diff --git a/linkcheck/__main__.py b/linkcheck/__main__.py new file mode 100644 index 00000000..89628904 --- /dev/null +++ b/linkcheck/__main__.py @@ -0,0 +1,3 @@ +from .command.linkchecker import linkchecker + +linkchecker() diff --git a/linkcheck/command/__init__.py b/linkcheck/command/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/linkcheck/arg_parser.py b/linkcheck/command/arg_parser.py similarity index 99% rename from linkcheck/arg_parser.py rename to linkcheck/command/arg_parser.py index 5afea4f3..4762c471 100644 --- a/linkcheck/arg_parser.py 
+++ b/linkcheck/command/arg_parser.py @@ -19,11 +19,9 @@ Create command line arguments. import argparse -from .cmdline import ( - LCArgumentParser, -) +from .. import checker, logconf, logger -from . import checker, logconf, logger +from ..cmdline import LCArgumentParser # usage texts Notes = _( diff --git a/linkcheck/command/linkchecker.py b/linkcheck/command/linkchecker.py new file mode 100644 index 00000000..807608d5 --- /dev/null +++ b/linkcheck/command/linkchecker.py @@ -0,0 +1,228 @@ +# Copyright (C) 2000-2014 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" +Check HTML pages for broken links. This is the commandline +client. Run this file with the -h option to see how it's done. +""" + + +import os +import pprint +import signal +import sys +import traceback + +from .arg_parser import ArgParser +from .setup_config import setup_config + +from .. import configuration +from .. import fileutil +from .. import log +from .. import logconf +from .. 
import LinkCheckerError +from ..cmdline import aggregate_url, print_usage +from ..director import console, check_urls, get_aggregate +from ..logconf import LOG_CHECK, LOG_CMDLINE, LOG_THREAD +from ..strformat import stripurl + + +def drop_privileges(): + """Make sure to drop root privileges on POSIX systems.""" + if os.name != 'posix': + return + if os.geteuid() == 0: + log.warn( + LOG_CHECK, + _( + "Running as root user; " + "dropping privileges by changing user to nobody." + ), + ) + import pwd + + os.seteuid(pwd.getpwnam('nobody').pw_uid)  # UID; index 3 is the GID + + +def linkchecker(): + if hasattr(signal, "SIGUSR1"): + # install SIGUSR1 handler + from ..decorators import signal_handler + + @signal_handler(signal.SIGUSR1) + def print_threadstacks(sig, frame): + """Print stack traces of all running threads.""" + log.warn(LOG_THREAD, "*** STACKTRACE START ***") + for threadId, stack in sys._current_frames().items(): + log.warn(LOG_THREAD, "# ThreadID: %s" % threadId) + for filename, lineno, name, line in traceback.extract_stack(stack): + log.warn( + LOG_THREAD, + 'File: "%s", line %d, in %s' % (filename, lineno, name) + ) + line = line.strip() + if line: + log.warn(LOG_THREAD, " %s" % line) + log.warn(LOG_THREAD, "*** STACKTRACE END ***") + + logconf.init_log_config() + + # optional modules + has_argcomplete = fileutil.has_module("argcomplete") + has_profile = fileutil.has_module("yappi") + has_meliae = fileutil.has_module("meliae") + + # default profiling filename + _profile = "linkchecker.prof" + + def read_stdin_urls(): + """Read list of URLs, separated by white-space, from stdin.""" + num = 0 + while True: + lines = sys.stdin.readlines(8 * 1024) + if not lines: + break + for line in lines: + for url in line.split(): + num += 1 + if num % 10000 == 0: + log.info(LOG_CMDLINE, "Read %d URLs from stdin", num) + yield url + + # instantiate command line option parser + argparser = ArgParser() + + # build a config object for this check session + config = configuration.Configuration() + 
config.set_status_logger(console.StatusLogger()) + + # ================= auto completion ===================== + if has_argcomplete: + import argcomplete + + argcomplete.autocomplete(argparser) + + # read and parse command line options and arguments + options = argparser.parse_args() + # initialize logging + if options.debug: + allowed_debugs = logconf.lognames.keys() + for _name in options.debug: + if _name not in allowed_debugs: + print_usage(_("Invalid debug level %(level)r") % {"level": _name}) + logconf.set_debug(options.debug) + elif options.quiet: + logconf.reset_loglevel() + log.debug( + LOG_CMDLINE, + _("Python %(version)s on %(platform)s") + % {"version": sys.version, "platform": sys.platform}, + ) + # read configuration files + try: + files = [] + if options.configfile: + path = configuration.normpath(options.configfile) + if os.path.isfile(path): + files.append(path) + else: + log.warn( + LOG_CMDLINE, _("Unreadable config file: %r"), options.configfile) + config.read(files=files) + except LinkCheckerError as msg: + # config error + print_usage(str(msg)) + drop_privileges() + # set up config object using options + setup_config(config, options) + # now sanitize the configuration + config.sanitize() + + log.debug(LOG_CMDLINE, "configuration: %s", pprint.pformat(sorted(config.items()))) + + # prepare checking queue + aggregate = get_aggregate(config) + if options.trace: + # enable thread tracing + config["trace"] = True + # start trace in mainthread + from .. 
import trace + + trace.trace_filter([r"^linkcheck"]) + trace.trace_on() + # add urls to queue + if options.stdin: + for url in read_stdin_urls(): + aggregate_url(aggregate, url) + elif options.url: + for url in options.url: + aggregate_url(aggregate, stripurl(url)) + else: + log.warn(LOG_CMDLINE, _("no files or URLs given")) + # set up profiling + do_profile = False + if options.profile: + if has_profile: + if os.path.exists(_profile): + print( + _( + "Overwrite profiling file %(file)r?\n" + "Press Ctrl-C to cancel, RETURN to continue." + ) + % {"file": _profile} + ) + try: + input() + except KeyboardInterrupt: + print("", _("Canceled."), file=sys.stderr, sep="\n") + sys.exit(1) + do_profile = True + else: + log.warn( + LOG_CMDLINE, + _( + "The `yappi' Python module is not installed," + " therefore the --profile option is disabled." + ), + ) + + # finally, start checking + if do_profile: + import yappi + + yappi.start() + check_urls(aggregate) + yappi.stop() + yappi.get_func_stats().save(_profile) + else: + check_urls(aggregate) + if config["debugmemory"]: + from .. 
import memoryutil + + if has_meliae: + log.info(LOG_CMDLINE, _("Dumping memory statistics...")) + filename = memoryutil.write_memory_dump() + message = _("The memory dump has been written to `%(filename)s'.") + log.info(LOG_CMDLINE, message % dict(filename=filename)) + else: + log.warn(LOG_CMDLINE, memoryutil.MemoryDebugMsg) + + stats = config["logger"].stats + # on internal errors, exit with status 2 + if stats.internal_errors: + sys.exit(2) + # on errors or printed warnings, exit with status 1 + if stats.errors or (stats.warnings_printed and config["warnings"]): + sys.exit(1) diff --git a/linkcheck/setup_config.py b/linkcheck/command/setup_config.py similarity index 87% rename from linkcheck/setup_config.py rename to linkcheck/command/setup_config.py index 01f84af2..41007213 100644 --- a/linkcheck/setup_config.py +++ b/linkcheck/command/setup_config.py @@ -20,12 +20,15 @@ Configure linkchecker using command-line options and configuration. import codecs import getpass -from .cmdline import print_version, print_usage, print_plugins -from .director import console +from .. import fileutil +from .. import i18n +from .. import logger -from . import LOG_CMDLINE -from . import log -import linkcheck +from .. import LOG_CMDLINE +from .. 
import get_link_pat, log + +from ..cmdline import print_version, print_usage, print_plugins +from ..director import console def has_encoding(encoding): @@ -52,10 +55,10 @@ def setup_config(config, options): if not options.warnings: config["warnings"] = options.warnings if options.externstrict: - pats = [linkcheck.get_link_pat(arg, strict=True) for arg in options.externstrict] + pats = [get_link_pat(arg, strict=True) for arg in options.externstrict] config["externlinks"].extend(pats) if options.extern: - pats = [linkcheck.get_link_pat(arg) for arg in options.extern] + pats = [get_link_pat(arg) for arg in options.extern] config["externlinks"].extend(pats) if options.norobotstxt is not None: config["robotstxt"] = options.norobotstxt @@ -71,7 +74,7 @@ def setup_config(config, options): if "/" in options.output: logtype, encoding = options.output.split("/", 1) else: - logtype, encoding = options.output, linkcheck.i18n.default_encoding + logtype, encoding = options.output, i18n.default_encoding logtype = logtype.lower() if logtype == "blacklist": log.warn( @@ -80,10 +83,12 @@ def setup_config(config, options): "using failures instead") % {"option": "'-o, --output'"} ) logtype = "failures" - if logtype not in linkcheck.logger.LoggerNames: + if logtype not in logger.LoggerNames: print_usage( _("Unknown logger type %(type)r in %(output)r for option %(option)s") - % {"type": logtype, "output": options.output, "option": "'-o, --output'"} + % {"type": logtype, + "output": options.output, + "option": "'-o, --output'"} ) if logtype != "none" and not has_encoding(encoding): print_usage( @@ -124,26 +129,29 @@ def setup_config(config, options): "using failures instead") % {"option": "'-F, --file-output'"} ) ftype = "failures" - if ftype not in linkcheck.logger.LoggerNames: + if ftype not in logger.LoggerNames: print_usage( - _("Unknown logger type %(type)r in %(output)r for option %(option)s") + _("Unknown logger type %(type)r in %(output)r" + " for option %(option)s") % { "type": 
ftype, "output": options.fileoutput, "option": "'-F, --file-output'", } ) - if ftype != "none" and "encoding" in ns and not has_encoding(ns["encoding"]): + if ftype != "none" and "encoding" in ns \ + and not has_encoding(ns["encoding"]): print_usage( - _("Unknown encoding %(encoding)r in %(output)r for option %(option)s") + _("Unknown encoding %(encoding)r in %(output)r" + " for option %(option)s") % { "encoding": ns["encoding"], "output": options.fileoutput, "option": "'-F, --file-output'", } ) - logger = config.logger_new(ftype, **ns) - config["fileoutput"].append(logger) + new_logger = config.logger_new(ftype, **ns) + config["fileoutput"].append(new_logger) if options.nntpserver: config["nntpserver"] = options.nntpserver if options.username: @@ -199,7 +207,7 @@ def setup_config(config, options): if options.useragent is not None: config["useragent"] = options.useragent if options.cookiefile is not None: - if linkcheck.fileutil.is_readable(options.cookiefile): + if fileutil.is_readable(options.cookiefile): config["cookiefile"] = options.cookiefile else: msg = _("Could not read cookie file %s") % options.cookiefile diff --git a/linkchecker b/linkchecker deleted file mode 100755 index 8ebf95de..00000000 --- a/linkchecker +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/bin/python3 -u -# Copyright (C) 2000-2014 Bastian Kleineidam -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -""" -Check HTML pages for broken links. This is the commandline -client. Run this file with the -h option to see how it's done. -""" - -import sys -import os -import pprint - -# installs _() and _n() gettext functions into global namespace -import linkcheck -from linkcheck import log, logconf -LOG_CMDLINE = linkcheck.LOG_CMDLINE - -logconf.init_log_config() -# override argparse gettext method with the one from linkcheck.init_i18n() -# argparse._ = _ -# now import the rest of the linkchecker gang -from linkcheck.arg_parser import ArgParser -from linkcheck.setup_config import setup_config -import linkcheck.configuration -import linkcheck.fileutil -from linkcheck.cmdline import aggregate_url, print_usage -from linkcheck.director import console, check_urls, get_aggregate -from linkcheck.strformat import stripurl - -# optional modules -has_argcomplete = linkcheck.fileutil.has_module("argcomplete") -has_profile = linkcheck.fileutil.has_module("yappi") -has_meliae = linkcheck.fileutil.has_module("meliae") - -# default profiling filename -_profile = "linkchecker.prof" -_username = None -_password = None - - -# instantiate option parser and configure options -argparser = ArgParser() - -# build a config object for this check session -config = linkcheck.configuration.Configuration() -config.set_status_logger(console.StatusLogger()) - -# ================= auto completion ===================== -if has_argcomplete: - import argcomplete - - argcomplete.autocomplete(argparser) - - -def read_stdin_urls(): - """Read list of URLs, separated by white-space, from stdin.""" - num = 0 - while True: - lines = sys.stdin.readlines(8 * 1024) - if not lines: - break - for line in lines: - for url in line.split(): - num += 1 - if num % 10000 == 0: - log.info(LOG_CMDLINE, "Read %d 
URLs from stdin", num) - yield url - - -# read and parse command line options and arguments -options = argparser.parse_args() -# initialize logging -if options.debug: - allowed_debugs = logconf.lognames.keys() - for _name in options.debug: - if _name not in allowed_debugs: - print_usage(_("Invalid debug level %(level)r") % {"level": _name}) - logconf.set_debug(options.debug) -elif options.quiet: - logconf.reset_loglevel() -log.debug( - LOG_CMDLINE, - _("Python %(version)s on %(platform)s") - % {"version": sys.version, "platform": sys.platform}, -) -# read configuration files -try: - files = [] - if options.configfile: - path = linkcheck.configuration.normpath(options.configfile) - if os.path.isfile(path): - files.append(path) - else: - log.warn(LOG_CMDLINE, _("Unreadable config file: %r"), options.configfile) - config.read(files=files) -except linkcheck.LinkCheckerError as msg: - # config error - print_usage(str(msg)) -linkcheck.drop_privileges() -# set up config object using options -setup_config(config, options) - -# now sanitize the configuration -config.sanitize() - -log.debug(LOG_CMDLINE, "configuration: %s", pprint.pformat(sorted(config.items()))) - -# prepare checking queue -aggregate = get_aggregate(config) -if options.trace: - # enable thread tracing - config["trace"] = True - # start trace in mainthread - import linkcheck.trace - - linkcheck.trace.trace_filter([r"^linkcheck"]) - linkcheck.trace.trace_on() -# add urls to queue -if options.stdin: - for url in read_stdin_urls(): - aggregate_url(aggregate, url) -elif options.url: - for url in options.url: - aggregate_url(aggregate, stripurl(url)) -else: - log.warn(LOG_CMDLINE, _("no files or URLs given")) -# set up profiling -do_profile = False -if options.profile: - if has_profile: - if os.path.exists(_profile): - print( - _( - "Overwrite profiling file %(file)r?\n" - "Press Ctrl-C to cancel, RETURN to continue." 
- ) - % {"file": _profile} - ) - try: - input() - except KeyboardInterrupt: - print("", _("Canceled."), file=sys.stderr, sep="\n") - sys.exit(1) - do_profile = True - else: - log.warn( - LOG_CMDLINE, - _( - "The `yappi' Python module is not installed," - " therefore the --profile option is disabled." - ), - ) - -# finally, start checking -if do_profile: - import yappi - - yappi.start() - check_urls(aggregate) - yappi.stop() - yappi.get_func_stats().save(_profile) -else: - check_urls(aggregate) -if config["debugmemory"]: - import linkcheck.memoryutil - - if has_meliae: - log.info(LOG_CMDLINE, _("Dumping memory statistics...")) - filename = linkcheck.memoryutil.write_memory_dump() - message = _("The memory dump has been written to `%(filename)s'.") - log.info(LOG_CMDLINE, message % dict(filename=filename)) - else: - log.warn(LOG_CMDLINE, linkcheck.memoryutil.MemoryDebugMsg) - -stats = config["logger"].stats -# on internal errors, exit with status 2 -if stats.internal_errors: - sys.exit(2) -# on errors or printed warnings, exit with status 1 -if stats.errors or (stats.warnings_printed and config["warnings"]): - sys.exit(1) diff --git a/setup.cfg b/setup.cfg index 3e2c0772..4c3856b8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -22,7 +22,6 @@ ignore = [flake8] filename = *.py - ./linkchecker extend-exclude = build/ _LinkChecker_configdata.py @@ -37,7 +36,6 @@ per-file-ignores = # In several files imports intentionally cause: # E402: module level import not at top of file # F401: module imported but unused - linkchecker: E402 setup.py: E402 doc/src/conf.py: E402,F821 linkcheck/__init__.py: E402,F401 @@ -48,6 +46,7 @@ per-file-ignores = # E501: line too long linkcheck/ftpparse.py: E501 linkcheck/checker/unknownurl.py: E501 + linkcheck/command/arg_parser.py: E501 scripts/update_iana_uri_schemes.py: E501 tests/test_ftpparse.py: E501 # F821 undefined name diff --git a/setup.py b/setup.py index 287895fa..c00bdc0d 100755 --- a/setup.py +++ b/setup.py @@ -217,11 +217,6 @@ class 
MyInstallData(install_data): class MyDistribution(Distribution): """Custom distribution class generating config file.""" - def __init__(self, attrs): - """Set console and windows scripts.""" - super().__init__(attrs) - self.console = ["linkchecker"] - def run_commands(self): """Generate config file and run commands.""" cwd = os.getcwd() @@ -306,8 +301,6 @@ class MyClean(clean): # scripts -scripts = ["linkchecker"] - myname = "LinkChecker Authors" myemail = "" @@ -370,7 +363,11 @@ setup( "clean": MyClean, }, packages=find_packages(include=["linkcheck", "linkcheck.*"]), - scripts=scripts, + entry_points={ + "console_scripts": [ + "linkchecker = linkcheck.command.linkchecker:linkchecker" + ] + }, data_files=data_files, classifiers=[ "Topic :: Internet :: WWW/HTTP :: Site Management :: Link Checking", @@ -388,6 +385,7 @@ setup( python_requires=">= 3.6", setup_requires=["setuptools_scm"], install_requires=[ + "importlib_metadata;python_version<'3.8'", "requests >= 2.4", "dnspython >= 2.0", "beautifulsoup4 >= 4.8.1", diff --git a/tests/__init__.py b/tests/__init__.py index 0e184cf6..241137a8 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -24,10 +24,6 @@ from functools import lru_cache, wraps from linkcheck import LinkCheckerInterrupt -basedir = os.path.dirname(__file__) -linkchecker_cmd = os.path.join(os.path.dirname(basedir), "linkchecker") - - def run(cmd, verbosity=0, **kwargs): """Run command without error checking. @return: command return code""" diff --git a/tests/test_linkchecker.py b/tests/test_linkchecker.py index 3559f1ee..6074be8f 100644 --- a/tests/test_linkchecker.py +++ b/tests/test_linkchecker.py @@ -14,12 +14,12 @@ # along with this program. If not, see . import unittest import sys -from . import linkchecker_cmd, run_checked +from . 
import run_checked -def run_with_options(options, cmd=linkchecker_cmd): +def run_with_options(options): """Run a command with given options.""" - run_checked([sys.executable, cmd] + options) + run_checked([sys.executable, "-m", "linkcheck"] + options) class TestLinkchecker(unittest.TestCase):