mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-24 18:00:24 +00:00
713 lines
28 KiB
Python
Executable file
713 lines
28 KiB
Python
Executable file
#!/usr/bin/python -u
|
|
# -*- coding: iso-8859-1 -*-
|
|
# Copyright (C) 2000-2012 Bastian Kleineidam
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
"""
|
|
Check HTML pages for broken links. This is the commandline
|
|
client. Run this file with the -h option to see how it's done.
|
|
"""
|
|
|
|
import sys
|
|
import codecs
|
|
import re
|
|
import os
|
|
import pprint
|
|
import optparse
|
|
import getpass
|
|
# installs _() and _n() gettext functions into global namespace
|
|
import linkcheck
|
|
# override optparse gettext method with the one from linkcheck.init_i18n()
|
|
optparse._ = _
|
|
# now import the rest of the linkchecker gang
|
|
from linkcheck.cmdline import print_version, print_usage, LCOptionParser, \
|
|
aggregate_url
|
|
from linkcheck import log, LOG_CMDLINE, i18n, strformat
|
|
import linkcheck.checker
|
|
import linkcheck.configuration
|
|
import linkcheck.fileutil
|
|
import linkcheck.logger
|
|
import linkcheck.ansicolor
|
|
from linkcheck.director import console, check_urls, get_aggregate
|
|
# optional modules
|
|
has_optcomplete = linkcheck.fileutil.has_module("optcomplete")
|
|
has_profile = linkcheck.fileutil.has_module("cProfile")
|
|
has_pstats = linkcheck.fileutil.has_module("pstats")
|
|
has_meliae = linkcheck.fileutil.has_module("meliae")
|
|
|
|
# default profiling filename
|
|
_profile = "linkchecker.prof"
|
|
_username = None
|
|
_password = None
|
|
|
|
# usage texts
|
|
Usage = _("""USAGE\tlinkchecker [options] [file-or-url]...""")
|
|
|
|
Notes = _("""NOTES
|
|
o URLs on the command line starting with "ftp." are treated like
|
|
"ftp://ftp.", URLs starting with "www." are treated like "http://www.".
|
|
You can also give local files as arguments.
|
|
o If you have your system configured to automatically establish a
|
|
connection to the internet (e.g. with diald), it will connect when
|
|
checking links not pointing to your local system.
|
|
See the --ignore-url option on how to prevent this.
|
|
o Javascript links are currently ignored.
|
|
o If your platform does not support threading, LinkChecker disables it
|
|
automatically.
|
|
o You can supply multiple user/password pairs in a configuration file.
|
|
o When checking 'news:' links the given NNTP host doesn't need to be the
|
|
same as the host of the user browsing your pages.
|
|
""")
|
|
|
|
ProxySupport = _("""PROXY SUPPORT
|
|
To use a proxy on Unix or Windows set $http_proxy, $https_proxy or $ftp_proxy
|
|
to the proxy URL. The URL should be of the form
|
|
"http://[<user>:<pass>@]<host>[:<port>]".
|
|
LinkChecker also detects manual proxy settings of Internet Explorer under
|
|
Windows systems. On a Mac use the Internet Config to select a proxy.
|
|
|
|
LinkChecker honors the $no_proxy environment variable. It can be a list
|
|
of domain names for which no proxy will be used.
|
|
|
|
Setting a HTTP proxy on Unix for example looks like this:
|
|
|
|
export http_proxy="http://proxy.example.com:8080"
|
|
|
|
Proxy authentication is also supported:
|
|
|
|
export http_proxy="http://user1:mypass@proxy.example.org:8081"
|
|
|
|
Setting a proxy on the Windows command prompt:
|
|
|
|
set http_proxy=http://proxy.example.com:8080
|
|
|
|
""")
|
|
|
|
RegularExpressions = _("""REGULAR EXPRESSIONS
|
|
Only Python regular expressions are accepted by LinkChecker.
|
|
See http://www.amk.ca/python/howto/regex/ for an introduction in
|
|
regular expressions.
|
|
|
|
The only addition is that a leading exclamation mark negates
|
|
the regular expression.
|
|
""")
|
|
|
|
CookieFormat = _("""COOKIE FILES
|
|
A cookie file contains standard RFC 822 header data with the following
|
|
possible names:
|
|
Scheme (optional)
|
|
Sets the scheme the cookies are valid for; default scheme is 'http'.
|
|
Host (required)
|
|
Sets the domain the cookies are valid for.
|
|
Path (optional)
|
|
Gives the path the cookies are valid for; default path is '/'.
|
|
Set-cookie (optional)
|
|
Set cookie name/value. Can be given more than once.
|
|
|
|
Multiple entries are separated by a blank line.
|
|
|
|
The example below will send two cookies to all URLs starting with
|
|
'http://example.org/hello/' and one to all URLs starting
|
|
with 'https://example.com/':
|
|
|
|
Host: example.org
|
|
Path: /hello
|
|
Set-cookie: ID="smee"
|
|
Set-cookie: spam="egg"
|
|
|
|
Scheme: https
|
|
Host: example.com
|
|
Set-cookie: baggage="elitist"; comment="hologram"
|
|
""")
|
|
|
|
Retval = _(r"""RETURN VALUE
|
|
The return value is non-zero when
|
|
o invalid links were found or
|
|
o warnings were found and warnings are enabled
|
|
o a program error occurred
|
|
""")
|
|
|
|
Examples = _(r"""EXAMPLES
|
|
The most common use checks the given domain recursively, plus any
|
|
single URL pointing outside of the domain:
|
|
linkchecker http://www.example.org/
|
|
Beware that this checks the whole site which can have several hundred
|
|
thousands URLs. Use the -r option to restrict the recursion depth.
|
|
|
|
Don't connect to mailto: hosts, only check their URL syntax. All other
|
|
links are checked as usual:
|
|
linkchecker --ignore-url=^mailto: www.example.org
|
|
|
|
Checking local HTML files on Unix:
|
|
linkchecker ../bla.html subdir/blubber.html
|
|
|
|
Checking a local HTML file on Windows:
|
|
linkchecker c:\temp\test.html
|
|
|
|
You can skip the "http://" url part if the domain starts with "www.":
|
|
linkchecker www.example.de
|
|
|
|
You can skip the "ftp://" url part if the domain starts with "ftp.":
|
|
linkchecker -r0 ftp.example.org
|
|
""")
|
|
|
|
LoggerTypes = _(r"""OUTPUT TYPES
|
|
Note that by default only errors and warnings are logged.
|
|
You should use the --verbose option to see valid URLs,
|
|
and --complete when outputting a sitemap graph format.
|
|
|
|
text Standard text output, logging URLs in keyword: argument fashion.
|
|
html Log URLs in keyword: argument fashion, formatted as HTML.
|
|
Additionally has links to the referenced pages. Invalid URLs have
|
|
HTML and CSS syntax check links appended.
|
|
csv Log check result in CSV format with one URL per line.
|
|
gml Log parent-child relations between linked URLs as a GML sitemap
|
|
graph.
|
|
dot Log parent-child relations between linked URLs as a DOT sitemap
|
|
graph.
|
|
gxml Log check result as a GraphXML sitemap graph.
|
|
xml Log check result as machine-readable XML.
|
|
sql Log check result as SQL script with INSERT commands. An example
|
|
script to create the initial SQL table is included as create.sql.
|
|
blacklist
|
|
Suitable for cron jobs. Logs the check result into a file
|
|
~/.linkchecker/blacklist which only contains entries with invalid
|
|
URLs and the number of times they have failed.
|
|
none Logs nothing. Suitable for debugging or checking the exit code.
|
|
""")
|
|
|
|
Warnings = _(r"""IGNORE WARNINGS
|
|
The following warnings are recognized in the 'ignorewarnings' config
|
|
file entry:
|
|
""") + \
|
|
"\n".join([u" o %s - %s" % (tag, desc) \
|
|
for tag, desc in sorted(linkcheck.checker.const.Warnings.items())])
|
|
|
|
|
|
def viewprof ():
    """Display previously collected profiling statistics, then terminate.

    The process always exits from this function: with status 1 when the
    pstats module is unavailable or the profiling file does not exist,
    and with status 0 after the 100 top entries (sorted by cumulative
    time) have been printed.
    """
    if not has_pstats:
        no_pstats = _("The `pstats' Python module is not installed,"
                      " therefore the --viewprof option is disabled.")
        log.error(LOG_CMDLINE, no_pstats)
        sys.exit(1)
    profile_exists = os.path.isfile(_profile)
    if not profile_exists:
        not_found = _("Could not find profiling file %(file)r.") % \
            {"file": _profile}
        log.warn(LOG_CMDLINE, not_found)
        hint = _("Please run linkchecker with --profile to generate it.")
        print >> sys.stderr, hint
        sys.exit(1)
    # imported lazily: only needed (and only known to exist) at this point
    from pstats import Stats
    report = Stats(_profile)
    report.strip_dirs()
    report.sort_stats("cumulative")
    report.print_stats(100)
    sys.exit(0)
|
|
|
|
|
|
def try_compile_re (arg):
    """Compile the regular expression given in arg.

    Returns the compiled pattern object. If arg is not a valid Python
    regular expression, an error naming the expression and the reason is
    logged and the program exits with status 1.
    """
    try:
        return re.compile(arg)
    # "as" form: valid from Python 2.6 on, unlike the old comma form
    except re.error as msg:
        log.error(LOG_CMDLINE,
            _("Syntax error in %(arg)r: %(msg)s") % {"arg": arg, "msg": msg})
        sys.exit(1)
|
|
|
|
|
|
def has_encoding (encoding):
    """Detect if Python can encode in a certain encoding.

    Returns True when the codec registry knows the given encoding name,
    False otherwise. A value that is not a string at all (for example
    None) is also reported as False instead of raising.
    """
    try:
        codecs.lookup(encoding)
    except (LookupError, TypeError):
        # LookupError: unknown codec name; TypeError: not a string
        return False
    return True
|
|
|
|
# instantiate option parser and configure options
|
|
class MyOptionParser (LCOptionParser):
    """Command line option parser of the LinkChecker client."""

    def get_usage (self):
        """Return the translated one-line usage text."""
        return Usage

    def print_help (self, file=None):
        """Print the option help followed by the extra help sections.

        The text is handed to print_help_msg() together with the optional
        file argument.
        """
        sections = (
            self.format_help(),
            Examples,
            LoggerTypes,
            RegularExpressions,
            CookieFormat,
            ProxySupport,
            Notes,
            Retval,
            Warnings,
        )
        # one "%s" slot per section, slots separated by a newline
        template = u"\n".join([u"%s"] * len(sections))
        self.print_help_msg(template % sections, file)
|
|
|
|
# instantiate option parser and configure options
|
|
optparser = MyOptionParser()
|
|
|
|
# build a config object for this check session
|
|
config = linkcheck.configuration.Configuration()
|
|
|
|
################# general options ##################
|
|
group = optparse.OptionGroup(optparser, _("General options"))
|
|
group.add_option("-f", "--config", type="string", dest="configfile",
|
|
metavar="FILENAME",
|
|
help=_(
|
|
"""Use FILENAME as configuration file. Per default LinkChecker uses
|
|
~/.linkchecker/linkcheckerrc (under Windows
|
|
%HOMEPATH%\\.linkchecker\\linkcheckerrc)."""))
|
|
group.add_option("-t", "--threads", type="int", dest="threads",
|
|
metavar="NUMBER",
|
|
help=_(
|
|
"""Generate no more than the given number of threads. Default number
|
|
of threads is 10. To disable threading specify a non-positive number."""))
|
|
group.add_option("-V", "--version", action="store_true", dest="version",
|
|
help=_("""Print version and exit."""))
|
|
group.add_option("--stdin", action="store_true", dest="stdin",
|
|
help=_(
|
|
"""Read list of white-space separated URLs to check from stdin."""))
|
|
optparser.add_option_group(group)
|
|
|
|
################# output options ##################
|
|
group = optparse.OptionGroup(optparser, _("Output options"))
|
|
group.add_option("--check-css", action="store_true", dest="checkcss",
|
|
help=_(
|
|
"""Check syntax of CSS URLs with cssutils. If it's not installed,
|
|
check with the W3C online validator."""))
|
|
group.add_option("--check-html", action="store_true", dest="checkhtml",
|
|
help=_(
|
|
"""Check syntax of HTML URLs with HTML tidy. If it's not installed,
|
|
check with the W3C online validator."""))
|
|
group.add_option("--complete", action="store_true", dest="complete",
|
|
help=_("""Log all URLs, including duplicates.
|
|
Default is to log duplicate URLs only once."""))
|
|
group.add_option("-D", "--debug", type="string", action="append",
|
|
metavar="STRING",
|
|
help=_("""Print debugging output for the given logger.
|
|
Available loggers are %(lognamelist)s.
|
|
Specifying 'all' is an alias for specifying all available loggers.
|
|
The option can be given multiple times to debug with more
|
|
than one logger.
|
|
|
|
For accurate results, threading will be disabled during debug runs.""") % \
|
|
{"lognamelist": linkcheck.lognamelist})
|
|
group.add_option("-F", "--file-output", type="string", action="append",
|
|
dest="fileoutput", metavar="TYPE[/ENCODING][/FILENAME]",
|
|
help=_(
|
|
"""Output to a file linkchecker-out.TYPE, $HOME/.linkchecker/blacklist for
|
|
'blacklist' output, or FILENAME if specified.
|
|
The ENCODING specifies the output encoding, the default is that of your
|
|
locale.
|
|
Valid encodings are listed at http://docs.python.org/lib/standard-encodings.html.
|
|
The FILENAME and ENCODING parts of the 'none' output type will be ignored,
|
|
else if the file already exists, it will be overwritten.
|
|
You can specify this option more than once. Valid file output types
|
|
are %(loggertypes)s. You can specify this option multiple times to output
|
|
to more than one file. Default is no file output. Note that you can
|
|
suppress all console output with the option '-o none'.""") % \
|
|
{'loggertypes': linkcheck.logger.LoggerKeys})
|
|
group.add_option("--no-status", action="store_false", dest="status",
|
|
default=True, help=_(
|
|
"""Do not print check status messages."""))
|
|
group.add_option("--no-warnings", action="store_false", dest="warnings",
|
|
help=_("""Don't log warnings. Default is to log warnings."""))
|
|
group.add_option("-o", "--output", type="string", dest="output",
|
|
metavar="TYPE[/ENCODING]",
|
|
help=_(
|
|
"""Specify output as %(loggertypes)s. Default output type is text.
|
|
The ENCODING specifies the output encoding, the default is that of your
|
|
locale.
|
|
Valid encodings are listed at """ \
|
|
"""http://docs.python.org/lib/standard-encodings.html.""") % \
|
|
{'loggertypes': linkcheck.logger.LoggerKeys})
|
|
group.add_option("--profile", action="store_true", dest="profile",
|
|
help=optparse.SUPPRESS_HELP)
|
|
group.add_option("-q", "--quiet", action="store_true", dest="quiet",
|
|
help=_(
|
|
"""Quiet operation, an alias for '-o none'.
|
|
This is only useful with -F."""))
|
|
group.add_option("--scan-virus", action="store_true", dest="scanvirus",
|
|
help=_(
|
|
"""Scan content of URLs with ClamAV virus scanner."""))
|
|
group.add_option("--trace", action="store_true", dest="trace",
|
|
help=_("""Print tracing information."""))
|
|
group.add_option("-v", "--verbose", action="store_true", dest="verbose",
|
|
help=_(
|
|
"""Log all URLs. Default is to log only errors and warnings."""))
|
|
group.add_option("--viewprof", action="store_true", dest="viewprof",
|
|
help=optparse.SUPPRESS_HELP)
|
|
group.add_option("-W", "--warning-regex", type="string", dest="warningregex",
|
|
metavar="REGEX",
|
|
help=_(
|
|
"""Define a regular expression which prints a warning if it matches
|
|
any content of the checked link. This applies only to valid pages,
|
|
so we can get their content.
|
|
|
|
Use this to check for pages that contain some form of error
|
|
message, for example 'This page has moved' or 'Oracle
|
|
Application error'.
|
|
|
|
Note that multiple values can be combined in the regular expression,
|
|
for example "(This page has moved|Oracle Application error)"."""))
|
|
group.add_option("--warning-size-bytes", dest="warningsizebytes",
|
|
metavar="NUMBER",
|
|
help=_(
|
|
"""Print a warning if content size info is available and exceeds the
|
|
given number of bytes."""))
|
|
optparser.add_option_group(group)
|
|
|
|
|
|
################# checking options ##################
|
|
group = optparse.OptionGroup(optparser, _("Checking options"))
|
|
group.add_option("-a", "--anchors", action="store_true", dest="anchors",
|
|
help=_(
|
|
"""Check HTTP anchor references. Default is not to check anchors.
|
|
This option enables logging of the warning 'url-anchor-not-found'."""))
|
|
group.add_option("-C", "--cookies", action="store_true", dest="cookies",
|
|
help=_(
|
|
"""Accept and send HTTP cookies according to RFC 2109. Only cookies
|
|
which are sent back to the originating server are accepted.
|
|
Sent and accepted cookies are provided as additional logging
|
|
information."""))
|
|
group.add_option("--cookiefile", type="string", dest="cookiefile",
|
|
metavar="FILENAME",
|
|
help=_(
|
|
"""Read a file with initial cookie data. The cookie data format is
|
|
explained below."""))
|
|
group.add_option("--ignore-url", type="string", action="append",
|
|
metavar="REGEX",
|
|
dest="externstrict", help=_(
|
|
"""Only check syntax of URLs matching the given regular expression.
|
|
This option can be given multiple times."""))
|
|
group.add_option("--no-follow-url", type="string", action="append",
|
|
metavar="REGEX",
|
|
dest="extern", help=_(
|
|
"""Check but do not recurse into URLs matching the given regular
|
|
expression. This option can be given multiple times."""))
|
|
group.add_option("-N", "--nntp-server", type="string", dest="nntpserver",
|
|
metavar="STRING",
|
|
help=_(
|
|
"""Specify an NNTP server for 'news:...' links. Default is the
|
|
environment variable NNTP_SERVER. If no host is given,
|
|
only the syntax of the link is checked."""))
|
|
# -p is a plain flag asking for an interactive password prompt. It must
# store True when given: with "store_false" and a False default the value
# could never become true, so the prompt was never shown.
group.add_option("-p", "--password", action="store_true", dest="password",
                 default=False,
                 help=_(
"""Read a password from console and use it for HTTP and FTP authorization.
For FTP the default password is 'anonymous@'. For HTTP there is
no default password. See also -u."""))
|
|
group.add_option("-P", "--pause", type="int", dest="pause",
|
|
metavar="NUMBER",
|
|
help=_(
|
|
"""Pause the given number of seconds between two subsequent connection
|
|
requests to the same host. Default is no pause between requests."""))
|
|
group.add_option("-r", "--recursion-level", type="int", dest="recursionlevel",
|
|
metavar="NUMBER",
|
|
help=_(
|
|
"""Check recursively all links up to given depth. A negative depth
|
|
will enable infinite recursion. Default depth is infinite."""))
|
|
group.add_option("--timeout", type="int", dest="timeout",
|
|
metavar="NUMBER",
|
|
help=_(
|
|
"""Set the timeout for connection attempts in seconds. The default
|
|
timeout is %d seconds.""") % config["timeout"])
|
|
group.add_option("-u", "--user", type="string", dest="username",
|
|
metavar="STRING",
|
|
help=_(
|
|
"""Try the given username for HTTP and FTP authorization.
|
|
For FTP the default username is 'anonymous'. For HTTP there is
|
|
no default username. See also -p."""))
|
|
group.add_option("--user-agent", type="string", dest="useragent",
|
|
metavar="STRING",
|
|
help=_(
|
|
"""Specify the User-Agent string to send to the HTTP server, for example
|
|
"Mozilla/4.0". The default is "LinkChecker/X.Y" where X.Y is the current
|
|
version of LinkChecker."""))
|
|
optparser.add_option_group(group)
|
|
|
|
################# auto completion #####################
|
|
if has_optcomplete:
    import optcomplete

    def FileCompleter (cwd, line, point, prefix, suffix):
        """Return the file names that complete prefix.

        The directory part of prefix (or cwd when prefix has none) is
        listed, and every entry whose name starts with the file name part
        of prefix is returned, joined to that directory part. The line,
        point and suffix arguments are accepted but not used. A directory
        that cannot be listed yields no completions.
        """
        startdir, lhs = os.path.split(prefix)
        listdir = os.path.expanduser(startdir or cwd)
        try:
            entries = os.listdir(listdir)
        except OSError:
            # missing or unreadable directory: nothing to offer
            return []
        return [os.path.join(startdir, f) for f in entries
                if f.startswith(lhs)]

    optcomplete.autocomplete(optparser, arg_completer=FileCompleter)
|
|
|
|
|
|
def read_stdin_urls ():
    """Generate the white-space separated URLs found on standard input.

    Input is consumed in batches of lines (size hint 8 KiB) until a batch
    comes back empty. A progress message is logged after every 10000 URLs.
    """
    count = 0
    # readlines() returns an empty list once stdin is exhausted
    for batch in iter(lambda: sys.stdin.readlines(8 * 1024), []):
        for text in batch:
            for url in text.split():
                count += 1
                if count % 10000 == 0:
                    log.info(LOG_CMDLINE, "Read %d URLs from stdin", count)
                yield url
|
|
|
|
|
|
# read and parse command line options and arguments
|
|
(options, args) = optparser.parse_args()
|
|
# initialize logging
|
|
if options.debug:
|
|
allowed_debugs = linkcheck.lognames.keys()
|
|
for _name in options.debug:
|
|
if _name not in allowed_debugs:
|
|
print_usage(_("Invalid debug level %(level)r") % {'level': _name})
|
|
config.init_logging(console.StatusLogger(), debug=options.debug)
|
|
log.debug(LOG_CMDLINE, _("Python %(version)s on %(platform)s") % \
|
|
{"version": sys.version, "platform": sys.platform})
|
|
# read configuration files
|
|
try:
|
|
files = []
|
|
if options.configfile:
|
|
path = linkcheck.configuration.normpath(options.configfile)
|
|
if os.path.isfile(path):
|
|
files.append(path)
|
|
else:
|
|
log.warn(LOG_CMDLINE,
|
|
_("Unreadable config file: %r"), options.configfile)
|
|
config.read(files=files)
|
|
except linkcheck.LinkCheckerError, msg:
|
|
# config error
|
|
print_usage(str(msg))
|
|
linkcheck.drop_privileges()
|
|
# test if running with -O
|
|
if options.debug and not __debug__:
|
|
log.warn(LOG_CMDLINE, _("Running with python -O disables debugging."))
|
|
# apply commandline options and arguments to configuration
|
|
constructauth = False
|
|
do_profile = False
|
|
if options.warnings is not None:
|
|
config["warnings"] = options.warnings
|
|
if options.anchors is not None:
|
|
config["anchors"] = options.anchors
|
|
if options.externstrict:
|
|
pats = [linkcheck.get_link_pat(arg, strict=True) \
|
|
for arg in options.externstrict]
|
|
config["externlinks"].extend(pats)
|
|
if options.extern:
|
|
pats = [linkcheck.get_link_pat(arg) for arg in options.extern]
|
|
config["externlinks"].extend(pats)
|
|
if options.output:
|
|
if "/" in options.output:
|
|
logtype, encoding = options.output.split("/", 1)
|
|
else:
|
|
logtype, encoding = options.output, i18n.default_encoding
|
|
logtype = logtype.lower()
|
|
if logtype not in linkcheck.logger.Loggers:
|
|
print_usage(
|
|
_("Unknown logger type %(type)r in %(output)r for option %(option)s") % \
|
|
{"type": logtype, "output": options.output, "option": "'-o, --output'"})
|
|
if logtype != 'none' and not has_encoding(encoding):
|
|
print_usage(
|
|
_("Unknown encoding %(encoding)r in %(output)r for option %(option)s") % \
|
|
{"encoding": encoding, "output": options.output,
|
|
"option": "'-o, --output'"})
|
|
config['output'] = logtype
|
|
config['logger'] = config.logger_new(logtype, encoding=encoding)
|
|
# Create one file logger per -F/--file-output argument. Each argument has
# the form TYPE[/ENCODING][/FILENAME].
if options.fileoutput:
    for arg in options.fileoutput:
        # Fresh keyword arguments for every -F argument: an encoding or
        # filename given for one output must not be inherited by the next.
        ns = {'fileoutput': 1}
        ftype = arg
        # look for (optional) filename and encoding
        if '/' in ftype:
            ftype, suffix = ftype.split('/', 1)
            if suffix:
                if has_encoding(suffix):
                    # it was an encoding
                    ns['encoding'] = suffix
                elif '/' in suffix:
                    # look for (optional) encoding
                    encoding, filename = suffix.split('/', 1)
                    if has_encoding(encoding):
                        ns['encoding'] = encoding
                        ns['filename'] = filename
                    else:
                        # first part is no encoding: all of it is a path
                        ns['filename'] = suffix
                else:
                    ns['filename'] = suffix
        # report the offending argument itself, not the whole option list
        if ftype not in linkcheck.logger.Loggers:
            print_usage(
     _("Unknown logger type %(type)r in %(output)r for option %(option)s") % \
              {"type": ftype, "output": arg,
               "option": "'-F, --file-output'"})
        if ftype != 'none' and 'encoding' in ns and \
           not has_encoding(ns['encoding']):
            print_usage(
     _("Unknown encoding %(encoding)r in %(output)r for option %(option)s") % \
              {"encoding": ns['encoding'], "output": arg,
               "option": "'-F, --file-output'"})
        logger = config.logger_new(ftype, **ns)
        config['fileoutput'].append(logger)
|
|
if options.nntpserver:
|
|
config["nntpserver"] = options.nntpserver
|
|
if options.username is not None:
|
|
_username = options.username
|
|
constructauth = True
|
|
if options.password:
|
|
if _username:
|
|
msg = _("Enter LinkChecker HTTP/FTP password for user %(user)s:") % \
|
|
{"user": _username}
|
|
else:
|
|
msg = _("Enter LinkChecker HTTP/FTP password:")
|
|
_password = getpass.getpass(console.encode(msg))
|
|
constructauth = True
|
|
if options.pause is not None:
|
|
if options.pause >= 0:
|
|
config["wait"] = options.pause
|
|
else:
|
|
print_usage(_("Illegal argument %(arg)r for option %(option)s") % \
|
|
{"arg": options.pause, "option": "'-P, --pause'"})
|
|
if options.profile is not None:
|
|
do_profile = options.profile
|
|
if options.quiet is not None:
|
|
config['logger'] = config.logger_new('none')
|
|
if options.recursionlevel is not None:
|
|
config["recursionlevel"] = options.recursionlevel
|
|
if options.status is not None:
|
|
config['status'] = options.status
|
|
if options.threads is not None:
|
|
if options.threads < 1:
|
|
options.threads = 0
|
|
config["threads"] = options.threads
|
|
if options.timeout is not None:
|
|
if options.timeout > 0:
|
|
config["timeout"] = options.timeout
|
|
else:
|
|
print_usage(_("Illegal argument %(arg)r for option %(option)s") % \
|
|
{"arg": options.timeout, "option": "'--timeout'"})
|
|
if options.version is not None:
|
|
print_version()
|
|
if options.verbose is not None:
|
|
if options.verbose:
|
|
config["verbose"] = True
|
|
config["warnings"] = True
|
|
if options.complete is not None:
|
|
if options.complete:
|
|
config["complete"] = True
|
|
config["verbose"] = True
|
|
config["warnings"] = True
|
|
if options.viewprof:
|
|
viewprof()
|
|
if options.warningregex is not None:
|
|
config["warningregex"] = try_compile_re(options.warningregex)
|
|
config["warnings"] = True
|
|
if options.warningsizebytes is not None:
|
|
config["warnsizebytes"] = options.warningsizebytes
|
|
if options.cookies is not None:
|
|
config['storecookies'] = config['sendcookies'] = options.cookies
|
|
if options.cookiefile is not None:
|
|
config['cookiefile'] = options.cookiefile
|
|
config['storecookies'] = config['sendcookies'] = True
|
|
if constructauth:
|
|
config.add_auth(pattern=".+", user=_username, password=_password)
|
|
if options.scanvirus is not None:
|
|
config["scanvirus"] = options.scanvirus
|
|
# boolean options for syntaxcheck
|
|
for option in ("checkhtml", "checkcss"):
|
|
if getattr(options, option) is not None:
|
|
config[option] = getattr(options, option)
|
|
# read missing passwords
|
|
for entry in config["authentication"]:
|
|
if entry["password"] is None:
|
|
attrs = entry.copy()
|
|
attrs["strpattern"] = attrs["pattern"].pattern
|
|
msg = _("Enter LinkChecker password for user %(user)s" \
|
|
" at %(strpattern)s:") % attrs
|
|
entry["password"] = getpass.getpass(console.encode(msg))
|
|
if options.useragent is not None:
|
|
config["useragent"] = options.useragent
|
|
# now sanitize the configuration
|
|
config.sanitize()
|
|
|
|
log.debug(LOG_CMDLINE, "configuration: %s",
|
|
pprint.pformat(sorted(config.items())))
|
|
|
|
# prepare checking queue
|
|
aggregate = get_aggregate(config)
|
|
if options.cookiefile is not None:
|
|
try:
|
|
cookies = linkcheck.cookies.from_file(options.cookiefile)
|
|
for headers, scheme, host, path in cookies:
|
|
aggregate.cookies.add(headers, scheme, host, path)
|
|
config["sendcookies"] = True
|
|
except StandardError:
|
|
log.error(LOG_CMDLINE,
|
|
_("Could not parse cookie file: %s"), sys.exc_info()[1])
|
|
sys.exit(1)
|
|
if options.trace:
|
|
config["trace"] = True
|
|
import linkcheck.trace
|
|
linkcheck.trace.trace_filter([r"^linkcheck"])
|
|
linkcheck.trace.trace_on()
|
|
# add urls to queue
|
|
if options.stdin:
|
|
for url in read_stdin_urls():
|
|
aggregate_url(aggregate, url)
|
|
elif args:
|
|
for url in args:
|
|
aggregate_url(aggregate, strformat.stripurl(url))
|
|
else:
|
|
log.warn(LOG_CMDLINE, _("no files or URLs given"))
|
|
# set up profiling
|
|
if do_profile:
|
|
if has_profile:
|
|
if os.path.exists(_profile):
|
|
question = _("""Overwrite profiling file %(file)r?
|
|
Press Ctrl-C to cancel, RETURN to continue.""") % {"file": _profile}
|
|
try:
|
|
raw_input(question.encode(i18n.default_encoding, 'replace'))
|
|
except KeyboardInterrupt:
|
|
print >> sys.stderr
|
|
print >> sys.stderr, _("Canceled.")
|
|
sys.exit(1)
|
|
else:
|
|
log.warn(LOG_CMDLINE,
|
|
_("The `cProfile' Python module is not installed,"
|
|
" therefore the --profile option is disabled."))
|
|
do_profile = False
|
|
|
|
# finally, start checking
|
|
if do_profile:
|
|
import cProfile
|
|
cProfile.run("check_urls(aggregate)", _profile)
|
|
else:
|
|
check_urls(aggregate)
|
|
if config["debugmemory"]:
|
|
import linkcheck.memoryutil
|
|
if has_meliae:
|
|
log.info(LOG_CMDLINE, _(u"Dumping memory statistics..."))
|
|
filename = linkcheck.memoryutil.write_memory_dump()
|
|
message = _(u"The memory dump has been written to `%(filename)s'.")
|
|
log.info(LOG_CMDLINE, message % dict(filename=filename))
|
|
else:
|
|
log.warn(LOG_CMDLINE, linkcheck.memoryutil.MemoryDebugMsg)
|
|
|
|
stats = config['logger'].stats
|
|
# on internal errors, exit with status 2
|
|
if stats.internal_errors:
|
|
sys.exit(2)
|
|
# on errors or printed warnings, exit with status 1
|
|
if stats.errors or (stats.warnings_printed and config['warnings']):
|
|
sys.exit(1)
|