mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-23 01:10:27 +00:00
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2860 e7d03fd6-7b0d-0410-9947-9c21f3af8025
717 lines
27 KiB
Python
Executable file
717 lines
27 KiB
Python
Executable file
#!/usr/bin/python2.4
|
|
# -*- coding: iso-8859-1 -*-
|
|
# Copyright (C) 2000-2005 Bastian Kleineidam
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
"""
|
|
Check HTML pages for broken links.
|
|
"""
|
|
|
|
import sys
|
|
import codecs
|
|
import re
|
|
import os
|
|
import pprint
|
|
import socket
|
|
import optparse
|
|
|
|
# set default 60 seconds socket timeout before importing anything else
|
|
default_timeout = 60
|
|
socket.setdefaulttimeout(default_timeout)
|
|
|
|
import linkcheck
|
|
# override optparse gettext method with the one from linkcheck.init_i18n()
|
|
optparse._ = _
|
|
# now import the rest of the linkchecker gang
|
|
import linkcheck.log
|
|
import linkcheck.checker
|
|
import linkcheck.checker.cache
|
|
import linkcheck.checker.consumer
|
|
import linkcheck.configuration
|
|
import linkcheck.strformat
|
|
# optional modules
|
|
try:
|
|
import optcomplete
|
|
has_optcomplete = True
|
|
except ImportError:
|
|
has_optcomplete = False
|
|
try:
|
|
import profile
|
|
has_profile = True
|
|
except ImportError:
|
|
has_profile = False
|
|
try:
|
|
import pstats
|
|
has_pstats = True
|
|
except ImportError:
|
|
has_pstats = False
|
|
|
|
# default profiling filename
|
|
_profile = "linkchecker.prof"
|
|
_username = None
|
|
_password = None
|
|
|
|
# main usage text
|
|
Usage = _("""USAGE\tlinkchecker [options] file-or-url...
|
|
""")
|
|
|
|
Notes = _("""NOTES
|
|
o URLs on the command line starting with "ftp." are treated like
|
|
"ftp://ftp.", URLs starting with "www." are treated like "http://www.".
|
|
You can also give local files as arguments.
|
|
o If you have your system configured to automatically establish a
|
|
connection to the internet (e.g. with diald), it will connect when
|
|
checking links not pointing to your local system.
|
|
See the --ignore-url option on how to prevent this.
|
|
o Javascript links are currently ignored.
|
|
o If your platform does not support threading, LinkChecker disables it
|
|
automatically.
|
|
o You can supply multiple user/password pairs in a configuration file.
|
|
o To use proxies set $http_proxy, $https_proxy, $ftp_proxy, $gopher_proxy
|
|
on Unix or Windows.
|
|
On a Mac use the Internet Config.
|
|
o When checking 'news:' links the given NNTP host doesn't need to be the
|
|
same as the host of the user browsing your pages.
|
|
""")
|
|
|
|
RegularExpressions = _("""REGULAR EXPRESSIONS
|
|
Only Python regular expressions are accepted by LinkChecker.
|
|
See http://www.amk.ca/python/howto/regex/ for an introduction in
|
|
regular expressions.
|
|
|
|
The only addition is that a leading exclamation mark negates
|
|
the regular expression.
|
|
""")
|
|
|
|
Retval = _(r"""RETURN VALUE
|
|
The return value is non-zero when
|
|
o invalid links were found or
|
|
o warnings were found warnings are enabled
|
|
o a program error occurred
|
|
""")
|
|
|
|
Examples = _(r"""EXAMPLES
|
|
The most common use checks the given domain recursively, plus any
|
|
single URL pointing outside of the domain:
|
|
linkchecker http://treasure.calvinsplayground.de/
|
|
Beware that this checks the whole site which can have several hundred
|
|
thousands URLs. Use the -r option to restrict the recursion depth.
|
|
|
|
Don't connect to mailto: hosts, only check their URL syntax. All other
|
|
links are checked as usual:
|
|
linkchecker --ignore-url=^mailto: www.mysite.org
|
|
|
|
Checking a local HTML file on Unix:
|
|
linkchecker ../bla.html
|
|
|
|
Checking a local HTML file on Windows:
|
|
linkchecker c:\temp\test.html
|
|
|
|
You can skip the "http://" url part if the domain starts with "www.":
|
|
linkchecker www.myhomepage.de
|
|
|
|
You can skip the "ftp://" url part if the domain starts with "ftp.":
|
|
linkchecker -r0 ftp.linux.org
|
|
""")
|
|
|
|
LoggerTypes = _(r"""OUTPUT TYPES
|
|
Note that by default only errors and warnings are logged.
|
|
You should use the --verbose option to get the complete URL list,
|
|
especially when outputting a sitemap graph format.
|
|
|
|
text Standard text output, logging URLs in keyword: argument fashion.
|
|
html Log URLs in keyword: argument fashion, formatted as HTML.
|
|
Additionally has links to the referenced pages. Invalid URLs have
|
|
HTML and CSS syntax check links appended.
|
|
csv Log check result in CSV format with one URL per line.
|
|
gml Log parent-child relations between linked URLs as a GML sitemap
|
|
graph.
|
|
dot Log parent-child relations between linked URLs as a DOT sitemap
|
|
graph.
|
|
gxml Log check result as a GraphXML sitemap graph.
|
|
xml Log check result as machine-readable XML.
|
|
sql Log check result as SQL script with INSERT commands. An example
|
|
script to create the initial SQL table is included as create.sql.
|
|
blacklist
|
|
Suitable for cron jobs. Logs the check result into a file
|
|
~/.linkchecker/blacklist which only contains entries with invalid
|
|
URLs and the number of times they have failed.
|
|
none Logs nothing. Suitable for scripts.
|
|
""")
|
|
|
|
Warnings = _(r"""IGNORE WARNINGS
|
|
The following warnings are recognized in the ignorewarnings config
|
|
file entry:
|
|
""")
|
|
for tag, desc in linkcheck.checker.Warnings.items():
|
|
Warnings += " o %s%s %s%s" % (tag, os.linesep, desc, os.linesep)
|
|
|
|
def encode (s, codec="iso8859-15"):
    """
    Return the string converted to the given codec so it can be printed
    on screen. Characters the codec cannot represent are dropped.
    """
    printable = s.encode(codec, "ignore")
    return printable
|
|
|
|
|
|
def print_version ():
|
|
"""
|
|
Print the program version and exit.
|
|
"""
|
|
print encode(linkcheck.configuration.AppInfo)
|
|
sys.exit(0)
|
|
|
|
|
|
def print_usage (msg):
    """
    Report the given error message and a hint about the help option on
    stderr, then terminate the program with exit status 1.
    """
    out = sys.stderr.write
    out(encode(_("Error: %s") % msg))
    out(os.linesep)
    out(encode(_("Execute 'linkchecker -h' for help")))
    out(os.linesep)
    sys.exit(1)
|
|
|
|
|
|
def viewprof ():
    """
    Print profiling data and exit.

    Exits with status 1 when the pstats module could not be imported or
    when the profiling file does not exist. Otherwise the statistics are
    printed sorted by cumulative time, limited to 100 entries, and the
    program exits with status 0.
    """
    if not has_pstats:
        linkcheck.log.error(linkcheck.LOG_CMDLINE,
            _("The `pstats' Python module is not installed,"
              " therefore the --viewprof option is disabled."))
        sys.exit(1)
    if not os.path.exists(_profile):
        linkcheck.log.warn(linkcheck.LOG_CMDLINE,
            _("Could not find profiling file %r.") % _profile)
        # Encode the translated hint before writing it, the same way
        # print_usage() does for its stderr messages.
        sys.stderr.write(encode(
            _("Please run linkchecker with --profile to generate it.")))
        sys.stderr.write(os.linesep)
        sys.exit(1)
    stats = pstats.Stats(_profile)
    stats.strip_dirs().sort_stats("cumulative").print_stats(100)
    sys.exit(0)
|
|
|
|
|
|
def try_compile_re (arg):
|
|
"""
|
|
Try to compile the regular expression. On error print an error message
|
|
and exit.
|
|
"""
|
|
try:
|
|
return re.compile(arg)
|
|
except re.error, msg:
|
|
linkcheck.log.error(linkcheck.LOG_CMDLINE,
|
|
_("Syntax error in %r: %s", arg, msg))
|
|
sys.exit(1)
|
|
|
|
|
|
def has_encoding (encoding):
    """
    Tell whether the codec registry can look up the given encoding name.
    """
    try:
        codecs.lookup(encoding)
    except LookupError:
        return False
    return True
|
|
|
|
|
|
class LCHelpFormatter (optparse.IndentedHelpFormatter):
    """
    Help formatter indenting paragraph-wise.
    """

    def format_option (self, option):
        """
        Return the formatted help entry for one option.

        The help for each option consists of two parts:
          * the opt strings and metavars
            eg. ("-x", or "-fFILENAME, --file=FILENAME")
          * the user-supplied help string
            eg. ("turn on expert mode", "read data from FILENAME")

        If possible, both are written on the same line:
          -x      turn on expert mode

        But if the opt string list is too long, the help string is put
        on a second line, indented to the same column it would start in
        if it fit on the first line:
          -fFILENAME, --file=FILENAME
                  read data from FILENAME
        """
        indent = self.current_indent
        help_column = self.help_position
        opt_strings = self.option_strings[option]
        # two columns are reserved as separator in front of the help text
        opt_width = help_column - indent - 2
        if len(opt_strings) > opt_width:
            head = "%*s%s\n" % (indent, "", opt_strings)
            first_indent = help_column
        else:
            # Start help on same line as opts. The separator is two
            # characters wide to match the two columns subtracted in
            # opt_width, so the first help line begins at help_position
            # exactly like the continuation lines below.
            head = "%*s%-*s  " % (indent, "", opt_width, opt_strings)
            first_indent = 0
        parts = [head]
        help_lines = []
        if option.help:
            wrapped = linkcheck.strformat.wrap(option.help, self.help_width)
            help_lines = wrapped.splitlines()
        if help_lines:
            parts.append("%*s%s\n" % (first_indent, "", help_lines[0]))
            for line in help_lines[1:]:
                parts.append("%*s%s\n" % (help_column, "", line))
        elif head[-1] != "\n":
            # no help text to print (none given, or wrapping produced no
            # lines): just terminate the option line
            parts.append("\n")
        return "".join(parts)
|
|
|
|
|
|
class LCOptionParser (optparse.OptionParser, object):
|
|
"""
|
|
Option parser with custom help text layout.
|
|
"""
|
|
|
|
def __init__ (self):
|
|
"""
|
|
Initializing using our own help formatter class.
|
|
"""
|
|
super(LCOptionParser, self).__init__(formatter=LCHelpFormatter())
|
|
|
|
def error (self, msg):
|
|
"""
|
|
Print usage info and given message.
|
|
"""
|
|
print_usage(msg)
|
|
|
|
def get_usage (self):
|
|
"""
|
|
Return translated usage text.
|
|
"""
|
|
return Usage
|
|
|
|
def print_help (self, file=None):
|
|
"""
|
|
Print translated help text.
|
|
"""
|
|
s = u"%s\n%s\n%s\n%s\n%s\n%s\n%s" % (self.format_help(),
|
|
Examples, LoggerTypes, RegularExpressions, Notes, Retval, Warnings)
|
|
s = s.encode("iso-8859-1", "replace")
|
|
if os.name != 'posix':
|
|
linkcheck.strformat.paginate(s)
|
|
else:
|
|
print s
|
|
sys.exit(0)
|
|
|
|
# instantiate option parser and configure options
|
|
optparser = LCOptionParser()
|
|
|
|
################# general options ##################
|
|
group = optparse.OptionGroup(optparser, _("General options"))
|
|
group.add_option("-f", "--config", type="string", dest="configfile",
|
|
metavar="FILENAME",
|
|
help=_(
|
|
"""Use FILENAME as configuration file. Per default LinkChecker first
|
|
searches /etc/linkchecker/linkcheckerrc and then ~/.linkchecker/linkcheckerrc
|
|
(under Windows <path-to-program>\\linkcheckerrc)."""))
|
|
group.add_option("-I", "--interactive", action="store_true",
|
|
dest="interactive", help=_(
|
|
"""Ask for URL if none are given on the commandline."""))
|
|
group.add_option("-t", "--threads", type="int", dest="threads",
|
|
metavar="NUMBER",
|
|
help=_(
|
|
"""Generate no more than the given number of threads. Default number
|
|
of threads is 10. To disable threading specify a non-positive number."""))
|
|
group.add_option("--priority", action="store_true", dest="priority",
|
|
help=_(
|
|
"""Run with normal thread scheduling priority. Per default LinkChecker
|
|
runs with low thread priority to be suitable as a background job."""))
|
|
group.add_option("--disable-psyco", action="store_false", dest="psyco",
|
|
default=True, help=_(
|
|
"""Do not use the psyco optimization module even if it is installed."""))
|
|
# the version switch is the last entry of the general options group
group.add_option("-V", "--version", action="store_true", dest="version",
                 help=_(
"""Print version and exit."""))
optparser.add_option_group(group)
|
|
|
|
################# output options ##################
|
|
group = optparse.OptionGroup(optparser, _("Output options"))
|
|
group.add_option("-v", "--verbose", action="store_true", dest="verbose",
|
|
help=_(
|
|
"""Log all checked URLs. Default is to log only errors and warnings."""))
|
|
group.add_option("--no-warnings", action="store_false", dest="warnings",
|
|
help=_("""Don't log warnings. Default is to log warnings."""))
|
|
group.add_option("-W", "--warning-regex", type="string", dest="warningregex",
|
|
metavar="REGEX",
|
|
help=_(
|
|
"""Define a regular expression which prints a warning if it matches
|
|
any content of the checked link. This applies only to valid pages,
|
|
so we can get their content.
|
|
|
|
Use this to check for pages that contain some form of error
|
|
message, for example 'This page has moved' or 'Oracle
|
|
Application Server error'."""))
|
|
# The limit is declared as an integer like the other NUMBER options
# (-t, -r, --timeout, -P); without a type optparse would store the raw
# command line string.
group.add_option("--warning-size-bytes", type="int", dest="warningsizebytes",
                 metavar="NUMBER",
                 help=_(
"""Print a warning if content size info is available and exceeds the
given number of bytes."""))
|
|
group.add_option("-q", "--quiet", action="store_true", dest="quiet",
|
|
help=_(
|
|
"""Quiet operation, an alias for '-o none'.
|
|
This is only useful with -F."""))
|
|
group.add_option("-o", "--output", type="string", dest="output",
|
|
metavar="TYPE[/ENCODING]",
|
|
help=_(
|
|
"""Specify output as %(loggertypes)s. Default output type is text.
|
|
The ENCODING specifies the output encoding, the default is that of your
|
|
locale.
|
|
Valid encodings are listed at http://docs.python.org/lib/standard-encodings.html.""") % \
|
|
{'loggertypes': linkcheck.LoggerKeys})
|
|
group.add_option("-F", "--file-output", type="string", action="append",
|
|
dest="fileoutput", metavar="TYPE[/ENCODING][/FILENAME]",
|
|
help=_(
|
|
"""Output to a file linkchecker-out.TYPE, $HOME/.linkchecker/blacklist for
|
|
'blacklist' output, or FILENAME if specified.
|
|
The ENCODING specifies the output encoding, the default is that of your
|
|
locale.
|
|
Valid encodings are listed at http://docs.python.org/lib/standard-encodings.html.
|
|
The FILENAME and ENCODING parts of the 'none' output type will be ignored,
|
|
else if the file already exists, it will be overwritten.
|
|
You can specify this option more than once. Valid file output types
|
|
are %(loggertypes)s. You can specify this option multiple times to output
|
|
to more than one file. Default is no file output. Note that you can
|
|
suppress all console output with the option '-o none'.""") % \
|
|
{'loggertypes': linkcheck.LoggerKeys})
|
|
group.add_option("--no-status", action="store_false", dest="status",
|
|
default=True, help=_(
|
|
"""Do not print check status messages."""))
|
|
group.add_option("-D", "--debug", type="string", action="append",
|
|
metavar="STRING",
|
|
help=_("""Print debugging output for the given logger.
|
|
Available loggers are %(lognamelist)s.
|
|
Specifying 'all' is an alias for specifying all available loggers.
|
|
The option can be given multiple times to debug with more
|
|
than one logger.
|
|
|
|
For accurate results, threading and the psyco optimization module will
|
|
be disabled during debug runs.""") % \
|
|
{"lognamelist": linkcheck.lognamelist})
|
|
group.add_option("--trace", action="store_true", dest="trace",
|
|
help=_("""Print tracing information. The psyco optimization
|
|
module will be disabled during traced runs."""))
|
|
group.add_option("--profile", action="store_true", dest="profile",
|
|
help=_(
|
|
"""Write profiling data into a file named %s in the
|
|
current working directory. See also --viewprof.""") % _profile)
|
|
group.add_option("--viewprof", action="store_true", dest="viewprof",
|
|
help=_(
|
|
"""Print out previously generated profiling data. See also --profile."""))
|
|
optparser.add_option_group(group)
|
|
|
|
|
|
################# checking options ##################
|
|
group = optparse.OptionGroup(optparser, _("Checking options"))
|
|
group.add_option("-r", "--recursion-level", type="int", dest="recursionlevel",
|
|
metavar="NUMBER",
|
|
help=_(
|
|
"""Check recursively all links up to given depth. A negative depth
|
|
will enable infinite recursion. Default depth is infinite."""))
|
|
group.add_option("--no-follow-url", type="string", action="append",
|
|
metavar="REGEX",
|
|
dest="extern", help=_(
|
|
"""Check but do not recurse into URLs matching the given regular
|
|
expression. This option can be given multiple times."""))
|
|
group.add_option("--ignore-url", type="string", action="append",
|
|
metavar="REGEX",
|
|
dest="externstrict", help=_(
|
|
"""Only check syntax of URLs matching the given regular expression.
|
|
This option can be given multiple times."""))
|
|
group.add_option("-C", "--cookies", action="store_true", dest="cookies",
|
|
help=_(
|
|
"""Accept and send HTTP cookies according to RFC 2109. Only cookies
|
|
which are sent back to the originating server are accepted.
|
|
Sent and accepted cookies are provided as additional logging
|
|
information."""))
|
|
group.add_option("-a", "--anchors", action="store_true", dest="anchors",
|
|
help=_(
|
|
"""Check HTTP anchor references. Default is not to check anchors."""))
|
|
group.add_option("--no-anchor-caching", action="store_false",
|
|
dest="anchorcaching", help=_(
|
|
"""Treat url#anchora and url#anchorb as equal on caching. This
|
|
is the default browser behaviour, but it's not specified in
|
|
the URI specification. Use with care."""))
|
|
group.add_option("-u", "--user", type="string", dest="username",
|
|
metavar="STRING",
|
|
help=_(
|
|
"""Try the given username for HTTP and FTP authorization.
|
|
For FTP the default username is 'anonymous'. For HTTP there is
|
|
no default username. See also -p."""))
|
|
group.add_option("-p", "--password", type="string", dest="password",
|
|
metavar="STRING",
|
|
help=_(
|
|
"""Try the given password for HTTP and FTP authorization.
|
|
For FTP the default password is 'anonymous@'. For HTTP there is
|
|
no default password. See also -u."""))
|
|
group.add_option("--timeout", type="int", dest="timeout",
|
|
metavar="NUMBER",
|
|
help=_(
|
|
"""Set the timeout for connection attempts in seconds. The default
|
|
timeout is %d seconds.""") % default_timeout)
|
|
group.add_option("-P", "--pause", type="int", dest="pause",
|
|
metavar="NUMBER",
|
|
help=_(
|
|
"""Pause the given number of seconds between each url check. This option
|
|
disables threading. Default is no pause between requests."""))
|
|
group.add_option("-N", "--nntp-server", type="string", dest="nntpserver",
|
|
metavar="STRING",
|
|
help=_(
|
|
"""Specify an NNTP server for 'news:...' links. Default is the
|
|
environment variable NNTP_SERVER. If no host is given,
|
|
only the syntax of the link is checked."""))
|
|
group.add_option("--no-proxy-for", type="string", action="append",
|
|
metavar="REGEX",
|
|
dest="noproxyfor", help=_(
|
|
"""Contact hosts that match the given regular expression directly instead
|
|
of going through a proxy. This option can be given multiple times."""))
|
|
optparser.add_option_group(group)
|
|
|
|
################# auto completion #####################
|
|
if has_optcomplete:
|
|
optcomplete.autocomplete(optparser)
|
|
|
|
# read and parse command line options and arguments
(options, args) = optparser.parse_args()

# build a config object for this check session
config = linkcheck.configuration.Configuration()
# initialize logging
if options.debug:
    # every name given with -D must be one of the known logger names
    allowed_debugs = linkcheck.lognames.keys()
    for _name in options.debug:
        if _name not in allowed_debugs:
            print_usage(_("Invalid debug level %(level)r") % {'level': _name})
    # disable psyco if debugging is enabled to prevent that stack lists
    # have PsycoFrame objects instead of types.FrameType
    options.psyco = False
if options.trace:
    # disable psyco for tracing
    options.psyco = False

config.init_logging(debug=options.debug)
linkcheck.log.debug(linkcheck.LOG_CMDLINE, "Python %s on %s",
                    sys.version, sys.platform)
# read configuration files
try:
    if options.configfile:
        # only the file given with -f/--config is read
        config.read(files=[options.configfile])
    else:
        config.read()
except linkcheck.LinkCheckerError, msg:
    # config error
    print_usage(str(msg))
|
|
# apply commandline options and arguments
# Options whose value is None were not given on the command line and
# leave the configuration value untouched.
constructauth = False
do_profile = False
if not options.priority:
    # lower the thread priority unless --priority was given
    import linkcheck.threader
    linkcheck.threader.set_thread_priority(linkcheck.threader.PRIO_LOW)
if options.warnings is not None:
    config["warnings"] = options.warnings
if options.anchors is not None:
    config["anchors"] = options.anchors
    # enabling anchor checks also turns warnings on
    config["warnings"] = True
if options.extern:
    # --no-follow-url patterns
    pats = [linkcheck.get_link_pat(arg) for arg in options.extern]
    config["externlinks"].extend(pats)
if options.externstrict:
    # --ignore-url patterns are added as strict patterns
    pats = [linkcheck.get_link_pat(arg, strict=True) \
            for arg in options.externstrict]
    config["externlinks"].extend(pats)
if options.noproxyfor:
    ros = [try_compile_re(arg) for arg in options.noproxyfor]
    config["noproxyfor"].extend(ros)
if options.output:
    # the option value has the form TYPE[/ENCODING]
    if "/" in options.output:
        logtype, encoding = options.output.split("/", 1)
    else:
        # no encoding given, fall back to iso-8859-15
        logtype, encoding = options.output, "iso-8859-15"
    if not linkcheck.Loggers.has_key(logtype.lower()):
        print_usage(_("Unknown logger type %r in %r for option %s") % \
                    (logtype, options.output, "'-o, --output'"))
    if logtype != 'none' and not has_encoding(encoding):
        print_usage(_("Unknown encoding %r in %r for option %s") % \
                    (encoding, options.output, "'-o, --output'"))
    config['logger'] = config.logger_new(logtype.lower(), encoding=encoding)
|
|
if options.fileoutput:
|
|
ns = {'fileoutput': 1}
|
|
for arg in options.fileoutput:
|
|
ftype = arg
|
|
# look for (optional) filename and encoding
|
|
if '/' in ftype:
|
|
ftype, suffix = ftype.split('/', 1)
|
|
if suffix:
|
|
if has_encoding(suffix):
|
|
# it was an encoding
|
|
ns['encoding'] = suffix
|
|
elif '/' in suffix:
|
|
# look for (optional) encoding
|
|
encoding, filename = suffix.split('/', 1)
|
|
if has_encoding(encoding):
|
|
ns['encoding'] = encoding
|
|
ns['filename'] = filename
|
|
else:
|
|
ns['filename'] = suffix
|
|
else:
|
|
ns['filename'] = suffix
|
|
if not linkcheck.Loggers.has_key(ftype):
|
|
print_usage(_("Unknown logger type %r in %r for option %s") % \
|
|
(ftype, options.output, "'-F, --file-output'"))
|
|
if ftype != 'none' and 'encoding' in ns and \
|
|
not has_encoding(ns['encoding']):
|
|
print_usage(_("Unknown encoding %r in %r for option %s") % \
|
|
ns['encoding'], options.output, "'-F, --file-output'")
|
|
# generating loggers with fileoutput can throw
|
|
# an exception when opening the file
|
|
try:
|
|
logger = config.logger_new(ftype, **ns)
|
|
except OSError, msg:
|
|
print_usage(_("Illegal argument %r for option %s: %s") % \
|
|
(arg, "'-F, --file-output'", str(msg)))
|
|
config['fileoutput'].append(logger)
|
|
# Apply the remaining command line options. Options whose value is None
# were not given and leave the configuration value untouched.
if options.interactive is not None:
    config['interactive'] = options.interactive
if options.nntpserver:
    config["nntpserver"] = options.nntpserver
if options.anchorcaching is not None:
    config["anchorcaching"] = options.anchorcaching
if options.password is not None:
    _password = options.password
    constructauth = True
if options.pause is not None:
    # negative pause values are rejected
    if options.pause >= 0:
        config["wait"] = options.pause
    else:
        print_usage(_("Illegal argument %r for option %s") % \
                    (options.pause, "'-P, --pause'"))
if options.profile is not None:
    do_profile = options.profile
if options.quiet is not None:
    # -q replaces the console logger with the 'none' logger
    config['logger'] = config.logger_new('none')
if options.recursionlevel is not None:
    config["recursionlevel"] = options.recursionlevel
if options.status is not None:
    config['status'] = options.status
if options.threads is not None:
    # all values below one are stored as zero
    if options.threads < 1:
        options.threads = 0
    config["threads"] = options.threads
if options.timeout is not None:
    # only positive timeouts are accepted
    if options.timeout > 0:
        socket.setdefaulttimeout(options.timeout)
    else:
        print_usage(_("Illegal argument %r for option %s") % \
                    (options.timeout, "'--timeout'"))
if options.username is not None:
    _username = options.username
    constructauth = True
if options.version is not None:
    # prints the version and exits
    print_version()
if options.verbose is not None:
    if options.verbose:
        # verbose output also turns warnings on
        config["verbose"] = True
        config["warnings"] = True
if options.viewprof:
    # prints the profiling data and exits
    viewprof()
if options.warningregex is not None:
    config["warningregex"] = try_compile_re(options.warningregex)
    config["warnings"] = True
if options.warningsizebytes is not None:
    config["warnsizebytes"] = options.warningsizebytes
if options.cookies is not None:
    config['cookies'] = options.cookies
if constructauth:
    # user and/or password from the command line are registered with a
    # pattern that matches every non-empty string
    config["authentication"].append({'pattern': try_compile_re(".+"),
                                     'user': _username,
                                     'password': _password})
|
|
|
|
# log the final configuration for debugging
linkcheck.log.debug(linkcheck.LOG_CMDLINE, "configuration: %s",
                    pprint.pformat(config.items()))
# warn about sitemap loggers and verbose output
# (looks at the classes of the console logger and all file output loggers)
klasses = [c.__class__ for c in [config['logger']] + config['fileoutput']]
if (linkcheck.logger.gml.GMLLogger in klasses or \
    linkcheck.logger.dot.DOTLogger in klasses) and not config['verbose']:
    linkcheck.log.warn(linkcheck.LOG_CMDLINE,
        _("Using DOT or GML loggers without verbose output"
          " gives an incomplete sitemap graph."))

# interactive input
if len(args) <= 0:
    if config['interactive']:
        # ask for the URLs when none were given on the command line
        urls = raw_input(
            _("enter one or more URLs, separated by white-space\n--> "))
        args = urls.split()
    else:
        linkcheck.log.warn(linkcheck.LOG_CMDLINE, _("no files or URLs given"))

# initialize the cache and the consumer model
cache = linkcheck.checker.cache.Cache()
consumer = linkcheck.checker.consumer.Consumer(config, cache)
if options.trace:
    config["trace"] = True
    linkcheck.log.trace_filter([r"^linkcheck"])
    linkcheck.log.trace()
# syntactic sugar
# (arguments starting with "www." or "ftp." get the matching URL scheme)
for url in args:
    if url.lower().startswith("www."):
        url = "http://%s" % url
    elif url.lower().startswith("ftp."):
        url = "ftp://%s" % url
    url_data = linkcheck.checker.get_url_from(url, 0, consumer, cmdline=True)
    # add to consumer queue
    consumer.append_url(url_data)
|
|
############################# check the URLs ################################
|
|
# Run the URL check exactly once: either under the profiler, or directly
# (optionally with the psyco optimizer enabled).
if do_profile and not has_profile:
    linkcheck.log.warn(linkcheck.LOG_CMDLINE,
        _("The `profile' Python module is not installed,"
          " therefore the --profile option is disabled."))

if do_profile and has_profile:
    run = True
    if os.path.exists(_profile):
        # an existing profiling file is only overwritten after the
        # user confirmed it
        question = _("Overwrite profiling file %r?\n"
                     "Press Ctrl-C to cancel, RETURN to continue.") % _profile
        try:
            raw_input(question)
        except KeyboardInterrupt:
            sys.stderr.write(os.linesep)
            sys.stderr.write(_("Canceled."))
            sys.stderr.write(os.linesep)
            run = False
    if run:
        profile.run("linkcheck.checker.check_urls(consumer)", _profile)
else:
    # Not profiling (or the profile module is missing): check directly.
    # The check is part of this branch so it neither runs a second time
    # after a profiled or canceled run, nor is skipped when psyco has
    # been disabled.
    if options.psyco:
        try:
            import psyco
            # psyco >= 1.4.0 final is needed
            if psyco.__version__ >= 0x10400f0:
                psyco.profile(memory=10000)
            else:
                # warn about old psyco version
                linkcheck.log.warn(linkcheck.LOG_CMDLINE,
         _("Psyco is installed but not used since the version is too old.\n"
           "Psyco >= 1.4 is needed."))
        except ImportError:
            # no psyco available, just ignore
            pass
    linkcheck.checker.check_urls(consumer)
|
|
#############################################################################
|
|
|
|
# interactive input end
# (wait for RETURN before the program finishes)
if config['interactive']:
    raw_input(_("Hit RETURN to finish"))

# if errors or warnings are encountered, exit with non-zero status
# (warnings only count when warnings are enabled in the configuration)
if config['logger'].errors or \
   (config['logger'].warnings and config['warnings']):
    sys.exit(1)
|