mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-21 12:51:53 +00:00
use new optparser
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1163 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
558d8ad2e7
commit
9f191a920a
1 changed files with 245 additions and 280 deletions
525
linkchecker
525
linkchecker
|
|
@ -42,112 +42,7 @@ _profile = "linkchecker.prof"
|
|||
|
||||
# main usage text
|
||||
Usage = i18n._("""USAGE\tlinkchecker [options] file-or-url...
|
||||
|
||||
OPTIONS
|
||||
For single-letter option arguments the space is not a necessity. So
|
||||
'-o colored' is the same as '-ocolored'.
|
||||
-a, --anchors
|
||||
Check HTTP anchor references. This option applies to both internal
|
||||
and external urls. Default is don't check anchors.
|
||||
This option implies -w because anchor errors are always warnings.
|
||||
-C, --cookies
|
||||
Accept and send HTTP cookies according to RFC 2109. Only cookies
|
||||
which are sent back to the originating server are accepted.
|
||||
Sent and accepted cookies are provided as additional logging
|
||||
information.
|
||||
-d, --denyallow
|
||||
Swap checking order to external/internal. Default checking order
|
||||
is internal/external.
|
||||
-D, --debug
|
||||
Print debugging information. Provide this option multiple times
|
||||
for even more debugging information.
|
||||
-e regex, --extern=regex
|
||||
Assume urls that match the given expression as external.
|
||||
Only internal HTML links are checked recursively.
|
||||
-f file, --config=file
|
||||
Use file as configuration file. As default LinkChecker first
|
||||
searches /etc/linkcheckerrc and then ~/.linkcheckerrc
|
||||
(under Windows <path-to-program>\\linkcheckerrc).
|
||||
-F type[/filename], --file-output=type[/filename]
|
||||
Same as -o, but write to a file linkchecker-out.<type>
|
||||
or <filename> if specified. If the file already exists, it
|
||||
is overwritten. You can specify this option more than once.
|
||||
There is no file output for the blacklist logger. Default is
|
||||
no file output.
|
||||
-I, --interactive
|
||||
Ask for url if none are given on the commandline.
|
||||
-i regex, --intern=regex
|
||||
Assume URLs that match the given expression as internal.
|
||||
LinkChecker descends recursively only to internal URLs, not to
|
||||
external.
|
||||
-h, --help
|
||||
Help me! Print usage information for this program.
|
||||
-N server, --nntp-server=server
|
||||
Specify an NNTP server for 'news:...' links. Default is the
|
||||
environment variable NNTP_SERVER. If no host is given,
|
||||
only the syntax of the link is checked.
|
||||
--no-anchor-caching
|
||||
Treat url#anchora and url#anchorb as equal on caching. This
|
||||
is the default browser behaviour, but it's not specified in
|
||||
the URI specification. Use with care.
|
||||
-o type, --output=type
|
||||
Specify output type as %s.
|
||||
Default type is text.
|
||||
-p pwd, --password=pwd
|
||||
Try password pwd for HTTP and FTP authorization.
|
||||
Default password is 'joe@'. See also -u.
|
||||
-P secs, --pause=secs
|
||||
Pause <secs> seconds between each url check. This option
|
||||
implies -t0.
|
||||
Default is no pause between requests.
|
||||
--profile
|
||||
Write profiling data into a file named %s in the
|
||||
current working directory.
|
||||
See also --viewprof.
|
||||
-q, --quiet
|
||||
Quiet operation. This is only useful with -F.
|
||||
-r depth, --recursion-level=depth
|
||||
Check recursively all links up to given depth. A negative depth
|
||||
will enable inifinite recursion.
|
||||
Default depth is 1.
|
||||
-s, --strict
|
||||
Check only syntax of external links, do not try to connect to them.
|
||||
For local file urls, only local files are internal. For
|
||||
http and ftp urls, all urls at the same domain name are internal.
|
||||
--status
|
||||
Print check status every 5 seconds to stderr.
|
||||
-t num, --threads=num
|
||||
Generate no more than num threads. Default number of threads is 5.
|
||||
To disable threading specify a non-positive number.
|
||||
--timeout=secs
|
||||
Set the timeout for TCP connection attempts in seconds. The default
|
||||
timeout is 30 seconds.
|
||||
-u name, --user=name
|
||||
Try username name for HTTP and FTP authorization.
|
||||
Default is 'anonymous'. See also -p.
|
||||
-V, --version
|
||||
Print version and exit.
|
||||
-v, --verbose
|
||||
Log all checked URLs (implies -w). Default is to log only invalid
|
||||
URLs.
|
||||
--viewprof
|
||||
Print out previously generated profiling data. See also --profile.
|
||||
-w, --warnings
|
||||
Log warnings.
|
||||
-W regex, --warning-regex=regex
|
||||
Define a regular expression which prints a warning if it matches
|
||||
any content of the checked link.
|
||||
This applies of course only to pages which are valid, so we can
|
||||
get their content.
|
||||
Use this to check for pages that contain some form of error
|
||||
message, for example 'This page has moved' or 'Oracle
|
||||
Application Server error'.
|
||||
This option implies -w.
|
||||
--warning-size-bytes=bytes
|
||||
Print a warning if content size is available and exceeds the given
|
||||
number of bytes.
|
||||
This option implies -w.
|
||||
""") % (LoggerKeys, _profile)
|
||||
""")
|
||||
|
||||
Notes = i18n._("""NOTES
|
||||
o A ! before any regex negates it. So '!^mailto:' matches everything but
|
||||
|
|
@ -220,193 +115,263 @@ def viewprof ():
|
|||
sys.exit(0)
|
||||
|
||||
# Read command line arguments
|
||||
try:
|
||||
# Note: cut out the name of the script
|
||||
options, args = getopt.getopt(sys.argv[1:],
|
||||
"adCDe:f:F:hIi:N:o:p:P:qr:Rst:u:VvwW:", # short options
|
||||
["anchors", # long options
|
||||
"config=",
|
||||
"cookies",
|
||||
"debug",
|
||||
"extern=",
|
||||
"file-output=",
|
||||
"nntp-server=",
|
||||
"help",
|
||||
"interactive",
|
||||
"intern=",
|
||||
"denyallow",
|
||||
"output=",
|
||||
"password=",
|
||||
"pause=",
|
||||
"profile",
|
||||
"quiet",
|
||||
"recursion-level=",
|
||||
"no-anchor-caching",
|
||||
"wischiwaschi",
|
||||
"robots-txt",
|
||||
"strict",
|
||||
"status",
|
||||
"threads=",
|
||||
"timeout=",
|
||||
"user=",
|
||||
"version",
|
||||
"verbose",
|
||||
"viewprof",
|
||||
"warnings",
|
||||
"warning-regex="])
|
||||
except getopt.error:
|
||||
type, value = sys.exc_info()[:2]
|
||||
printUsage(value)
|
||||
from optparse import OptionParser
|
||||
optparser = OptionParser()
|
||||
|
||||
optparser.add_option("-a", "--anchors", action="store_true", dest="anchors",
|
||||
help=i18n._(
|
||||
"""Check HTTP anchor references. This option applies to both internal
|
||||
and external urls. Default is don't check anchors.
|
||||
This option implies -w because anchor errors are always warnings."""))
|
||||
|
||||
optparser.add_option("-C", "--cookies", action="store_true", dest="cookies",
|
||||
help=i18n._(
|
||||
"""Accept and send HTTP cookies according to RFC 2109. Only cookies
|
||||
which are sent back to the originating server are accepted.
|
||||
Sent and accepted cookies are provided as additional logging
|
||||
information."""))
|
||||
|
||||
optparser.add_option("-d", "--denyallow", dest="denyallow",
|
||||
help=i18n._(
|
||||
"""Swap checking order to external/internal. Default checking order
|
||||
is internal/external."""))
|
||||
|
||||
optparser.add_option("-e", "--extern", type="string", action="append", dest="extern",
|
||||
help=i18n._(
|
||||
"""Assume urls that match the given expression as external.
|
||||
Only internal HTML links are checked recursively."""))
|
||||
|
||||
optparser.add_option("-f", "--config", type="string", dest="configfile",
|
||||
help=i18n._(
|
||||
"""Use file as configuration file. As default LinkChecker first
|
||||
searches /etc/linkcheckerrc and then ~/.linkcheckerrc
|
||||
(under Windows <path-to-program>\\linkcheckerrc)."""))
|
||||
|
||||
optparser.add_option("-F", "--file-output", type="string", dest="fileoutput",
|
||||
help=i18n._(
|
||||
"""type[/filename]
|
||||
Same as -o, but write to a file linkchecker-out.<type>
|
||||
or <filename> if specified. If the file already exists, it
|
||||
is overwritten. You can specify this option more than once.
|
||||
There is no file output for the blacklist logger. Default is
|
||||
no file output."""))
|
||||
|
||||
optparser.add_option("-D", "--debug", action="count",
|
||||
help=i18n._(
|
||||
"""Print debugging information. Provide this option multiple times
|
||||
for even more debugging information."""))
|
||||
|
||||
optparser.add_option("-I", "--interactive", action="store_true", dest="interactive",
|
||||
help=i18n._(
|
||||
"""Ask for url if none are given on the commandline."""))
|
||||
|
||||
optparser.add_option("-i", "--intern", type="string", action="append", dest="intern",
|
||||
help=i18n._(
|
||||
""" regex, --intern=regex
|
||||
Assume URLs that match the given expression as internal.
|
||||
LinkChecker descends recursively only to internal URLs, not to
|
||||
external."""))
|
||||
|
||||
optparser.add_option("-N", "--nntp-server", type="string", dest="nntpserver",
|
||||
help=i18n._(
|
||||
"""Specify an NNTP server for 'news:...' links. Default is the
|
||||
environment variable NNTP_SERVER. If no host is given,
|
||||
only the syntax of the link is checked."""))
|
||||
|
||||
optparser.add_option("--no-anchor-caching", action="store_false", dest="anchorcaching",
|
||||
help=i18n._(
|
||||
"""Treat url#anchora and url#anchorb as equal on caching. This
|
||||
is the default browser behaviour, but it's not specified in
|
||||
the URI specification. Use with care."""))
|
||||
|
||||
optparser.add_option("-o", "--output", type="string", dest="output",
|
||||
help=i18n._(
|
||||
"""Specify output type as %s. Default type is text.""")%LoggerKeys)
|
||||
|
||||
optparser.add_option("-p", "--password", type="string", dest="password",
|
||||
help=i18n._(
|
||||
"""Try password pwd for HTTP and FTP authorization.
|
||||
Default password is 'joe@'. See also -u."""))
|
||||
|
||||
optparser.add_option("-P", "--pause", type="int", dest="pause",
|
||||
help=i18n._(
|
||||
"""Pause <secs> seconds between each url check. This option implies -t0.
|
||||
Default is no pause between requests."""))
|
||||
|
||||
optparser.add_option("--profile", action="store_true", dest="profile",
|
||||
help=i18n._(
|
||||
"""Write profiling data into a file named %s in the
|
||||
current working directory. See also --viewprof.""")%_profile)
|
||||
|
||||
optparser.add_option("-q", "--quiet", action="store_true", dest="quiet",
|
||||
help=i18n._(
|
||||
"""Quiet operation. This is only useful with -F."""))
|
||||
|
||||
optparser.add_option("-r", "--recursion-level", type="int", dest="recursionlevel",
|
||||
help=i18n._(
|
||||
"""Check recursively all links up to given depth. A negative depth
|
||||
will enable inifinite recursion. Default depth is 1."""))
|
||||
|
||||
optparser.add_option("-s", "--strict", action="store_true", dest="strict",
|
||||
help=i18n._(
|
||||
"""Check only syntax of external links, do not try to connect to them.
|
||||
For local file urls, only local files are internal. For
|
||||
http and ftp urls, all urls at the same domain name are internal."""))
|
||||
|
||||
optparser.add_option("--status", action="store_true", dest="status",
|
||||
help=i18n._(
|
||||
"""Print check status every 5 seconds to stderr."""))
|
||||
|
||||
optparser.add_option("-t", "--threads", type="int", dest="threads",
|
||||
help=i18n._(
|
||||
"""Generate no more than num threads. Default number of threads is 5.
|
||||
To disable threading specify a non-positive number."""))
|
||||
|
||||
optparser.add_option("--timeout", type="int", dest="timeout",
|
||||
help=i18n._(
|
||||
"""Set the timeout for TCP connection attempts in seconds. The default
|
||||
timeout is 30 seconds."""))
|
||||
|
||||
optparser.add_option("-u", "--user", type="string", dest="username",
|
||||
help=i18n._(
|
||||
"""Try username name for HTTP and FTP authorization.
|
||||
Default is 'anonymous'. See also -p."""))
|
||||
|
||||
optparser.add_option("-V", "--version", dest="version",
|
||||
help=i18n._(
|
||||
"""Print version and exit."""))
|
||||
|
||||
optparser.add_option("-v", "--verbose", action="store_true", dest="verbose",
|
||||
help=i18n._(
|
||||
"""Log all checked URLs (implies -w). Default is to log only invalid
|
||||
URLs."""))
|
||||
|
||||
optparser.add_option("--viewprof", action="store_true", dest="viewprof",
|
||||
help=i18n._(
|
||||
"""Print out previously generated profiling data. See also --profile."""))
|
||||
|
||||
optparser.add_option("-w", "--warnings", action="store_true", dest="warnings",
|
||||
help=i18n._("""Log warnings."""))
|
||||
|
||||
optparser.add_option("-W", "--warning-regex", type="string", dest="warningregex",
|
||||
help=i18n._(
|
||||
"""Define a regular expression which prints a warning if it matches
|
||||
any content of the checked link.
|
||||
This applies of course only to pages which are valid, so we can
|
||||
get their content.
|
||||
Use this to check for pages that contain some form of error
|
||||
message, for example 'This page has moved' or 'Oracle
|
||||
Application Server error'.
|
||||
This option implies -w."""))
|
||||
|
||||
optparser.add_option("--warning-size-bytes", dest="warningsizebytes",
|
||||
help=i18n._(
|
||||
"""Print a warning if content size is available and exceeds the given
|
||||
number of bytes. This option implies -w."""))
|
||||
|
||||
if "--wischiwaschi" in sys.argv:
|
||||
from linkcheck import util1
|
||||
util1.abbuzze()
|
||||
sys.exit(0)
|
||||
|
||||
(options, args) = optparser.parse_args()
|
||||
|
||||
# set debug level as early as possible
|
||||
for opt,arg in options:
|
||||
if opt=="-D" or opt=="--debug":
|
||||
set_debuglevel(get_debuglevel()+1)
|
||||
if options.debug is not None:
|
||||
set_debuglevel(options.debug)
|
||||
debug(BRING_IT_ON, "Python", sys.version, "on", sys.platform)
|
||||
# read configuration from config files
|
||||
# config object
|
||||
config = linkcheck.Config.Configuration()
|
||||
# read configuration from config files
|
||||
configfiles = []
|
||||
for opt,arg in options:
|
||||
if opt=="-f" or opt=="--config":
|
||||
configfiles.append(arg)
|
||||
if options.configfile:
|
||||
configfiles.append(options.configfile)
|
||||
config.read(configfiles)
|
||||
# apply commandline options and arguments
|
||||
_user = "anonymous"
|
||||
_password = "guest@"
|
||||
constructauth = False
|
||||
do_profile = False
|
||||
for opt,arg in options:
|
||||
if opt=="-a" or opt=="--anchors":
|
||||
config["anchors"] = True
|
||||
config["warnings"] = True
|
||||
|
||||
elif opt=="-e" or opt=="--extern":
|
||||
config["externlinks"].append(linkcheck.getLinkPat(arg))
|
||||
|
||||
elif opt=="-h" or opt=="--help":
|
||||
printHelp()
|
||||
|
||||
elif opt=="-o" or opt=="--output":
|
||||
if linkcheck.log.Loggers.has_key(arg):
|
||||
config['log'] = config.newLogger(arg)
|
||||
else:
|
||||
printUsage((i18n._("Illegal argument '%s' for option ") % arg) +\
|
||||
"'-o, --output'")
|
||||
|
||||
elif opt=="-F" or opt=="--file-output":
|
||||
ns = {'fileoutput': 1}
|
||||
try:
|
||||
type, ns['filename'] = arg.split('/', 1)
|
||||
if not ns['filename']: raise ValueError
|
||||
except ValueError: type = arg
|
||||
if linkcheck.log.Loggers.has_key(type) and type != "blacklist":
|
||||
config['fileoutput'].append(config.newLogger(type, ns))
|
||||
else:
|
||||
printUsage((i18n._("Illegal argument '%s' for option ") % arg) +\
|
||||
"'-F, --file-output'")
|
||||
|
||||
elif opt=="-I" or opt=="--interactive":
|
||||
config['interactive'] = True
|
||||
|
||||
elif opt=="-i" or opt=="--intern":
|
||||
config["internlinks"].append(linkcheck.getLinkPat(arg))
|
||||
|
||||
elif opt=="-l" or opt=="--denyallow":
|
||||
config["denyallow"] = True
|
||||
|
||||
elif opt=="-N" or opt=="--nntp-server":
|
||||
config["nntpserver"] = arg
|
||||
|
||||
elif opt=="--no-anchor-caching":
|
||||
config["anchorcaching"] = False
|
||||
|
||||
elif opt=="-p" or opt=="--password":
|
||||
_password = arg
|
||||
constructauth = True
|
||||
|
||||
elif opt=="-P" or opt=="--pause":
|
||||
try:
|
||||
wait = int(arg)
|
||||
except ValueError:
|
||||
printUsage(i18n._("Illegal argument %r for option %s") % \
|
||||
(arg, "'-P, --pause'"))
|
||||
if wait >= 0:
|
||||
config["wait"] = wait
|
||||
else:
|
||||
printUsage(i18n._("Illegal argument %r for option %s") % \
|
||||
(arg, "'-P, --pause'"))
|
||||
|
||||
elif opt=="--profile":
|
||||
do_profile = True
|
||||
|
||||
elif opt=="-q" or opt=="--quiet":
|
||||
config["quiet"] = True
|
||||
|
||||
elif opt=="-r" or opt=="--recursion-level":
|
||||
try:
|
||||
depth = int(arg)
|
||||
if depth >= 0:
|
||||
config["recursionlevel"] = depth
|
||||
else:
|
||||
config["recursionlevel"] = -1
|
||||
except ValueError:
|
||||
printUsage(i18n._("Illegal argument %r for option %s") % \
|
||||
(arg, "'-r, --recursion-level'"))
|
||||
# robots.txt is now default, so ignore this option
|
||||
elif opt=="-R" or opt=="--robots-txt": pass
|
||||
|
||||
elif opt=="-s" or opt=="--strict":
|
||||
config["strict"] = True
|
||||
|
||||
elif opt=="--status":
|
||||
config['status'] = True
|
||||
|
||||
elif opt=="-t" or opt=="--threads":
|
||||
try:
|
||||
config.setThreads(int(arg))
|
||||
except ValueError:
|
||||
printUsage(i18n._("Illegal argument %r for option %s") % \
|
||||
(arg, "'-t, --threads'"))
|
||||
|
||||
elif opt=="--timeout":
|
||||
try:
|
||||
timeout = int(arg)
|
||||
if timeout <= 0:
|
||||
printUsage(i18n._("Illegal argument %r for option %s") % \
|
||||
(arg, "'--timeout'"))
|
||||
socket.setdefaulttimeout(timeout)
|
||||
except ValueError:
|
||||
printUsage(i18n._("Illegal argument %r for option %s") % \
|
||||
(arg, "'--timeout'"))
|
||||
|
||||
elif opt=="-u" or opt=="--user":
|
||||
_user = arg
|
||||
constructauth = True
|
||||
|
||||
elif opt=="-V" or opt=="--version":
|
||||
printVersion()
|
||||
|
||||
elif opt=="-v" or opt=="--verbose":
|
||||
if options.anchors is not None:
|
||||
config["anchors"] = options.anchors
|
||||
config["warnings"] = True
|
||||
if options.extern:
|
||||
config["externlinks"].extend([linkcheck.getLinkPat(arg) for arg in options.extern])
|
||||
if options.output:
|
||||
if linkcheck.log.Loggers.has_key(options.output):
|
||||
config['log'] = config.newLogger(options.output)
|
||||
else:
|
||||
printUsage(i18n._("Illegal argument %r for option %s") % \
|
||||
(options.output, "'-o, --output'"))
|
||||
if options.fileoutput:
|
||||
ns = {'fileoutput': 1}
|
||||
try:
|
||||
ftype, ns['filename'] = options.fileoutput.split('/', 1)
|
||||
if not ns['filename']: raise ValueError
|
||||
except ValueError:
|
||||
ftype = options.fileoutput
|
||||
if linkcheck.log.Loggers.has_key(ftype) and ftype != "blacklist":
|
||||
config['fileoutput'].append(config.newLogger(ftype, ns))
|
||||
else:
|
||||
printUsage(i18n._("Illegal argument %r for option %s") % \
|
||||
(options.fileoutput, "'-F, --file-output'"))
|
||||
if options.interactive is not None:
|
||||
config['interactive'] = options.interactive
|
||||
if options.intern:
|
||||
config["internlinks"].extend([linkcheck.getLinkPat(arg) for arg in options.intern])
|
||||
if options.denyallow is not None:
|
||||
config["denyallow"] = options.denyallow
|
||||
if options.nntpserver:
|
||||
config["nntpserver"] = options.nntpserver
|
||||
if options.anchorcaching is not None:
|
||||
config["anchorcaching"] = options.anchorcaching
|
||||
if options.password is not None:
|
||||
_password = options.password
|
||||
constructauth = True
|
||||
if options.pause is not None:
|
||||
if options.pause >= 0:
|
||||
config["wait"] = options.pause
|
||||
else:
|
||||
printUsage(i18n._("Illegal argument %d for option %s") % \
|
||||
(options.pause, "'-P, --pause'"))
|
||||
if options.profile is not None:
|
||||
do_profile = options.profile
|
||||
if options.quiet is not None:
|
||||
config["quiet"] = options.quiet
|
||||
if options.recursionlevel is not None:
|
||||
config["recursionlevel"] = options.recursionlevel
|
||||
if options.strict is not None:
|
||||
config["strict"] = options.strict
|
||||
if options.status is not None:
|
||||
config['status'] = options.status
|
||||
if options.threads is not None:
|
||||
config.setThreads(options.threads)
|
||||
if options.timeout is not None:
|
||||
if options.timeout > 0:
|
||||
socket.setdefaulttimeout(options.timeout)
|
||||
else:
|
||||
printUsage(i18n._("Illegal argument %r for option %s") % \
|
||||
(options.timeout, "'--timeout'"))
|
||||
if options.username is not None:
|
||||
_user = options.username
|
||||
constructauth = True
|
||||
if options.version is not None:
|
||||
printVersion()
|
||||
if options.verbose is not None:
|
||||
if options.verbose:
|
||||
config["verbose"] = True
|
||||
config["warnings"] = True
|
||||
|
||||
elif opt=="--viewprof":
|
||||
viewprof()
|
||||
|
||||
elif opt=="--wischiwaschi":
|
||||
from linkcheck import util1
|
||||
util1.abbuzze()
|
||||
sys.exit(0)
|
||||
elif opt=="-w" or opt=="--warnings":
|
||||
config["warnings"] = True
|
||||
|
||||
elif opt=="-W" or opt=="--warning-regex":
|
||||
config["warningregex"] = re.compile(arg)
|
||||
config["warnings"] = True
|
||||
|
||||
elif opt=="-C" or opt=="--cookies":
|
||||
config['cookies'] = True
|
||||
|
||||
if options.viewprof:
|
||||
viewprof()
|
||||
if options.warnings is not None:
|
||||
config["warnings"] = options.warnings
|
||||
if options.warningregex is not None:
|
||||
config["warningregex"] = re.compile(options.warningregex)
|
||||
config["warnings"] = True
|
||||
if options.warningsizebytes is not None:
|
||||
config["warnsizebytes"] = options.warningsizebytes
|
||||
if options.cookies is not None:
|
||||
config['cookies'] = options.cookies
|
||||
if constructauth:
|
||||
config["authentication"].insert(0, {'pattern': re.compile(".*"),
|
||||
'user': _user,
|
||||
|
|
|
|||
Loading…
Reference in a new issue